Files
cs249r_book/interviews/vault-cli/scripts/promote_drafts.py
Vijay Janapa Reddi a74c98576e Merge origin/dev into yaml-audit
Sync the yaml-audit branch with the latest dev work since the previous
sync (5c5af75ed). Brings in 73 commits including:

  - CI security fixes: postcss XSS bump, uuid bounds bump, codeql
    paths-ignore for vendored bundles, read-only token on
    staffml-validate-vault workflow
  - kits/ dark mode polish: code-block readability, dropdown contrast
  - vault-cli/: pre-commit ruff hook + 20 ruff fixes, all-contributors
    auto-credit workflow change to pull_request_target
  - dev's earlier merge of yaml-audit (836d481b5) carrying the
    pre-trailer-strip Phase 1/2/3 history; this merge harmonises that
    with the current trailer-clean yaml-audit tip
  - misc bug fixes (tinytorch perceptron seed, infra workflows,
    socratiq vite dev injector)

Conflicts resolved (if any) preserve the yaml-audit-side authoritative
state for vault/* files (we own those) and the dev-side authoritative
state for .github/workflows/* and other shared infrastructure.

# Conflicts:
#	.github/workflows/all-contributors-auto-credit.yml
#	.github/workflows/staffml-preview-dev.yml
#	interviews/staffml/src/data/corpus-summary.json
#	interviews/staffml/src/data/vault-manifest.json
#	interviews/staffml/tests/chain-and-vault-smoke.mjs
#	interviews/vault-cli/README.md
#	interviews/vault-cli/docs/CHAIN_ROADMAP.md
#	interviews/vault-cli/scripts/build_chains_with_gemini.py
#	interviews/vault-cli/scripts/generate_question_for_gap.py
#	interviews/vault-cli/scripts/merge_chain_passes.py
#	interviews/vault-cli/scripts/validate_drafts.py
#	interviews/vault-cli/src/vault_cli/legacy_export.py
#	interviews/vault-cli/tests/test_chain_validation.py
#	interviews/vault/.gitignore
#	interviews/vault/ARCHITECTURE.md
#	interviews/vault/chains.json
#	interviews/vault/id-registry.yaml
#	interviews/vault/questions/edge/optimization/edge-2536.yaml
#	interviews/vault/questions/mobile/deployment/mobile-2147.yaml
#	tinytorch/src/03_layers/03_layers.py
2026-05-02 11:06:43 -04:00

222 lines
8.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""Promote LLM-authored question drafts to the corpus (Phase 3.d helper).
A draft is a `<id>.yaml.draft` file under `interviews/vault/questions/`,
written by `generate_question_for_gap.py`. Promotion does five things:
1. Strips the private ``_authoring`` block and replaces it with the
real schema fields (``provenance``, ``status``, ``authors``,
``human_reviewed``, ``created_at``, plus a ``gap-bridge:<from>-<to>``
tag for traceability).
2. Renames ``<id>.yaml.draft`` → ``<id>.yaml``.
3. Appends an entry to ``interviews/vault/id-registry.yaml``
(append-only, CI-enforced).
4. Optionally flips ``status`` to ``published`` (default: keep
``draft`` so the human reviewer's workflow stays explicit).
5. Optionally flips ``human_reviewed.status`` to ``verified`` with
``--reviewed-by <handle>``.
Selection modes — pick one:
--all-passing # promote every draft whose row in
# draft-validation-scorecard.json verdict=pass
--qids edge-2536,edge-2537 # explicit list (whether they passed or not)
--from-scorecard <path> # use a non-default scorecard path
--dry-run # show what would change, write nothing
Examples:
# After reviewing the 4 pilot drafts, promote them all and mark verified:
python3 promote_drafts.py --all-passing --publish --reviewed-by vj
# Promote two specific qids as drafts (no publish, no review stamp):
python3 promote_drafts.py --qids edge-2536,mobile-2146
# Preview only:
python3 promote_drafts.py --all-passing --dry-run
The script never overwrites a `<id>.yaml` that already exists. It does not
run `vault check --strict` itself, so run that yourself after this script
as the final gate.
"""
from __future__ import annotations
import argparse
import json
import sys
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
import yaml
# Repository root: parents[0] is the directory holding this script, so
# parents[3] is three directory levels above it.
REPO_ROOT = Path(__file__).resolve().parents[3]
# Root of the vault tree that the paths below hang off.
VAULT_DIR = REPO_ROOT / "interviews" / "vault"
# Searched recursively for `<id>.yaml.draft` files.
QUESTIONS_DIR = VAULT_DIR / "questions"
# Registry file that promotion appends new ids to.
ID_REGISTRY = VAULT_DIR / "id-registry.yaml"
# AI-pipeline scorecard lives under _pipeline/ (gitignored).
# See interviews/CLAUDE.md.
PIPELINE_DIR = VAULT_DIR / "_pipeline"
# Scorecard read by --all-passing unless --from-scorecard overrides it.
DEFAULT_SCORECARD = PIPELINE_DIR / "draft-validation-scorecard.json"
def find_draft(qid: str) -> Path | None:
    """Locate the `<qid>.yaml.draft` file anywhere under QUESTIONS_DIR.

    Returns:
        The draft's path, or None when no such file exists.

    Raises:
        RuntimeError: if more than one draft file matches the qid.
    """
    candidates = list(QUESTIONS_DIR.rglob(f"{qid}.yaml.draft"))
    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]
    # Ambiguous: the same qid has drafts in more than one directory.
    raise RuntimeError(f"multiple drafts found for {qid}: {candidates}")
def select_drafts(args: argparse.Namespace) -> list[Path]:
    """Resolve the CLI selection into a list of draft paths.

    With ``args.qids`` set, every listed qid must have a draft file.
    Otherwise the scorecard (``args.from_scorecard`` or DEFAULT_SCORECARD)
    is read and every row whose verdict is ``pass`` is selected; rows whose
    draft file is missing are reported on stderr and skipped.

    Raises:
        RuntimeError: if an explicitly requested qid has no draft, or the
            scorecard file does not exist.
    """
    resolved: list[Path] = []

    if args.qids:
        # Explicit mode: a missing draft is a hard error.
        for wanted in args.qids:
            if (hit := find_draft(wanted)) is None:
                raise RuntimeError(f"no draft found for {wanted!r}")
            resolved.append(hit)
        return resolved

    # --all-passing: read scorecard
    scorecard_path = args.from_scorecard if args.from_scorecard else DEFAULT_SCORECARD
    if not scorecard_path.exists():
        raise RuntimeError(f"missing {scorecard_path}; run validate_drafts.py first "
                           f"or pass --qids explicitly")
    scorecard = json.loads(scorecard_path.read_text(encoding="utf-8"))
    # Collect every passing id before touching the filesystem.
    passing = [
        row["draft_id"]
        for row in scorecard.get("rows", [])
        if row.get("verdict") == "pass"
    ]
    for wanted in passing:
        hit = find_draft(wanted)
        if hit is not None:
            resolved.append(hit)
            continue
        print(f" warning: scorecard has {wanted} as pass but no draft file found "
              f"(maybe already promoted?)", file=sys.stderr)
    return resolved
def clean_body(body: dict[str, Any], publish: bool, reviewed_by: str | None,
now: str) -> dict[str, Any]:
auth = body.pop("_authoring", None) or {}
body["provenance"] = "llm-draft"
body["status"] = "published" if publish else "draft"
body["authors"] = [auth.get("origin", "gemini-3.1-pro-preview")]
body["human_reviewed"] = {
"status": "verified" if reviewed_by else "not-reviewed",
"by": reviewed_by,
"date": now if reviewed_by else None,
}
body.setdefault("created_at", auth.get("generated_at") or now)
# gap-bridge:<lower>-<higher> tag for traceability
gap = auth.get("gap") or {}
if gap and gap.get("between"):
existing = body.get("tags") or []
bridge_tag = f"gap-bridge:{'-'.join(gap['between'])}"
body["tags"] = list(dict.fromkeys(existing + [bridge_tag]))
return body
def append_registry(qids: list[str], now: str) -> None:
"""Append-only — never rewrite the file.
Format mirrors existing entries: one YAML mapping per line under entries.
"""
lines = "\n".join(
f" - {{id: {qid}, created_at: {now}, created_by: promote_drafts.py}}"
for qid in qids
)
with ID_REGISTRY.open("a", encoding="utf-8") as f:
f.write(lines + "\n")
def promote_one(draft_path: Path, *, publish: bool, reviewed_by: str | None,
                now: str, dry_run: bool) -> tuple[str, Path]:
    """Promote a single `<id>.yaml.draft` file to `<id>.yaml`.

    Args:
        draft_path: path of the draft file to promote.
        publish: forwarded to ``clean_body`` (published vs. draft status).
        reviewed_by: forwarded to ``clean_body`` (review stamp), or None.
        now: timestamp string forwarded to ``clean_body``.
        dry_run: when true, only report what would happen; write nothing.

    Returns:
        ``(qid, promoted_path)`` where ``qid`` is the draft's ``id`` field.

    Raises:
        RuntimeError: if the draft cannot be read or parsed, has no ``id``
            field, or the promoted file already exists.
    """
    # Surface read/parse failures as RuntimeError, the one error type this
    # function documents, so a bad draft cannot abort a whole batch.
    try:
        body = yaml.safe_load(draft_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as err:
        raise RuntimeError(f"{draft_path}: unreadable draft ({err})") from err
    if not isinstance(body, dict) or "id" not in body:
        raise RuntimeError(f"{draft_path}: malformed draft (no id field)")
    qid = body["id"]

    promoted_path = draft_path.with_suffix("")  # drops .draft → .yaml
    already_exists = (
        f"{promoted_path} already exists — refusing to overwrite. "
        f"Resolve manually before promoting."
    )
    if promoted_path.exists():
        raise RuntimeError(already_exists)

    cleaned = clean_body(body, publish=publish, reviewed_by=reviewed_by, now=now)
    if dry_run:
        print(f" DRY: {draft_path.name} → {promoted_path.name} "
              f"(status={cleaned['status']}, "
              f"human_reviewed={cleaned['human_reviewed']['status']})")
        return qid, promoted_path

    # Serialise before opening the target so a dump failure leaves no file.
    text = yaml.safe_dump(cleaned, sort_keys=False, allow_unicode=True, width=100)
    try:
        # Mode "x" fails if the file appeared after the exists() check
        # above, so an existing question is never overwritten.
        with promoted_path.open("x", encoding="utf-8") as fh:
            fh.write(text)
    except FileExistsError as err:
        raise RuntimeError(already_exists) from err
    # Remove the draft only once the promoted file is fully written.
    draft_path.unlink()
    print(f"{draft_path.name} → {promoted_path.name} "
          f"(status={cleaned['status']})")
    return qid, promoted_path
def main() -> int:
    """CLI entry point: select drafts, promote each, then update the registry.

    Returns:
        0 after the batch has been processed (per-draft failures are
        reported on stderr), or 1 when draft selection itself fails.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    g = ap.add_mutually_exclusive_group(required=True)
    g.add_argument("--all-passing", action="store_true",
                   help="promote every draft whose scorecard verdict is 'pass'")
    g.add_argument("--qids", type=lambda s: [x.strip() for x in s.split(",") if x.strip()],
                   help="comma-separated explicit qid list")
    ap.add_argument("--from-scorecard", type=Path, default=None,
                    help=f"scorecard JSON (default {DEFAULT_SCORECARD})")
    ap.add_argument("--publish", action="store_true",
                    help="set status=published (default: status=draft, gating on review)")
    ap.add_argument("--reviewed-by", default=None,
                    help="set human_reviewed.status=verified, by=<handle>, date=<now>. "
                         "Implies the user has actually reviewed the drafts.")
    ap.add_argument("--dry-run", action="store_true",
                    help="show what would happen, don't write")
    args = ap.parse_args()

    try:
        # De-duplicate (order-preserving): a qid listed twice would otherwise
        # be promoted once and then fail on its already-removed draft.
        drafts = list(dict.fromkeys(select_drafts(args)))
    except RuntimeError as e:
        # Same exit status as the uncaught exception, without the traceback.
        print(f"error: {e}", file=sys.stderr)
        return 1
    if not drafts:
        print("no drafts to promote.")
        return 0

    print(f"promoting {len(drafts)} draft(s)"
          f"{' [DRY-RUN]' if args.dry_run else ''}"
          f"{' as PUBLISHED' if args.publish else ' as draft'}"
          f"{f' (reviewed_by={args.reviewed_by})' if args.reviewed_by else ''}:")
    print()

    now = datetime.now(UTC).isoformat(timespec="seconds")
    promoted_qids: list[str] = []
    try:
        for p in drafts:
            try:
                qid, _ = promote_one(p, publish=args.publish, reviewed_by=args.reviewed_by,
                                     now=now, dry_run=args.dry_run)
            except RuntimeError as e:
                print(f"{p.name}: {e}", file=sys.stderr)
            else:
                promoted_qids.append(qid)
    finally:
        # Runs even if an unexpected error aborts the loop, so every draft
        # already promoted on disk still gets its registry entry.
        if not args.dry_run and promoted_qids:
            append_registry(promoted_qids, now)
            print(f"\nappended {len(promoted_qids)} entries to "
                  f"{ID_REGISTRY.relative_to(REPO_ROOT)}")

    print("\nNow run: vault check --strict && vault build --local-json")
    if args.publish and promoted_qids:
        print("(promoted as published — chainCount may grow on next "
              "build_chains_with_gemini.py --all run)")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())