feat(d1): cutover production D1 to schema v1.0 + live worker serving
Vijay Janapa Reddi, commit 8d385b0c1a
Four deployment-level fixes landed on the live Cloudflare worker + D1
instance:

1. compiler.py — populate the chains table from chains.json. Pre-v1.0 the
   table was never filled, which only mattered once D1 (which enforces
   FKs by default, unlike stock SQLite) tried to insert chain_questions.
   The cutover failed with 'FOREIGN KEY constraint failed' until
   chains(id) was populated; a sketch of the fix follows this list.

2. types.ts (worker) — add competency_area, bloom_level, phase, and
   human_review_* fields. Worker SQL was already SELECT *, so the new
   columns flow through without code changes, but the TypeScript row
   interface needed updating for downstream consumers.

3. rate_limit.ts — add a Math.max(60, …) floor on expirationTtl. The old
   calculation could emit values as low as 11 s, which the KV store
   backing the rate limiter rejects (Workers KV requires expirationTtl of
   at least 60 s), so the worker threw error 1101 on every request after
   the deployment. Tail logs showed 'Invalid expiration_ttl of 14'.

4. wrangler.toml — bump SCHEMA_FINGERPRINT to match the v1.0 vault.db
   (b97218dae6354b1b…). Without this, /manifest reports
   schema_fingerprint_ok: false and clients degrade.
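
For reference, the shape of the compiler.py fix is roughly the sketch below.
It is illustrative only, not the actual compiler code: the chains.json
layout (a JSON array of objects with id/name/topic keys) and the helper name
are assumptions, while the chains(id, name, topic) columns match the SELECT
used by ship_d1.py further down.

  # Illustrative sketch, assuming chains.json is a JSON array of
  # {"id": ..., "name": ..., "topic": ...} objects (hypothetical helper).
  import json
  import sqlite3
  from pathlib import Path

  def populate_chains(conn: sqlite3.Connection, chains_json: Path) -> None:
      chains = json.loads(chains_json.read_text())
      conn.executemany(
          "INSERT INTO chains (id, name, topic) VALUES (?, ?, ?)",
          [(c["id"], c["name"], c.get("topic")) for c in chains],
      )
      conn.commit()  # chain_questions rows can now satisfy the FK on chains(id)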

New script:
  scripts/ship_d1.py — end-to-end reload of D1 from the current YAMLs.
  'vault build' → SQL dump → 'wrangler d1 execute --file'. Handles FK
  ordering (chains first, then questions, then chain_questions). Used
  for this cutover; repeatable for future schema bumps.

Deployment state (2026-04-22):
  Worker URL:    https://staffml-vault.mlsysbook-ai-account.workers.dev
  D1 database:   staffml-vault (254f630f-…) — 9,199 questions loaded
  Release hash:  997747a8f43bbd89e03c6bb0e67865f8de35ac8316fbb0457ee0b8f955afb32f
  Manifest:      curl …/manifest returns 9,199 / schema_fingerprint_ok=true
  GET question:  /questions/cloud-0185 returns the post-Phase-2 v1.0 record
                 (zone=mastery, level=L6+, competency_area=latency, …)
  Filtered list: /questions?track=cloud&level=L6%2B works with pagination
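
A quick smoke check of that state can be scripted against the public routes
above. The sketch below is illustrative only: it hits the listed endpoints,
but the JSON key names other than schema_fingerprint_ok are assumptions and
may not match the real payloads.

  # Illustrative post-deploy smoke check (stdlib only); response key names
  # beyond schema_fingerprint_ok are assumptions.
  import json
  import urllib.request

  BASE = "https://staffml-vault.mlsysbook-ai-account.workers.dev"

  def get_json(path: str):
      with urllib.request.urlopen(BASE + path, timeout=10) as resp:
          return json.load(resp)

  manifest = get_json("/manifest")
  assert manifest.get("schema_fingerprint_ok") is True, manifest

  question = get_json("/questions/cloud-0185")
  print(question.get("zone"), question.get("level"))

  page = get_json("/questions?track=cloud&level=L6%2B")
  print("filtered page keys:", list(page))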

Site cutover is NOT in this commit. The existing hybrid path
(bundled corpus.json primary + worker /search secondary) keeps
working unchanged. To flip the site entirely to the worker:
  export NEXT_PUBLIC_VAULT_API=https://staffml-vault.mlsysbook-ai-account.workers.dev
  unset NEXT_PUBLIC_VAULT_FALLBACK
  # then: next build && next deploy
That flip converts every caller from the sync 'getQuestions()' path to the
async path in corpus-source.ts; it is deferred because callers still need
an audit pass to confirm they handle async correctly.
2026-04-22 10:29:35 -04:00

interviews/vault-cli/scripts/ship_d1.py (Python, 133 lines, 5.0 KiB):

#!/usr/bin/env python3
"""Ship the current vault.db to the live Cloudflare D1 database.

Generates a full-reload SQL script (DROP + CREATE + INSERT) from a fresh
vault.db build, then applies it via `wrangler d1 execute --remote`.

Usage:

    # Fresh build + push to production D1:
    python3 interviews/vault-cli/scripts/ship_d1.py

    # Dry-run: write SQL to /tmp/d1_cutover.sql, don't apply:
    python3 interviews/vault-cli/scripts/ship_d1.py --dry-run

    # Stage instead of production:
    python3 interviews/vault-cli/scripts/ship_d1.py --env staging

Requires:
- `vault` CLI installed (`pip install -e interviews/vault-cli/`)
- `wrangler` CLI authenticated (`wrangler whoami`)
"""
from __future__ import annotations

import argparse
import shlex
import sqlite3
import subprocess
import sys
from pathlib import Path

REPO = Path(__file__).resolve().parents[3]
VAULT_DIR = REPO / "interviews" / "vault"
VAULT_DB = VAULT_DIR / "vault.db"
WORKER_DIR = REPO / "interviews" / "staffml-vault-worker"
D1_SCHEMA = REPO / "interviews" / "vault-cli" / "scripts" / "d1-schema.sql"


def _sql_lit(v):
    """Render a Python value as a SQL literal (NULL, numeric, or quoted text)."""
    if v is None:
        return "NULL"
    if isinstance(v, (int, float)):
        return str(v)
    return "'" + str(v).replace("'", "''") + "'"


def generate_sql(vault_db: Path, d1_schema: Path, out: Path) -> None:
    """Write a full-reload SQL script (DROP + CREATE + INSERT) to `out`."""
    parts = []
    parts.append("-- D1 full-reload migration — DROP + CREATE + INSERT")
    parts.append(f"-- Generated from {vault_db.name} (byte-for-byte reproducible)")
    parts.append("PRAGMA foreign_keys = OFF;")
    for tbl in ["questions_fts", "chain_questions", "chains", "tags", "release_metadata", "questions"]:
        parts.append(f"DROP TABLE IF EXISTS {tbl};")
    for trig in ["questions_ai", "questions_ad", "questions_au"]:
        parts.append(f"DROP TRIGGER IF EXISTS {trig};")
    parts.append("")
    parts.append(d1_schema.read_text())
    parts.append("PRAGMA foreign_keys = ON;")

    conn = sqlite3.connect(vault_db)
    cur = conn.cursor()

    # Parent tables first so FKs don't bounce the child inserts.
    parts.append("\n-- chains (parent of chain_questions)")
    for row in cur.execute("SELECT id, name, topic FROM chains"):
        parts.append(f"INSERT INTO chains VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- questions")
    for row in cur.execute("""
        SELECT id, title, topic, track, level, zone, competency_area, bloom_level, phase,
               status, scenario, common_mistake, realistic_solution, napkin_math,
               provenance, created_at, last_modified,
               human_review_status, human_review_by, human_review_date,
               file_path, content_hash, authors_json
        FROM questions
    """):
        parts.append(f"INSERT INTO questions VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- chain_questions")
    for row in cur.execute("SELECT chain_id, question_id, position FROM chain_questions"):
        parts.append(f"INSERT INTO chain_questions VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- tags")
    for row in cur.execute("SELECT question_id, tag FROM tags"):
        parts.append(f"INSERT INTO tags VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- release_metadata")
    for row in cur.execute("SELECT key, value FROM release_metadata"):
        parts.append(f"INSERT INTO release_metadata VALUES ({','.join(_sql_lit(v) for v in row)});")

    conn.close()
    out.write_text("\n".join(parts) + "\n")


def main() -> int:
    ap = argparse.ArgumentParser(description="Ship vault.db to Cloudflare D1.")
    ap.add_argument("--env", choices=["production", "staging"], default="production",
                    help="Target D1 environment (default: production).")
    ap.add_argument("--dry-run", action="store_true",
                    help="Write SQL to /tmp/d1_cutover.sql but do not apply.")
    ap.add_argument("--skip-build", action="store_true",
                    help="Assume vault.db is already up to date.")
    args = ap.parse_args()

    if not args.skip_build:
        print("==> vault build", flush=True)
        subprocess.check_call(
            ["vault", "build", "--vault-dir", str(VAULT_DIR), "--release-id", f"ship-{args.env}"],
            cwd=REPO,
        )

    sql_path = Path("/tmp/d1_cutover.sql")
    print(f"==> generating {sql_path} from {VAULT_DB.name}", flush=True)
    generate_sql(VAULT_DB, D1_SCHEMA, sql_path)
    size_mb = sql_path.stat().st_size / 1024 / 1024
    with sql_path.open() as fh:
        line_count = sum(1 for _ in fh)
    print(f" wrote {line_count} lines ({size_mb:.1f} MiB)", flush=True)

    if args.dry_run:
        print("==> dry-run; skipping wrangler d1 execute", flush=True)
        return 0

    env_flag = "" if args.env == "production" else f"--env {args.env}"
    cmd = f"npx wrangler d1 execute staffml-vault --remote {env_flag} --file={sql_path}".strip()
    print(f"==> {cmd}", flush=True)
    subprocess.check_call(shlex.split(cmd), cwd=WORKER_DIR)
    print("==> done", flush=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())