mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-06 17:49:07 -05:00
Four deployment-level fixes landed on the live Cloudflare Worker + D1 instance:

1. compiler.py — populate the chains table from chains.json. Pre-v1.0 the table was never filled, which only mattered once D1 (which enforces foreign keys by default, unlike stock SQLite) tried to insert chain_questions. The cutover failed with "FOREIGN KEY constraint failed" until chains(id) was populated.

2. types.ts (worker) — add competency_area, bloom_level, phase, and the human_review_* fields. The worker SQL was already SELECT *, so the new columns flow through without code changes, but the TypeScript row interface needed updating for downstream consumers.

3. rate_limit.ts — apply a Math.max(60, …) floor to expirationTtl. The old calculation could emit values as low as 11 s, which the KV backend rejects (minimum 60 s), so every request after the deployment was throwing error 1101; tail logs showed "Invalid expiration_ttl of 14".

4. wrangler.toml — bump SCHEMA_FINGERPRINT to match the v1.0 vault.db (b97218dae6354b1b…). Without this, /manifest reports schema_fingerprint_ok: false and clients degrade.

New script: scripts/ship_d1.py — an end-to-end reload of D1 from the current YAMLs: `vault build` → SQL dump → `wrangler d1 execute --file`. It handles FK ordering (chains first, then questions, then chain_questions). Used for this cutover; repeatable for future schema bumps.

Deployment state (2026-04-22):
- Worker URL: https://staffml-vault.mlsysbook-ai-account.workers.dev
- D1 database: staffml-vault (254f630f-…) — 9,199 questions loaded
- Release hash: 997747a8f43bbd89e03c6bb0e67865f8de35ac8316fbb0457ee0b8f955afb32f
- Manifest: curl …/manifest returns 9,199 / schema_fingerprint_ok=true
- GET question: /questions/cloud-0185 returns the post-Phase-2 v1.0 record (zone=mastery, level=L6+, competency_area=latency, …)
- Filtered list: /questions?track=cloud&level=L6%2B works with pagination

Site cutover is NOT in this commit. The existing hybrid path (bundled corpus.json primary + worker /search secondary) keeps working unchanged. To flip the site entirely to the worker:

    export NEXT_PUBLIC_VAULT_API=https://staffml-vault.mlsysbook-ai-account.workers.dev
    unset NEXT_PUBLIC_VAULT_FALLBACK
    # then: next build && next deploy

That flip converts every caller from the sync getQuestions() to the async path through corpus-source.ts; it is deferred because the callers need an audit pass to handle async correctly.
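For quick verification, a minimal smoke-test sketch of the endpoints listed above (Python stdlib only; the JSON keys question_count and items are assumptions about the response shapes, not documented field names):

    # Hedged sketch: endpoint paths and expected values come from the deployment
    # notes above; the keys marked "assumed" are illustrative, not verified.
    import json
    import urllib.request

    BASE = "https://staffml-vault.mlsysbook-ai-account.workers.dev"

    def get(path: str) -> dict:
        with urllib.request.urlopen(BASE + path) as resp:
            return json.load(resp)

    manifest = get("/manifest")
    print(manifest.get("schema_fingerprint_ok"))   # expect True after the fingerprint bump
    print(manifest.get("question_count"))          # assumed key; expect 9199

    q = get("/questions/cloud-0185")
    print(q.get("zone"), q.get("level"))           # expect mastery, L6+

    page = get("/questions?track=cloud&level=L6%2B")
    print(len(page.get("items", [])))              # assumed pagination key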
133 lines
5.0 KiB
Python
#!/usr/bin/env python3
"""Ship the current vault.db to the live Cloudflare D1 database.

Generates a full-reload SQL script (DROP + CREATE + INSERT) from a fresh
vault.db build, then applies it via `wrangler d1 execute --remote`.

Usage:
    # Fresh build + push to production D1:
    python3 interviews/vault-cli/scripts/ship_d1.py

    # Dry-run: write SQL to /tmp/d1_cutover.sql, don't apply:
    python3 interviews/vault-cli/scripts/ship_d1.py --dry-run

    # Stage instead of production:
    python3 interviews/vault-cli/scripts/ship_d1.py --env staging

Requires:
    - `vault` CLI installed (`pip install -e interviews/vault-cli/`)
    - `wrangler` CLI authenticated (`wrangler whoami`)
"""

from __future__ import annotations

import argparse
import shlex
import sqlite3
import subprocess
import sys
from pathlib import Path

REPO = Path(__file__).resolve().parents[3]
VAULT_DIR = REPO / "interviews" / "vault"
VAULT_DB = VAULT_DIR / "vault.db"
WORKER_DIR = REPO / "interviews" / "staffml-vault-worker"
D1_SCHEMA = REPO / "interviews" / "vault-cli" / "scripts" / "d1-schema.sql"


def _sql_lit(v):
    """Render a Python value as a SQLite literal: NULL, a bare number, or quoted text."""
    if v is None:
        return "NULL"
    if isinstance(v, (int, float)):
        return str(v)
    # Everything else becomes a single-quoted string with '' escaping
    # (assumes the vault schema ships no BLOB columns).
    return "'" + str(v).replace("'", "''") + "'"


def generate_sql(vault_db: Path, d1_schema: Path, out: Path) -> None:
    """Emit a full-reload SQL script (DROP + CREATE + INSERT) for every vault table."""
    parts = []
    parts.append("-- D1 full-reload migration — DROP + CREATE + INSERT")
    parts.append(f"-- Generated from {vault_db.name} (byte-for-byte reproducible)")
    parts.append("PRAGMA foreign_keys = OFF;")
    for tbl in ["questions_fts", "chain_questions", "chains", "tags", "release_metadata", "questions"]:
        parts.append(f"DROP TABLE IF EXISTS {tbl};")
    for trig in ["questions_ai", "questions_ad", "questions_au"]:
        parts.append(f"DROP TRIGGER IF EXISTS {trig};")
    parts.append("")
    # Recreate the schema from d1-schema.sql, then turn FK checks back on so the
    # INSERT ordering below is validated as it loads.
    parts.append(d1_schema.read_text())
    parts.append("PRAGMA foreign_keys = ON;")

    conn = sqlite3.connect(vault_db)
    cur = conn.cursor()

    # Parent tables first so FKs don't bounce the child inserts.
    parts.append("\n-- chains (parent of chain_questions)")
    for row in cur.execute("SELECT id, name, topic FROM chains"):
        parts.append(f"INSERT INTO chains VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- questions")
    for row in cur.execute("""
        SELECT id, title, topic, track, level, zone, competency_area, bloom_level, phase,
               status, scenario, common_mistake, realistic_solution, napkin_math,
               provenance, created_at, last_modified,
               human_review_status, human_review_by, human_review_date,
               file_path, content_hash, authors_json
        FROM questions
    """):
        parts.append(f"INSERT INTO questions VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- chain_questions")
    for row in cur.execute("SELECT chain_id, question_id, position FROM chain_questions"):
        parts.append(f"INSERT INTO chain_questions VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- tags")
    for row in cur.execute("SELECT question_id, tag FROM tags"):
        parts.append(f"INSERT INTO tags VALUES ({','.join(_sql_lit(v) for v in row)});")

    parts.append("\n-- release_metadata")
    for row in cur.execute("SELECT key, value FROM release_metadata"):
        parts.append(f"INSERT INTO release_metadata VALUES ({','.join(_sql_lit(v) for v in row)});")

    conn.close()
    out.write_text("\n".join(parts) + "\n")


def main() -> int:
    ap = argparse.ArgumentParser(description="Ship vault.db to Cloudflare D1.")
    ap.add_argument("--env", choices=["production", "staging"], default="production",
                    help="Target D1 environment (default: production).")
    ap.add_argument("--dry-run", action="store_true",
                    help="Write SQL to /tmp/d1_cutover.sql but do not apply.")
    ap.add_argument("--skip-build", action="store_true",
                    help="Assume vault.db is already up to date.")
    args = ap.parse_args()

    if not args.skip_build:
        print("==> vault build", flush=True)
        subprocess.check_call(
            ["vault", "build", "--vault-dir", str(VAULT_DIR), "--release-id", f"ship-{args.env}"],
            cwd=REPO,
        )

    sql_path = Path("/tmp/d1_cutover.sql")
    print(f"==> generating {sql_path} from {VAULT_DB.name}", flush=True)
    generate_sql(VAULT_DB, D1_SCHEMA, sql_path)
    size_mb = sql_path.stat().st_size / 1024 / 1024
    with sql_path.open() as fh:
        line_count = sum(1 for _ in fh)
    print(f"  wrote {line_count} lines ({size_mb:.1f} MiB)", flush=True)

    if args.dry_run:
        print("==> dry-run; skipping wrangler d1 execute", flush=True)
        return 0

    # wrangler uses its default (production) environment when no --env flag is given.
    env_flag = "" if args.env == "production" else f"--env {args.env}"
    cmd = f"npx wrangler d1 execute staffml-vault --remote {env_flag} --file={sql_path}".strip()
    print(f"==> {cmd}", flush=True)
    subprocess.check_call(shlex.split(cmd), cwd=WORKER_DIR)
    print("==> done", flush=True)
    return 0


if __name__ == "__main__":
    sys.exit(main())
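A quick local sanity check of the --dry-run output before shipping (a minimal sketch, assuming /tmp/d1_cutover.sql exists from a prior --dry-run and that the local sqlite3 build includes FTS5, since the schema recreates questions_fts):

    # Replay the generated dump into a throwaway in-memory SQLite database to
    # catch SQL syntax errors and FK-ordering problems before `wrangler d1 execute`.
    import sqlite3

    sql = open("/tmp/d1_cutover.sql").read()
    conn = sqlite3.connect(":memory:")
    conn.executescript(sql)
    count = conn.execute("SELECT COUNT(*) FROM questions").fetchone()[0]
    print(count, "questions rebuilt")  # expect 9,199 for the v1.0 vault
    conn.close()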