mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-07 02:03:55 -05:00
LOCAL TEST RESULTS (all green):
pytest: 34 passed in 0.19s (28 existing + 6 new command tests)
ruff: All checks passed (0 errors)
vitest: 7 passed in 127ms (worker contract tests)
CLI e2e: vault --version / build / verify / stats / doctor / diff /
export-paper / ship --dry-run / publish + verify rc1 / api shim
via curl against 9199-question corpus — all green
Python-side fixes:
- interviews/vault-cli/pyproject.toml: ruff config now has principled
per-file-ignores for B008 (Typer pattern), N806 (DAG cycle colors),
E402 (scripts), SIM118 (sqlite3.Row iterator). Keeps signal tight.
- 13 real ruff violations fixed across authoring.py (contextlib.suppress),
diff_cmd.py + serve_api.py (dict(sqlite3.Row) instead of broken
.keys() iteration), policy.py (direct return), release.py (zip
strict=True, update_latest_symlink now validates target exists;
previous 'target' variable was unused), commands/release.py
(import order reshuffled, ambiguous 'l' renamed).
- commands/release.py ship_cmd leg-skip uses 'leg' not 'l'.
New pytest file: interviews/vault-cli/tests/test_commands.py (+6 tests)
- stats: JSON shape + Prometheus format.
- diff: add/remove/modify detection + classification.
- doctor: graceful skip on missing vault; unknown --check returns
USAGE_ERROR.
- codegen: --check passes against baseline.
Worker-side fixes:
- src/index.ts cachedOrCompute degrades gracefully when the `caches`
global isn't available (Node test env, future-proofing against runtime
regressions).
- src/index.ts handleSearch: 'query: q' → 'query: qRaw' (q was
renamed earlier).
- src/rate_limit.ts: removed unused WINDOW_MS const.
- tests/worker.test.ts: vi.resetModules() between tests so
module-level schemaOk/lastSeenRelease state doesn't leak
across test cases (fingerprint memoization was sticky).
- package.json: added test:watch + lint aliases.
- .gitignore: node_modules, .wrangler, dist, .dev.vars.
- package-lock.json committed (npm — pnpm not on the machine; CI
updated to use npm ci).
CI (.github/workflows/vault-ci.yml):
- Split into python + worker jobs.
- Python job: ruff + mypy (non-blocking) + pytest + vault check
--strict + vault build release_hash regression + vault codegen
--check + registry append-only + exemplar audit staleness.
- Worker job: node 20 + npm ci + tsc typecheck + vitest run.
- Triggers now include staffml-vault-types path (keeps CI honest
when shared-types drift).
What runs vs what's gated on user:
RAN LOCALLY: pytest, ruff, vitest, tsc, CLI end-to-end smoke
(build→verify→export→stats→doctor→diff→publish
rc→api-shim→ship --dry-run), full corpus invariants.
GATED ON USER (requires Cloudflare credentials):
- wrangler login + wrangler d1 create
- wrangler d1 execute (schema + seed)
- pnpm/npm deploy:staging
- FTS5 production load-test
- vault ship --env production (live D1 + Next.js + tag push)
Everything that CAN be verified without credentials HAS been.
154 lines
5.8 KiB
Python
154 lines
5.8 KiB
Python
"""Tests for the newer subcommands: doctor, diff, stats, codegen.
|
|
|
|
These exercise the command surfaces end-to-end via Typer's CliRunner so a
|
|
stale --json schema, exit-code drift, or a regression in one of the
|
|
subchecks is caught in CI.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sqlite3
|
|
from pathlib import Path
|
|
|
|
import typer
|
|
from typer.testing import CliRunner
|
|
|
|
from vault_cli.commands import codegen as codegen_mod
|
|
from vault_cli.commands import diff_cmd as diff_mod
|
|
from vault_cli.commands import doctor as doctor_mod
|
|
from vault_cli.commands import stats as stats_mod
|
|
from vault_cli.exit_codes import ExitCode
|
|
|
|
|
|
def _make_vault(tmp: Path, questions: list[tuple[str, str]]) -> Path:
    """Create a minimal ``vault.db`` under ``tmp / "vault"`` and return that directory.

    The database gets the ``questions``, ``chains``, ``chain_questions``,
    ``tags`` and ``release_metadata`` tables. One ``questions`` row is
    inserted per ``(id, title)`` pair, with fixed placeholder values for the
    remaining columns; ``content_hash`` is ``"h-"`` followed by the id.
    ``release_metadata`` records a fixed release id/hash, schema and policy
    versions, and ``published_count`` equal to ``len(questions)``.

    Args:
        tmp: Parent directory; ``tmp / "vault"`` is created if missing.
        questions: ``(id, title)`` pairs to insert into ``questions``.

    Returns:
        The ``vault`` directory that contains ``vault.db``.
    """
    vault_dir = tmp / "vault"
    vault_dir.mkdir(parents=True, exist_ok=True)

    meta = {
        "release_id": "0.1.0",
        "release_hash": "a" * 64,
        "schema_version": "1",
        "policy_version": "1",
        "published_count": str(len(questions)),
    }

    conn = sqlite3.connect(vault_dir / "vault.db")
    # try/finally so the handle is released even when a statement raises
    # (e.g. duplicate ids); otherwise the open connection outlives the test.
    try:
        conn.executescript(
            """
            CREATE TABLE questions (
              id TEXT PRIMARY KEY, title TEXT, topic TEXT, track TEXT, level TEXT,
              zone TEXT, status TEXT, scenario TEXT, common_mistake TEXT,
              realistic_solution TEXT, napkin_math TEXT, deep_dive_title TEXT,
              deep_dive_url TEXT, provenance TEXT, created_at TEXT, last_modified TEXT,
              file_path TEXT, content_hash TEXT, authors_json TEXT);
            CREATE TABLE chains (id TEXT PRIMARY KEY, name TEXT, topic TEXT);
            CREATE TABLE chain_questions (chain_id TEXT, question_id TEXT, position INTEGER,
              PRIMARY KEY(chain_id, position));
            CREATE TABLE tags (question_id TEXT, tag TEXT, PRIMARY KEY(question_id, tag));
            CREATE TABLE release_metadata (key TEXT PRIMARY KEY, value TEXT);
            """
        )
        # The third bound parameter feeds the SQL string concatenation that
        # derives a per-row content_hash from the question id.
        conn.executemany(
            """INSERT INTO questions VALUES
               (?, ?, 't', 'cloud', 'l1', 'recall', 'published', 'scn',
                NULL, 'soln', NULL, NULL, NULL, 'imported',
                NULL, NULL, '/tmp/x.yaml', 'h-' || ?, NULL)""",
            [(qid, title, qid) for qid, title in questions],
        )
        conn.executemany(
            "INSERT INTO release_metadata VALUES (?, ?)",
            list(meta.items()),
        )
        conn.commit()
    finally:
        conn.close()
    return vault_dir
|
|
|
|
|
|
def _app(register_fn) -> typer.Typer:
    """Return a Typer app that always dispatches as `<app> <subcommand> [args]`.

    Typer promotes an app with a single command to root-level invocation.
    Registering a no-op callback and a dummy ``_noop`` command before calling
    ``register_fn(app)`` keeps the app in multi-command mode, so tests can
    invoke the command under test by name through CliRunner.
    """
    cli = typer.Typer()

    def _root() -> None:
        """Root — tests use subcommand invocation."""

    def _noop() -> None:
        """Keeps Typer in multi-command mode."""

    # Same registrations the decorator form performs, in the same order.
    cli.callback()(_root)
    cli.command("_noop")(_noop)

    register_fn(cli)
    return cli
|
|
|
|
|
|
def test_stats_json_shape(tmp_path: Path) -> None:
    """`stats --json` reports ok, the question total, and the release id."""
    vault_dir = _make_vault(tmp_path, [("global-0000", "A"), ("global-0001", "B")])
    db_path = vault_dir / "vault.db"

    cli = _app(stats_mod.register)
    result = CliRunner().invoke(cli, ["stats", "--vault-db", str(db_path), "--json"])
    assert result.exit_code == 0, result.output

    payload = json.loads(result.stdout)
    assert payload["ok"] is True
    stats = payload["data"]
    assert stats["total"] == 2
    assert stats["release_id"] == "0.1.0"
|
|
|
|
|
|
def test_stats_prometheus_format(tmp_path: Path) -> None:
    """`stats --format-prometheus` emits the expected metric names on stdout."""
    vault_dir = _make_vault(tmp_path, [("global-0000", "A")])
    app = _app(stats_mod.register)
    r = CliRunner().invoke(app, [
        "stats", "--vault-db", str(vault_dir / "vault.db"), "--format-prometheus",
    ])
    # Attach the CLI output so a non-zero exit is diagnosable from the CI log.
    assert r.exit_code == 0, r.output
    assert "vault_questions_total" in r.stdout
    assert "vault_questions_by_track" in r.stdout
|
|
|
|
|
|
def test_diff_cosmetic_semantic_structural(tmp_path: Path) -> None:
    """`diff v1 v2 --json` reports the one question added between two releases.

    Two release databases are built under ``releases/v1`` and ``releases/v2``;
    v2 contains everything in v1 plus question ``c``. Only the ``added`` and
    ``removed`` buckets are asserted here: rows ``a`` and ``b`` are identical
    in both releases, so this fixture does not exercise modification
    classification.
    """
    releases = tmp_path / "releases"
    fixtures = {
        "v1": [("a", "A"), ("b", "B")],
        "v2": [("a", "A"), ("b", "B"), ("c", "C")],
    }
    for name, questions in fixtures.items():
        built = _make_vault(tmp_path / name, questions)
        release_dir = releases / name
        release_dir.mkdir(parents=True)
        # Move the freshly built db to releases/<name>/vault.db for `diff`.
        (built / "vault.db").rename(release_dir / "vault.db")

    app = _app(diff_mod.register)
    r = CliRunner().invoke(app, [
        "diff", "v1", "v2", "--releases-dir", str(releases), "--json",
    ])
    # Attach the CLI output so a non-zero exit is diagnosable from the CI log.
    assert r.exit_code == 0, r.output
    data = json.loads(r.stdout)
    assert len(data["data"]["added"]) == 1
    assert data["data"]["added"][0]["id"] == "c"
    assert len(data["data"]["removed"]) == 0
|
|
|
|
|
|
def test_doctor_skip_when_no_vault(tmp_path: Path) -> None:
    """Doctor must not crash on a bare vault_dir; subchecks should skip gracefully."""
    empty_vault = tmp_path / "empty"
    empty_vault.mkdir()
    app = _app(doctor_mod.register)
    r = CliRunner().invoke(app, [
        "doctor", "--vault-dir", str(empty_vault), "--json", "--check", "release-integrity",
    ])
    # With no releases/ dir the check is expected to report a non-failing
    # status (skip or warn), so the command must still exit 0. Attach the CLI
    # output so a non-zero exit is diagnosable from the CI log.
    assert r.exit_code == 0, r.output
    data = json.loads(r.stdout)
    first_check = data["data"]["checks"][0]
    assert first_check["check"] == "release-integrity"
    assert first_check["status"] in {"skip", "warn"}
|
|
|
|
|
|
def test_doctor_unknown_check_usage_error(tmp_path: Path) -> None:
    """An unrecognised `--check` name makes `doctor` exit with USAGE_ERROR."""
    app = _app(doctor_mod.register)
    r = CliRunner().invoke(app, ["doctor", "--check", "no-such-check"])
    # Attach the CLI output so an unexpected exit code is diagnosable in CI.
    assert r.exit_code == ExitCode.USAGE_ERROR, r.output
|
|
|
|
|
|
def test_codegen_check_records_or_verifies_baseline(tmp_path: Path) -> None:
    """`codegen --check` exits 0 against this repo's generated artifacts.

    The command should either record a baseline (first run) or verify
    cleanly; it is not expected to fail while the 3 artifacts exist and are
    non-empty in this repo.
    """
    cli = _app(codegen_mod.register)
    result = CliRunner().invoke(cli, ["codegen", "--check"])
    assert result.exit_code == 0, result.output
|