mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-07 18:18:42 -05:00
LOCAL TEST RESULTS (all green):
pytest: 34 passed in 0.19s (28 existing + 6 new command tests)
ruff: All checks passed (0 errors)
vitest: 7 passed in 127ms (worker contract tests)
CLI e2e: vault --version / build / verify / stats / doctor / diff /
export-paper / ship --dry-run / publish + verify rc1 / api shim
via curl against 9199-question corpus — all green
Python-side fixes:
- interviews/vault-cli/pyproject.toml: ruff config now has principled
per-file-ignores for B008 (Typer pattern), N806 (DAG cycle colors),
E402 (scripts), SIM118 (sqlite3.Row iterator). Keeps signal tight.
- 13 real ruff violations fixed across authoring.py (contextlib.suppress),
diff_cmd.py + serve_api.py (dict(sqlite3.Row) instead of broken
.keys() iteration), policy.py (direct return), release.py (zip
strict=True, update_latest_symlink now validates target exists;
previous 'target' variable was unused), commands/release.py
(import order reshuffled, ambiguous 'l' renamed).
- commands/release.py ship_cmd leg-skip uses 'leg' not 'l'.
New pytest file: interviews/vault-cli/tests/test_commands.py (+6 tests)
- stats: JSON shape + Prometheus format.
- diff: add/remove/modify detection + classification.
- doctor: graceful skip on missing vault; unknown --check returns
USAGE_ERROR.
- codegen: --check passes against baseline.
Worker-side fixes:
- src/index.ts cachedOrCompute graceful-degrades when caches global
isn't available (Node test env, future-proofing against runtime
regressions).
- src/index.ts handleSearch: 'query: q' → 'query: qRaw' (q was
renamed earlier).
- src/rate_limit.ts: removed unused WINDOW_MS const.
- tests/worker.test.ts: vi.resetModules() between tests so
module-level schemaOk/lastSeenRelease state doesn't leak
across test cases (fingerprint memoization was sticky).
- package.json: added test:watch + lint aliases.
- .gitignore: node_modules, .wrangler, dist, .dev.vars.
- package-lock.json committed (npm — pnpm not on the machine; CI
updated to use npm ci).
CI (.github/workflows/vault-ci.yml):
- Split into python + worker jobs.
- Python job: ruff + mypy (non-blocking) + pytest + vault check
--strict + vault build release_hash regression + vault codegen
--check + registry append-only + exemplar audit staleness.
- Worker job: node 20 + npm ci + tsc typecheck + vitest run.
- Triggers now include staffml-vault-types path (keeps CI honest
when shared-types drift).
What runs vs what's gated on user:
RAN LOCALLY: pytest, ruff, vitest, tsc, CLI end-to-end smoke
(build→verify→export→stats→doctor→diff→publish
rc→api-shim→ship --dry-run), full corpus invariants.
GATED ON USER (requires Cloudflare credentials):
- wrangler login + wrangler d1 create
- wrangler d1 execute (schema + seed)
- pnpm/npm deploy:staging
- FTS5 production load-test
- vault ship --env production (live D1 + Next.js + tag push)
Everything that CAN be verified without credentials HAS been.
109 lines
4.6 KiB
Python
109 lines
4.6 KiB
Python
"""Tests for vault ship commit protocol (§6.1.1, Dean R3-NH-1)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from vault_cli.ship import LegPlan, LegState, ShipError, ShipJournal, ShipOutcome, run_ship
|
|
|
|
|
|
def test_all_legs_succeed(tmp_path: Path) -> None:
    """Happy path: all three legs deploy and the ship run reports success.

    Checks that the returned journal has outcome SUCCESS, has crossed the
    point of no return, marks every leg DEPLOYED, and that the forward
    callables were invoked in the order the legs were declared.
    """
    journal_file = tmp_path / ".ship-journal.json"
    invoked: list[str] = []

    def recording_forward(leg_name: str):
        """Build a forward callable that logs its leg name and returns {}."""

        def _forward() -> dict:
            invoked.append(leg_name)
            return {}

        return _forward

    plan = [
        LegPlan(name="d1", forward=recording_forward("d1"), rollback=lambda: {}),
        LegPlan(name="nextjs", forward=recording_forward("nextjs"), rollback=lambda: {}),
        # The paper leg has no rollback callable.
        LegPlan(name="paper", forward=recording_forward("paper"), rollback=None),
    ]

    result = run_ship(
        version="1.0.0",
        env="staging",
        journal_path=journal_file,
        legs=plan,
    )

    assert result.outcome is ShipOutcome.SUCCESS
    assert result.point_of_no_return is True
    leg_states = [leg.state for leg in result.legs]
    assert leg_states == [LegState.DEPLOYED, LegState.DEPLOYED, LegState.DEPLOYED]
    assert invoked == ["d1", "nextjs", "paper"]
|
|
|
|
|
|
def test_pre_paper_failure_auto_rolls_back(tmp_path: Path) -> None:
    """A nextjs failure must roll the already-deployed d1 leg back.

    The run is expected to raise ShipError mentioning "auto-rolled back".
    The persisted journal must then show d1 ROLLED_BACK, nextjs FAILED,
    paper still PENDING, and the point of no return not crossed.
    """
    journal_file = tmp_path / ".ship-journal.json"
    undone: list[str] = []

    def deploy_ok() -> dict:
        return {}

    def undo_d1() -> dict:
        # Record the rollback so the test can check which legs were undone.
        undone.append("d1")
        return {}

    def crash_nextjs() -> dict:
        raise RuntimeError("next.js crashed")

    plan = [
        LegPlan(name="d1", forward=deploy_ok, rollback=undo_d1),
        LegPlan(name="nextjs", forward=crash_nextjs, rollback=lambda: {}),
        LegPlan(name="paper", forward=lambda: {}, rollback=None),
    ]

    with pytest.raises(ShipError, match="auto-rolled back"):
        run_ship(
            version="1.0.0",
            env="staging",
            journal_path=journal_file,
            legs=plan,
        )

    # Re-read the journal from disk: run_ship raised, so nothing was returned.
    recorded = ShipJournal.load(journal_file)
    assert recorded.outcome is ShipOutcome.FAILED_AUTO_ROLLED_BACK
    assert recorded.point_of_no_return is False

    d1_state = recorded.legs[0].state
    nextjs_state = recorded.legs[1].state
    paper_state = recorded.legs[2].state
    assert d1_state is LegState.ROLLED_BACK
    assert nextjs_state is LegState.FAILED
    assert paper_state is LegState.PENDING

    assert undone == ["d1"]
|
|
|
|
|
|
def test_paper_leg_failure_needs_manual(tmp_path: Path) -> None:
    """A paper-leg failure must leave the earlier legs deployed.

    Per §6.1.1 a pushed git tag cannot be un-pushed safely, so when the
    paper leg fails no rollback callable of the earlier legs may run and
    the journal must record FAILED_NEEDS_MANUAL.
    """
    journal_file = tmp_path / ".ship-journal.json"
    # Names of any legs whose rollback callable gets invoked (expected: none).
    rollbacks_seen: list[str] = []

    def recording_rollback(leg_name: str):
        """Build a rollback callable that logs its leg name and returns {}."""

        def _rollback() -> dict:
            rollbacks_seen.append(leg_name)
            return {}

        return _rollback

    def crash_paper() -> dict:
        raise RuntimeError("git push --tags failed")

    plan = [
        LegPlan(name="d1", forward=lambda: {}, rollback=recording_rollback("d1")),
        LegPlan(name="nextjs", forward=lambda: {}, rollback=recording_rollback("nextjs")),
        LegPlan(name="paper", forward=crash_paper, rollback=None),
    ]

    with pytest.raises(ShipError, match="paper-leg failure"):
        run_ship(
            version="1.0.0",
            env="prod",
            journal_path=journal_file,
            legs=plan,
        )

    recorded = ShipJournal.load(journal_file)
    assert recorded.outcome is ShipOutcome.FAILED_NEEDS_MANUAL

    # d1 and nextjs stay DEPLOYED; only the paper leg itself is FAILED.
    d1_state = recorded.legs[0].state
    nextjs_state = recorded.legs[1].state
    paper_state = recorded.legs[2].state
    assert d1_state is LegState.DEPLOYED
    assert nextjs_state is LegState.DEPLOYED
    assert paper_state is LegState.FAILED

    assert rollbacks_seen == []
|
|
|
|
|
|
def test_resume_continues_from_last_successful(tmp_path: Path) -> None:
    """--resume must pick up from the first leg that is not DEPLOYED.

    Scenario: the first attempt deploys d1 and then fails in nextjs. That
    pre-paper failure auto-rolls d1 back, so when the run is resumed with
    a healed nextjs the first non-DEPLOYED leg is d1 itself, and all three
    forward callables are expected to run, in order.
    """
    journal = tmp_path / ".ship-journal.json"
    calls1: list[str] = []

    def fail_nextjs() -> dict:
        raise RuntimeError("transient fail")

    legs1 = [
        LegPlan(name="d1", forward=lambda: calls1.append("d1") or {}, rollback=lambda: {}),
        LegPlan(name="nextjs", forward=fail_nextjs, rollback=lambda: {}),
        LegPlan(name="paper", forward=lambda: {}, rollback=None),
    ]
    with pytest.raises(ShipError):
        run_ship(version="1.0.0", env="staging", journal_path=journal, legs=legs1)

    # First attempt: d1's forward ran exactly once before nextjs failed.
    assert calls1 == ["d1"]
    # Precondition for the resume below: the failure auto-rolled d1 back,
    # so the journal must not still list it as DEPLOYED.
    interrupted = ShipJournal.load(journal)
    assert interrupted.legs[0].state is LegState.ROLLED_BACK

    # Resume with a healed nextjs. Because d1 is ROLLED_BACK (not DEPLOYED),
    # resume has to re-deploy it before continuing with nextjs and paper.
    calls2: list[str] = []
    legs2 = [
        LegPlan(name="d1", forward=lambda: calls2.append("d1") or {}, rollback=lambda: {}),
        LegPlan(name="nextjs", forward=lambda: calls2.append("nextjs") or {}, rollback=lambda: {}),
        LegPlan(name="paper", forward=lambda: calls2.append("paper") or {}, rollback=None),
    ]
    j = run_ship(version="1.0.0", env="staging", journal_path=journal, legs=legs2, resume=True)
    assert j.outcome is ShipOutcome.SUCCESS
    assert calls2 == ["d1", "nextjs", "paper"]
|