Files
Vijay Janapa Reddi 4aae33c036 test+ci: green test matrix + lint-clean + real vitest + committed lockfile
LOCAL TEST RESULTS (all green):
  pytest:  34 passed in 0.19s (28 existing + 6 new command tests)
  ruff:    All checks passed  (0 errors)
  vitest:  7 passed in 127ms (worker contract tests)
  CLI e2e: vault --version / build / verify / stats / doctor / diff /
           export-paper / ship --dry-run / publish + verify rc1 / api shim
           via curl against 9199-question corpus — all green

Python-side fixes:
- interviews/vault-cli/pyproject.toml: ruff config now has principled
  per-file-ignores for B008 (Typer pattern), N806 (DAG cycle colors),
  E402 (scripts), SIM118 (sqlite3.Row iterator). Keeps signal tight.
- 13 real ruff violations fixed across authoring.py (contextlib.suppress),
  diff_cmd.py + serve_api.py (dict(sqlite3.Row) instead of broken
  .keys() iteration), policy.py (direct return), release.py (zip
  strict=True, update_latest_symlink now validates target exists;
  previous 'target' variable was unused), commands/release.py
  (import order reshuffled, ambiguous 'l' renamed).
- commands/release.py ship_cmd leg-skip uses 'leg' not 'l'.

New pytest file: interviews/vault-cli/tests/test_commands.py (+6 tests)
  - stats: JSON shape + Prometheus format.
  - diff: add/remove/modify detection + classification.
  - doctor: graceful skip on missing vault; unknown --check returns
    USAGE_ERROR.
  - codegen: --check passes against baseline.

Worker-side fixes:
- src/index.ts: cachedOrCompute degrades gracefully when the `caches`
  global isn't available (Node test env; also future-proofs against
  runtime regressions).
- src/index.ts handleSearch: 'query: q' → 'query: qRaw' (q was
  renamed earlier).
- src/rate_limit.ts: removed unused WINDOW_MS const.
- tests/worker.test.ts: vi.resetModules() between tests so
  module-level schemaOk/lastSeenRelease state doesn't leak
  across test cases (fingerprint memoization was sticky).
- package.json: added test:watch + lint aliases.
- .gitignore: node_modules, .wrangler, dist, .dev.vars.
- package-lock.json committed (npm — pnpm not on the machine; CI
  updated to use npm ci).

CI (.github/workflows/vault-ci.yml):
- Split into python + worker jobs.
- Python job: ruff + mypy (non-blocking) + pytest + vault check
  --strict + vault build release_hash regression + vault codegen
  --check + registry append-only + exemplar audit staleness.
- Worker job: node 20 + npm ci + tsc typecheck + vitest run.
- Triggers now include the staffml-vault-types path (keeps CI honest
  when the shared types drift).

What runs vs what's gated on user:
  RAN LOCALLY: pytest, ruff, vitest, tsc, CLI end-to-end smoke
              (build→verify→export→stats→doctor→diff→publish
              rc→api-shim→ship --dry-run), full corpus invariants.
  GATED ON USER (requires Cloudflare credentials):
    - wrangler login + wrangler d1 create
    - wrangler d1 execute (schema + seed)
    - pnpm/npm deploy:staging
    - FTS5 production load-test
    - vault ship --env production (live D1 + Next.js + tag push)

Everything that CAN be verified without credentials HAS been.
2026-04-16 14:30:20 -04:00

109 lines
4.6 KiB
Python

"""Tests for vault ship commit protocol (§6.1.1, Dean R3-NH-1)."""
from __future__ import annotations
from pathlib import Path
import pytest
from vault_cli.ship import LegPlan, LegState, ShipError, ShipJournal, ShipOutcome, run_ship
def test_all_legs_succeed(tmp_path: Path) -> None:
    """Happy path: all three legs run forward in order and the ship succeeds."""
    journal_file = tmp_path / ".ship-journal.json"
    invoked: list[str] = []

    def record(leg_name: str):
        """Build a forward action that logs *leg_name* and returns an empty dict."""

        def _forward() -> dict:
            invoked.append(leg_name)
            return {}

        return _forward

    def noop() -> dict:
        """Rollback stand-in; never expected to matter on the happy path."""
        return {}

    plan = [
        LegPlan(name="d1", forward=record("d1"), rollback=noop),
        LegPlan(name="nextjs", forward=record("nextjs"), rollback=noop),
        LegPlan(name="paper", forward=record("paper"), rollback=None),
    ]

    result = run_ship(version="1.0.0", env="staging", journal_path=journal_file, legs=plan)

    assert result.outcome is ShipOutcome.SUCCESS
    assert result.point_of_no_return is True
    states = [entry.state for entry in result.legs]
    assert states == [LegState.DEPLOYED, LegState.DEPLOYED, LegState.DEPLOYED]
    # Forward actions must have fired exactly once each, in plan order.
    assert invoked == ["d1", "nextjs", "paper"]
def test_pre_paper_failure_auto_rolls_back(tmp_path: Path) -> None:
    """A nextjs failure must roll back the already-deployed d1 leg.

    The paper leg is never reached, so the journal should still show it as
    pending and the point of no return as not crossed.
    """
    journal_file = tmp_path / ".ship-journal.json"
    undone: list[str] = []

    def succeed() -> dict:
        return {}

    def undo_d1() -> dict:
        undone.append("d1")
        return {}

    def crash_nextjs() -> dict:
        raise RuntimeError("next.js crashed")

    plan = [
        LegPlan(name="d1", forward=succeed, rollback=undo_d1),
        LegPlan(name="nextjs", forward=crash_nextjs, rollback=succeed),
        LegPlan(name="paper", forward=succeed, rollback=None),
    ]

    with pytest.raises(ShipError, match="auto-rolled back"):
        run_ship(version="1.0.0", env="staging", journal_path=journal_file, legs=plan)

    # The journal on disk is the source of truth after a failed ship.
    recorded = ShipJournal.load(journal_file)
    assert recorded.outcome is ShipOutcome.FAILED_AUTO_ROLLED_BACK
    assert recorded.point_of_no_return is False

    d1_leg = recorded.legs[0]
    nextjs_leg = recorded.legs[1]
    paper_leg = recorded.legs[2]
    assert d1_leg.state is LegState.ROLLED_BACK
    assert nextjs_leg.state is LegState.FAILED
    assert paper_leg.state is LegState.PENDING

    # Only d1 had been deployed, so only its rollback may have fired.
    assert undone == ["d1"]
def test_paper_leg_failure_needs_manual(tmp_path: Path) -> None:
    """A paper-leg failure leaves earlier legs deployed and needs manual recovery.

    Per §6.1.1 a git tag push cannot be un-pushed safely, so no rollback
    callback may fire when the paper leg fails.
    """
    journal_file = tmp_path / ".ship-journal.json"
    rolled_back: list[str] = []

    def succeed() -> dict:
        return {}

    def undo(leg_name: str):
        """Build a rollback that records *leg_name*; it must never be invoked here."""

        def _rollback() -> dict:
            rolled_back.append(leg_name)
            return {}

        return _rollback

    def crash_paper() -> dict:
        raise RuntimeError("git push --tags failed")

    plan = [
        LegPlan(name="d1", forward=succeed, rollback=undo("d1")),
        LegPlan(name="nextjs", forward=succeed, rollback=undo("nextjs")),
        LegPlan(name="paper", forward=crash_paper, rollback=None),
    ]

    with pytest.raises(ShipError, match="paper-leg failure"):
        run_ship(version="1.0.0", env="prod", journal_path=journal_file, legs=plan)

    recorded = ShipJournal.load(journal_file)
    assert recorded.outcome is ShipOutcome.FAILED_NEEDS_MANUAL

    # d1 and nextjs stay DEPLOYED: nothing is auto-rolled back on a paper-leg failure.
    d1_leg = recorded.legs[0]
    nextjs_leg = recorded.legs[1]
    paper_leg = recorded.legs[2]
    assert d1_leg.state is LegState.DEPLOYED
    assert nextjs_leg.state is LegState.DEPLOYED
    assert paper_leg.state is LegState.FAILED
    assert rolled_back == []
def test_resume_continues_from_last_successful(tmp_path: Path) -> None:
    """--resume must pick up from the first non-DEPLOYED leg without re-running
    already-deployed ones (idempotency across operator interruptions).

    Scenario covered here: the first attempt fails at nextjs, which
    auto-rolls back d1. No leg is therefore left DEPLOYED, so the resumed
    run is expected to execute d1, nextjs and paper again, in that order.
    """
    journal = tmp_path / ".ship-journal.json"

    # --- First attempt: d1 deploys, nextjs fails, d1 is auto-rolled back. ---
    calls1: list[str] = []

    def fail_nextjs() -> dict:
        raise RuntimeError("transient fail")

    legs1 = [
        LegPlan(name="d1", forward=lambda: calls1.append("d1") or {}, rollback=lambda: {}),
        LegPlan(name="nextjs", forward=fail_nextjs, rollback=lambda: {}),
        LegPlan(name="paper", forward=lambda: {}, rollback=None),
    ]
    with pytest.raises(ShipError, match="auto-rolled back"):
        run_ship(version="1.0.0", env="staging", journal_path=journal, legs=legs1)

    # d1's forward ran exactly once before nextjs raised.
    assert calls1 == ["d1"]

    # Check the persisted leg state that the resume decision is based on:
    # d1 was rolled back, so it is NOT in the DEPLOYED state any more.
    interrupted = ShipJournal.load(journal)
    assert interrupted.outcome is ShipOutcome.FAILED_AUTO_ROLLED_BACK
    assert interrupted.legs[0].state is LegState.ROLLED_BACK
    assert interrupted.legs[1].state is LegState.FAILED
    assert interrupted.legs[2].state is LegState.PENDING

    # --- Resume with a healed nextjs leg. ---
    calls2: list[str] = []
    legs2 = [
        LegPlan(name="d1", forward=lambda: calls2.append("d1") or {}, rollback=lambda: {}),
        LegPlan(name="nextjs", forward=lambda: calls2.append("nextjs") or {}, rollback=lambda: {}),
        LegPlan(name="paper", forward=lambda: calls2.append("paper") or {}, rollback=None),
    ]
    # Because d1 is ROLLED_BACK (not DEPLOYED), resume must re-deploy it
    # before continuing with nextjs and paper.
    j = run_ship(version="1.0.0", env="staging", journal_path=journal, legs=legs2, resume=True)
    assert j.outcome is ShipOutcome.SUCCESS
    assert calls2 == ["d1", "nextjs", "paper"]