Files
cs249r_book/interviews/vault-cli/scripts/check_schema_sync.py
Vijay Janapa Reddi 6ccee10a9d feat(vault-cli): add vault lint + schema drift check
Two new tools.

vault lint <path>
  Author-facing linter. Accepts a single YAML file or a directory.
  Severity levels:
    ERROR    schema violation; question cannot be loaded
    WARN     likely misclassification (zone-level affinity mismatch,
             chain position duplication, etc.)
    INFO     hygiene suggestions (human-review-pending on published Qs)

  Zone-level affinity warning implements paper §3.3 Table 2 (line 397):
  'An L1 question tagged as evaluation is flagged for review, since
  evaluation is cognitively inconsistent with Bloom's Remember level.'
  The warning is soft — marking an outlier does not reject it; it
  surfaces for reviewer judgement. Quickly identifies the ~943 L6+
  questions currently carrying zone=design that should probably be
  zone=mastery.

scripts/check_schema_sync.py
  CI drift check. Compares enum values in schema/enums.py against
  schema/question_schema.yaml (the authoritative LinkML schema) and
  exits non-zero if they disagree. Prevents the three-schema drift
  that caused the v0.1 migration defects from recurring.

  Enums cross-checked: Track, Level, Zone, BloomLevel, Phase, Status,
  Provenance, HumanReviewStatus. Output on success: 'OK: 8 enums in
  sync.' Wire into CI in a follow-up PR.
2026-04-21 18:04:10 -04:00

86 lines
2.6 KiB
Python

#!/usr/bin/env python3
"""Verify enums.py stays in sync with question_schema.yaml.
Runs in CI as a drift check. Exits non-zero if the hand-maintained Python
enum constants disagree with the authoritative LinkML schema.
Usage:
python3 interviews/vault-cli/scripts/check_schema_sync.py
"""
from __future__ import annotations
import sys
from pathlib import Path
import yaml
REPO = Path(__file__).resolve().parents[2]
LINKML = REPO / "vault" / "schema" / "question_schema.yaml"
ENUMS_PY = REPO / "vault" / "schema" / "enums.py"
# Map LinkML enum name -> Python constant name in enums.py.
ENUM_MAPPING = {
"Track": "VALID_TRACKS",
"Level": "VALID_LEVELS",
"Zone": "VALID_ZONES",
"BloomLevel": "VALID_BLOOM_LEVELS",
"Phase": "VALID_PHASES",
"Status": "VALID_STATUSES",
"Provenance": "VALID_PROVENANCES",
"HumanReviewStatus": "VALID_HUMAN_REVIEW_STATUSES",
}
def load_linkml_enums() -> dict[str, set[str]]:
with LINKML.open() as fh:
data = yaml.safe_load(fh)
out = {}
for name, spec in (data.get("enums") or {}).items():
values = set((spec or {}).get("permissible_values") or {})
out[name] = values
return out
def load_python_enums() -> dict[str, set[str]]:
# Import the module directly; avoids running the validator machinery.
spec_dir = ENUMS_PY.parent
if str(spec_dir) not in sys.path:
sys.path.insert(0, str(spec_dir))
import enums # type: ignore[import-not-found]
out: dict[str, set[str]] = {}
for linkml_name, py_name in ENUM_MAPPING.items():
out[linkml_name] = set(getattr(enums, py_name))
return out
def main() -> int:
linkml = load_linkml_enums()
py = load_python_enums()
drift = False
for enum_name in ENUM_MAPPING:
linkml_vals = linkml.get(enum_name, set())
py_vals = py.get(enum_name, set())
if linkml_vals != py_vals:
drift = True
print(f"[drift] {enum_name}:")
only_linkml = linkml_vals - py_vals
only_py = py_vals - linkml_vals
if only_linkml:
print(f" in LinkML only: {sorted(only_linkml)}")
if only_py:
print(f" in enums.py only: {sorted(only_py)}")
if drift:
print()
print("FAIL: schema/enums.py disagrees with schema/question_schema.yaml.")
print("Update enums.py to match the LinkML schema, then re-run.")
return 1
print(f"OK: {len(ENUM_MAPPING)} enums in sync.")
return 0
if __name__ == "__main__":
sys.exit(main())