mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-08 02:28:25 -05:00
New test_protocol.py validates 6 protocol invariants from PROTOCOL.md: - Invariant 1: constants sourced from mlsysim registries (not hardcoded) - Invariant 4: multi-part tabbed structure (4-5 parts + synthesis) - Invariant 5: multiple deployment contexts (2-3 hardware tiers) - Zone structure (4 zones: opening, widgets, tabs, ledger) - Ledger integration (ledger.save with correct chapter number) - Pedagogical flow (predictions per part, mo.stop gates, stakeholder msgs) Known gaps surface as xfail, not hard failures — provides a quality dashboard without blocking CI while labs are brought up to protocol.
375 lines
16 KiB
Python
375 lines
16 KiB
Python
"""
|
|
Level 4: Protocol Invariant Tests
|
|
==================================
|
|
|
|
Validates that each lab complies with the 6 Protocol Invariants
|
|
defined in .claude/docs/labs/PROTOCOL.md.
|
|
|
|
These are structural quality gates — they verify pedagogical
|
|
completeness rather than runtime correctness.
|
|
|
|
Invariant 1: Every Number Has a Source
|
|
Invariant 2: Structured Predictions (tested in test_static + test_widget)
|
|
Invariant 3: Failure States Mandatory (tested in test_static)
|
|
Invariant 4: Multi-Part Tabbed Structure (4-5 Parts + Synthesis)
|
|
Invariant 5: 2-3 Deployment Contexts
|
|
Invariant 6: No Instruments Before Chapter Introduction (manual only)
|
|
|
|
Usage:
|
|
python3 -m pytest labs/tests/test_protocol.py -v
|
|
python3 -m pytest labs/tests/test_protocol.py -v -k "vol1"
|
|
"""
|
|
|
|
import ast
|
|
import re
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
REPO_ROOT = Path(__file__).resolve().parents[2]
|
|
|
|
# Labs that are exempt from standard protocol checks
|
|
ORIENTATION_LABS = {"lab_00_introduction"}
|
|
|
|
|
|
# ── Helpers ──────────────────────────────────────────────────────────────────
|
|
|
|
def read_source(lab_path: str) -> str:
    """Return the full text of the lab file at ``lab_path``.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    default encoding, so lab sources that contain non-ASCII characters are
    read the same way on every machine.

    Args:
        lab_path: Filesystem path of the lab source file.

    Returns:
        The file contents as one string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    return Path(lab_path).read_text(encoding="utf-8")
|
|
|
|
|
|
def lab_stem(lab_path: str) -> str:
    """Return the final path component of ``lab_path`` without its last suffix."""
    stem = Path(lab_path).stem
    return stem
|
|
|
|
|
|
def is_orientation(lab_path: str) -> bool:
    """Report whether the lab's file stem is listed in ``ORIENTATION_LABS``."""
    stem = lab_stem(lab_path)
    return stem in ORIENTATION_LABS
|
|
|
|
|
|
def extract_lab_number(lab_path: str) -> int | None:
|
|
"""Extract the lab number from the filename (e.g., lab_05_foo -> 5)."""
|
|
m = re.search(r"lab_(\d+)_", Path(lab_path).name)
|
|
return int(m.group(1)) if m else None
|
|
|
|
|
|
def count_builder_functions(source: str) -> tuple[int, bool]:
    """Tally the part builders defined in ``source``.

    Returns:
        A ``(parts, has_synthesis)`` pair: the number of
        ``def build_part_<name>(`` occurrences, and whether the text
        ``def build_synthesis(`` appears anywhere in ``source``.
    """
    part_defs = re.finditer(r"def build_part_\w+\(", source)
    part_total = sum(1 for _ in part_defs)
    synthesis_present = "def build_synthesis(" in source
    return part_total, synthesis_present
|
|
|
|
|
|
def _is_tabs_dict_call(node: ast.AST) -> bool:
    """Return True when ``node`` is a ``mo.ui.tabs(...)`` call whose first positional argument is a dict literal."""
    if not isinstance(node, ast.Call) or not node.args:
        return False
    func = node.func
    return (
        isinstance(func, ast.Attribute)
        and func.attr == "tabs"
        and isinstance(func.value, ast.Attribute)
        and func.value.attr == "ui"
        and isinstance(func.value.value, ast.Name)
        and func.value.value.id == "mo"
        and isinstance(node.args[0], ast.Dict)
    )


def extract_tab_keys(source: str) -> list[str]:
    """Extract the string keys of the first ``mo.ui.tabs({...})`` call.

    The source is parsed with ``ast`` so that only the top-level keys of the
    dict literal are returned. This holds regardless of quote style, line
    breaks between ``(`` and ``{``, extra keyword arguments, or ``})``
    sequences inside the tab values. If the source cannot be parsed, or no
    such call is found in the AST, the function falls back to a regex scan
    of the raw text.

    Args:
        source: Full text of a lab file.

    Returns:
        The tab keys in declaration order, or an empty list when no
        ``mo.ui.tabs`` call with a dict literal is found.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError):
        # Unparseable text: rely on the textual fallback below.
        tree = None

    if tree is not None:
        tab_calls = [node for node in ast.walk(tree) if _is_tabs_dict_call(node)]
        if tab_calls:
            # ast.walk is breadth-first, so choose the earliest call by position.
            first_call = min(tab_calls, key=lambda call: (call.lineno, call.col_offset))
            return [
                key.value
                for key in first_call.args[0].keys
                # ``**spread`` entries have a None key; non-string keys are ignored.
                if isinstance(key, ast.Constant) and isinstance(key.value, str)
            ]

    # Textual fallback: double-quoted keys inside the first mo.ui.tabs({ ... }) span.
    tabs_match = re.search(r"mo\.ui\.tabs\(\{(.+?)\}\)", source, re.DOTALL)
    if tabs_match is None:
        return []
    return re.findall(r'"([^"]+)"(?:\s*:)', tabs_match.group(1))
|
|
|
|
|
|
def extract_hardware_references(source: str) -> set[str]:
    """Collect the distinct ``<Tier>.<Device>`` pairs that follow ``Hardware.`` in ``source``."""
    pattern = r"Hardware\.(\w+\.\w+)"
    return {hit.group(1) for hit in re.finditer(pattern, source)}
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# INVARIANT 1: Every Number Has a Source
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
class TestNumberSources:
    """Invariant 1: constants should come from mlsysim registries, not be hardcoded."""

    @pytest.mark.protocol
    def test_no_hardcoded_hardware_specs(self, lab_path):
        """
        Look for integer literals that may be hand-typed hardware specs.

        Every integer literal of at least 1,000,000,000 found in the lab's
        AST is recorded. If more than five are found, the test is reported
        as xfail (not a hard failure) and the first five are listed.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        module = ast.parse(read_source(lab_path))

        # Magnitudes of interest: 80 GB = 80_000_000_000,
        # 2 TB/s = 2_000_000_000_000, etc. Only int literals are considered;
        # float literals are never flagged.
        suspicious = [
            f"Line {node.lineno}: large constant {node.value:,} — "
            f"should this come from mlsysim.Hardware?"
            for node in ast.walk(module)
            if isinstance(node, ast.Constant)
            and isinstance(node.value, int)
            and node.value >= 1_000_000_000
        ]

        # A handful of large constants can be legitimate, so this only
        # produces an xfail once the count exceeds five.
        if len(suspicious) <= 5:
            return
        pytest.xfail(
            f"Found {len(suspicious)} large numeric constants. "
            f"Consider sourcing from mlsysim.Hardware:\n"
            + "\n".join(suspicious[:5])
        )

    @pytest.mark.protocol
    def test_uses_mlsysim_engine(self, lab_path):
        """Labs should mention ``Hardware.``, ``Models.`` or ``Engine.solve`` somewhere."""
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        registry_markers = ("Hardware.", "Models.", "Engine.solve")
        if any(marker in source for marker in registry_markers):
            return
        pytest.xfail(
            "Lab does not reference mlsysim Hardware, Models, or Engine. "
            "Constants should come from the registry, not be hardcoded."
        )
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# INVARIANT 4: Multi-Part Tabbed Structure
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
class TestTabbedStructure:
    """Labs must have 4-5 parts + synthesis in mo.ui.tabs."""

    # Lower bound of the "4-5 parts" rule. Both the builder-function check
    # and the tab-key check use it so the two thresholds cannot drift apart.
    MIN_PARTS = 4

    @pytest.mark.protocol
    def test_minimum_parts(self, lab_path):
        """Every lab (except lab_00) should define at least 4 build_part functions.

        Labs with fewer are reported as xfail, not as failures.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        parts, _ = count_builder_functions(source)
        if parts < self.MIN_PARTS:
            pytest.xfail(
                f"Only {parts} build_part functions (protocol requires 4-5). "
                f"Lab may need additional parts."
            )

    @pytest.mark.protocol
    def test_has_synthesis(self, lab_path):
        """Every lab (except lab_00) must have a build_synthesis() function."""
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        _, has_synthesis = count_builder_functions(source)
        assert has_synthesis, "Missing build_synthesis() function"

    @pytest.mark.protocol
    def test_tabs_contain_parts(self, lab_path):
        """The mo.ui.tabs dict should hold at least 4 keys starting with "Part".

        Skipped when no tab keys can be extracted. Labs with fewer Part tabs
        are reported as xfail.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        tab_keys = extract_tab_keys(source)
        if not tab_keys:
            pytest.skip("Could not parse tab keys")

        part_tabs = [k for k in tab_keys if k.startswith("Part")]
        # Same threshold as test_minimum_parts; the message below states 4-5.
        if len(part_tabs) < self.MIN_PARTS:
            pytest.xfail(
                f"Only {len(part_tabs)} Part tabs found in mo.ui.tabs. "
                f"Protocol requires 4-5. Found: {tab_keys}"
            )

    @pytest.mark.protocol
    def test_tabs_include_synthesis(self, lab_path):
        """The mo.ui.tabs dict should include a Synthesis (or Graduation) tab.

        Matching is a case-insensitive substring test on each tab key.
        Skipped when no tab keys can be extracted.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        tab_keys = extract_tab_keys(source)
        if not tab_keys:
            pytest.skip("Could not parse tab keys")

        has_synth = any("synth" in k.lower() or "graduation" in k.lower() for k in tab_keys)
        assert has_synth, (
            f"No Synthesis/Graduation tab found. Tabs: {tab_keys}"
        )
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# INVARIANT 5: Deployment Contexts
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
class TestDeploymentContexts:
    """Invariant 5: labs should compare more than one hardware tier."""

    @pytest.mark.protocol
    def test_multiple_hardware_tiers(self, lab_path):
        """Labs should reference at least 2 distinct hardware tiers.

        A tier is the first component of each ``Hardware.<Tier>.<Device>``
        reference. Labs with fewer than two are reported as xfail.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        references = extract_hardware_references(read_source(lab_path))
        tiers = {reference.partition(".")[0] for reference in references}

        if len(tiers) >= 2:
            return
        pytest.xfail(
            f"Only {len(tiers)} hardware tier(s) referenced: {tiers}. "
            f"Protocol recommends 2-3 deployment contexts."
        )
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# ZONE STRUCTURE
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
class TestZoneStructure:
    """Checks for the 4-zone layout markers in each lab."""

    @pytest.mark.protocol
    def test_has_zone_comments(self, lab_path):
        """Labs should carry ``ZONE A`` .. ``ZONE D`` section markers.

        Reported as xfail when fewer than three distinct markers appear.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        unique_zones = {hit.group(0) for hit in re.finditer(r"ZONE [A-D]", source)}
        if len(unique_zones) >= 3:
            return
        pytest.xfail(
            f"Only {len(unique_zones)} zone markers found: {unique_zones}. "
            f"Protocol expects 4 zones (A: Opening, B: Widgets, C: Tabs, D: Ledger)."
        )

    @pytest.mark.protocol
    def test_has_ledger_hud(self, lab_path):
        """Labs should contain a ledger HUD footer (``lab-hud`` or ``LEDGER`` text)."""
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        hud_tokens = ("lab-hud", "LEDGER")
        has_hud = any(token in source for token in hud_tokens)
        assert has_hud, "Missing ledger HUD footer (class='lab-hud' or LEDGER zone)"
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# LEDGER INTEGRATION
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
class TestLedgerIntegration:
    """Labs should save student decisions to the Design Ledger."""

    @pytest.mark.protocol
    def test_has_ledger_save(self, lab_path):
        """Every lab should call ledger.save() to record student decisions.

        A lab whose source lacks the text ``ledger.save`` is reported as xfail.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        if "ledger.save" not in source:
            pytest.xfail(
                "Missing ledger.save() call. "
                "Protocol requires recording student design decisions."
            )

    @pytest.mark.protocol
    def test_ledger_chapter_matches_filename(self, lab_path):
        """ledger.save(chapter=N) should match the lab number in the filename.

        Only calls whose first argument is ``chapter=<integer literal>`` are
        checked. Whitespace and line breaks around ``chapter`` and ``=`` are
        tolerated, so calls wrapped across lines are still validated. The
        test is skipped when the filename has no lab number or no such call
        exists.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        lab_num = extract_lab_number(lab_path)
        if lab_num is None:
            pytest.skip("Could not extract lab number from filename")

        # \s* lets the pattern match "ledger.save(\n    chapter=5, ..." and
        # "chapter = 5" as well as the compact "ledger.save(chapter=5".
        chapter_matches = re.findall(
            r"ledger\.save\(\s*chapter\s*=\s*(\d+)", source
        )
        if not chapter_matches:
            pytest.skip("No ledger.save(chapter=<number>) call found")

        for ch in chapter_matches:
            assert int(ch) == lab_num, (
                f"ledger.save(chapter={ch}) does not match lab number {lab_num}. "
                f"File: {Path(lab_path).name}"
            )
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
# PEDAGOGICAL FLOW
|
|
# ═══════════════════════════════════════════════════════════════════════════════
|
|
|
|
class TestPedagogicalFlow:
    """Checks that each part follows the predict → discover → explain flow."""

    @pytest.mark.protocol
    def test_prediction_count_matches_parts(self, lab_path):
        """There should be at least as many prediction widgets as parts.

        Prediction widgets are counted as occurrences of ``mo.ui.radio``,
        ``mo.ui.number`` or ``mo.ui.dropdown`` in the source.
        """
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        parts, _ = count_builder_functions(source)
        widget_hits = re.findall(r"mo\.ui\.(?:radio|number|dropdown)", source)
        predictions = len(widget_hits)

        if predictions >= parts:
            return
        pytest.xfail(
            f"{predictions} prediction widgets for {parts} parts. "
            f"Protocol recommends at least one prediction per part."
        )

    @pytest.mark.protocol
    def test_mo_stop_gates_exist(self, lab_path):
        """Labs should gate instruments behind predictions using mo.stop."""
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        parts, _ = count_builder_functions(source)
        stop_count = source.count("mo.stop(")

        if stop_count == 0:
            pytest.xfail("No mo.stop() gates found. Instruments should be gated behind predictions.")

        # One part is allowed to go without a gate before this is flagged.
        if stop_count < parts - 1:
            pytest.xfail(
                f"Only {stop_count} mo.stop() gates for {parts} parts. "
                f"Most parts should gate instruments behind predictions."
            )

    @pytest.mark.protocol
    def test_has_stakeholder_messages(self, lab_path):
        """Labs should contain at least one stakeholder-message marker."""
        if is_orientation(lab_path):
            pytest.skip("Lab 00 is orientation")

        source = read_source(lab_path)
        # Any one of these substrings counts as evidence of a stakeholder
        # callout (left-border styling, the word itself, or an emoji).
        stakeholder_markers = (
            "border-left:",
            "border-left-color:",
            "stakeholder",
            "Stakeholder",
            "STAKEHOLDER",
            "📧",
            "📋",
            "💼",
        )
        for marker in stakeholder_markers:
            if marker in source:
                return
        pytest.xfail(
            "No stakeholder message pattern found. "
            "Protocol requires a colored left-border callout framing each part."
        )
|