mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-07 10:08:50 -05:00
- Add 16 stakeholder messages to Vol2 labs 10, 11, 12, 15 (4 per lab) - Add 32 Plotly hover templates across 6 labs with units and precision - Fix Lab 00 DecisionLog dataflow bug (missing from cell return + signature) - Fix test_widget slider regex to handle Python underscore literals (100_000) - Fix test_widget slider count test: check total controls, not just sliders - Fix test_engine to skip gracefully when marimo is not installed - Standardize 4 chart heights from 420-450px to 380px Tests: 1,326 passed, 28 skipped, 0 failed (full suite)
130 lines
4.5 KiB
Python
"""
|
|
Level 2: Engine Execution Tests
|
|
================================
|
|
|
|
Runs each lab's cells headlessly via marimo.App.run() to verify:
|
|
- All cells execute without exceptions
|
|
- mlsysim Engine.solve() calls produce valid results
|
|
- Key computed values are within expected ranges
|
|
|
|
These tests are slower (~2-5 sec per lab) but catch runtime errors
|
|
that static analysis misses (e.g., wrong mlsysim API calls, division
|
|
by zero, missing attributes).
|
|
|
|
Usage:
|
|
python3 -m pytest labs/tests/test_engine.py -v
|
|
python3 -m pytest labs/tests/test_engine.py -v -k "vol1"
|
|
python3 -m pytest labs/tests/test_engine.py -v -k "lab_01"
|
|
"""
|
|
|
|
# Stdlib imports; pytest's importorskip below gates the entire module.
import importlib.util
import sys
from pathlib import Path

import pytest

# marimo is an optional dependency: when it is missing, every test in this
# module is skipped (rather than erroring at import time).
marimo = pytest.importorskip("marimo", reason="marimo not installed — skipping engine tests")

# Repository root: two directories above this file (labs/tests/ -> repo).
REPO_ROOT = Path(__file__).resolve().parents[2]


# ── Helpers ──────────────────────────────────────────────────────────────────
def load_app(lab_path: str):
    """Load and return the module-level ``app`` object from a lab's .py file.

    Args:
        lab_path: Filesystem path to a marimo notebook module.

    Returns:
        The module's ``app`` attribute (a marimo.App in the labs).

    Raises:
        ImportError: If the path cannot be loaded as a Python module.
        AttributeError: If the module defines no ``app`` attribute.
    """
    spec = importlib.util.spec_from_file_location("lab_module", lab_path)
    # spec_from_file_location returns None for unloadable paths — fail loudly
    # instead of crashing with AttributeError on ``spec.loader`` below.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module from {lab_path}")
    mod = importlib.util.module_from_spec(spec)
    # Register before exec_module so intra-module lookups (dataclasses,
    # pickling, relative references) resolve — the pattern recommended by
    # the importlib documentation.
    sys.modules[spec.name] = mod
    try:
        spec.loader.exec_module(mod)
    finally:
        # Don't leave a stale "lab_module" entry behind between labs.
        sys.modules.pop(spec.name, None)
    return mod.app
def run_app_safely(lab_path: str):
    """
    Execute a Marimo app headlessly and report the outcome.

    Returns:
        ``(outputs, defs)`` from ``app.run()`` on success, or
        ``(None, error_str)`` when loading or running raises.
    """
    try:
        outputs, defs = load_app(lab_path).run()
    except Exception as exc:  # any runtime error is exactly what we report
        return None, str(exc)
    return outputs, defs
# ── Test: Cell Execution ─────────────────────────────────────────────────────


class TestCellExecution:
    """Execute every cell of each lab headlessly and surface runtime errors."""

    @pytest.mark.engine
    def test_app_runs_without_error(self, lab_path):
        """
        The core test: execute all cells via app.run().

        Catches ImportError (wrong mlsysim API paths), AttributeError
        (nonexistent hardware/model attributes), TypeError (wrong
        Engine.solve() arguments), ZeroDivisionError (bad formulas), and
        any other runtime exception raised while the cells execute.
        """
        outputs, defs = run_app_safely(lab_path)
        if outputs is not None:
            return  # all cells ran cleanly
        pytest.fail(f"App execution failed: {defs}")

    @pytest.mark.engine
    def test_app_produces_outputs(self, lab_path):
        """A successful app.run() should yield at least one output."""
        outputs, defs = run_app_safely(lab_path)
        if outputs is None:
            pytest.skip(f"App failed to run: {defs}")
        assert len(outputs) > 0, "App produced zero outputs"
# ── Test: mlsysim API Validation ─────────────────────────────────────────────


class TestMlsysimAPI:
    """Exercise the mlsysim entry points the labs depend on."""

    @pytest.mark.engine
    def test_hardware_registry_accessible(self, mlsysim):
        """Every hardware target referenced by a lab must resolve."""
        hw = mlsysim.Hardware
        # Representatives from each tier: cloud, cloud, edge, tiny.
        for accelerator in (
            hw.Cloud.H100,
            hw.Cloud.A100,
            hw.Edge.JetsonOrinNX,
            hw.Tiny.ESP32,
        ):
            assert accelerator is not None

    @pytest.mark.engine
    def test_model_registry_accessible(self, mlsysim):
        """Every model referenced by a lab must resolve."""
        catalog = mlsysim.Models
        for model in (catalog.ResNet50, catalog.GPT2, catalog.MobileNetV2):
            assert model is not None

    @pytest.mark.engine
    def test_engine_solve_basic(self, mlsysim):
        """Engine.solve() works for a basic inference scenario."""
        outcome = mlsysim.Engine.solve(
            model=mlsysim.Models.ResNet50,
            hardware=mlsysim.Hardware.Cloud.H100,
        )
        assert outcome.feasible is True
        assert outcome.latency is not None
        assert outcome.bottleneck in ("Compute", "Memory")

    @pytest.mark.engine
    def test_engine_solve_tiny_oom(self, mlsysim):
        """Large model on tiny device should be infeasible."""
        outcome = mlsysim.Engine.solve(
            model=mlsysim.Models.GPT2,
            hardware=mlsysim.Hardware.Tiny.ESP32,
        )
        assert outcome.feasible is False, "GPT-2 should not fit on ESP32"