Files
cs249r_book/labs/tests/test_engine.py
Vijay Janapa Reddi 7fdb49ee5c feat(labs): gold-standard polish — stakeholders, hover templates, test fixes
- Add 16 stakeholder messages to Vol2 labs 10, 11, 12, 15 (4 per lab)
- Add 32 Plotly hover templates across 6 labs with units and precision
- Fix Lab 00 DecisionLog dataflow bug (missing from cell return + signature)
- Fix test_widget slider regex to handle Python underscore literals (100_000)
- Fix test_widget slider count test: check total controls, not just sliders
- Fix test_engine to skip gracefully when marimo is not installed
- Standardize 4 chart heights from 420-450px to 380px

Tests: 1,326 passed, 28 skipped, 0 failed (full suite)
2026-04-02 07:14:13 -04:00

130 lines
4.5 KiB
Python

"""
Level 2: Engine Execution Tests
================================
Runs each lab's cells headlessly via marimo.App.run() to verify:
- All cells execute without exceptions
- mlsysim Engine.solve() calls produce valid results
- Key computed values are within expected ranges
These tests are slower (~2-5 sec per lab) but catch runtime errors
that static analysis misses (e.g., wrong mlsysim API calls, division
by zero, missing attributes).
Usage:
python3 -m pytest labs/tests/test_engine.py -v
python3 -m pytest labs/tests/test_engine.py -v -k "vol1"
python3 -m pytest labs/tests/test_engine.py -v -k "lab_01"
"""
import importlib.util
import sys
from pathlib import Path
import pytest
# Skip this entire test module gracefully when marimo is not installed
# (it is an optional dependency of the labs, not of the book build).
marimo = pytest.importorskip("marimo", reason="marimo not installed — skipping engine tests")
# Repository root: this file lives at <repo>/labs/tests/, two levels down.
REPO_ROOT = Path(__file__).resolve().parents[2]
# ── Helpers ──────────────────────────────────────────────────────────────────
def load_app(lab_path: str):
    """Load a Marimo app from a .py file and return its ``app`` object.

    Executes the lab module via importlib and returns the module-level
    ``app`` attribute (the ``marimo.App`` instance every lab defines).

    Args:
        lab_path: Filesystem path to the lab's .py file.

    Raises:
        ImportError: if no import spec/loader can be built for the path.
        AttributeError: if the loaded module defines no ``app``.
    """
    spec = importlib.util.spec_from_file_location("lab_module", lab_path)
    # spec_from_file_location may return None (or a spec with no loader)
    # for an unloadable path; fail with a clear error instead of an
    # opaque AttributeError on None.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build import spec for {lab_path}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod.app
def run_app_safely(lab_path: str):
    """
    Execute the Marimo app at *lab_path* headlessly.

    Returns ``(outputs, defs)`` on success, or ``(None, error_message)``
    when loading or running the app raises any exception.
    """
    try:
        outputs, defs = load_app(lab_path).run()
    except Exception as exc:  # deliberate catch-all: report, don't raise
        return None, str(exc)
    return outputs, defs
# ── Test: Cell Execution ─────────────────────────────────────────────────────
class TestCellExecution:
    """Execute every cell of each lab and surface runtime failures."""

    @pytest.mark.engine
    def test_app_runs_without_error(self, lab_path):
        """
        The core smoke test: execute all cells via app.run().

        Failures surfaced here include:
        - ImportError (wrong mlsysim API paths)
        - AttributeError (nonexistent hardware/model attributes)
        - TypeError (wrong Engine.solve() arguments)
        - ZeroDivisionError (bad formulas)
        - any other runtime exception
        """
        outputs, defs = run_app_safely(lab_path)
        if outputs is not None:
            return
        pytest.fail(f"App execution failed: {defs}")

    @pytest.mark.engine
    def test_app_produces_outputs(self, lab_path):
        """app.run() should yield at least one output."""
        outputs, defs = run_app_safely(lab_path)
        if outputs is None:
            pytest.skip(f"App failed to run: {defs}")
        assert len(outputs) != 0, "App produced zero outputs"
# ── Test: mlsysim API Validation ─────────────────────────────────────────────
class TestMlsysimAPI:
    """Confirm the mlsysim entry points that the labs depend on actually work."""

    @pytest.mark.engine
    def test_hardware_registry_accessible(self, mlsysim):
        """Every hardware target referenced by a lab exists in the registry."""
        hardware = mlsysim.Hardware
        # Cloud tier
        assert hardware.Cloud.H100 is not None
        assert hardware.Cloud.A100 is not None
        # Edge tier
        assert hardware.Edge.JetsonOrinNX is not None
        # Tiny tier
        assert hardware.Tiny.ESP32 is not None

    @pytest.mark.engine
    def test_model_registry_accessible(self, mlsysim):
        """Every model referenced by a lab exists in the registry."""
        registry = mlsysim.Models
        assert registry.ResNet50 is not None
        assert registry.GPT2 is not None
        assert registry.MobileNetV2 is not None

    @pytest.mark.engine
    def test_engine_solve_basic(self, mlsysim):
        """Engine.solve() succeeds for a plain cloud inference scenario."""
        outcome = mlsysim.Engine.solve(
            model=mlsysim.Models.ResNet50,
            hardware=mlsysim.Hardware.Cloud.H100,
        )
        assert outcome.feasible is True
        assert outcome.latency is not None
        assert outcome.bottleneck in {"Compute", "Memory"}

    @pytest.mark.engine
    def test_engine_solve_tiny_oom(self, mlsysim):
        """A large model on a tiny device must come back infeasible."""
        outcome = mlsysim.Engine.solve(
            model=mlsysim.Models.GPT2,
            hardware=mlsysim.Hardware.Tiny.ESP32,
        )
        assert outcome.feasible is False, "GPT-2 should not fit on ESP32"