mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-07 10:08:50 -05:00
- Add 16 stakeholder messages to Vol2 labs 10, 11, 12, 15 (4 per lab) - Add 32 Plotly hover templates across 6 labs with units and precision - Fix Lab 00 DecisionLog dataflow bug (missing from cell return + signature) - Fix test_widget slider regex to handle Python underscore literals (100_000) - Fix test_widget slider count test: check total controls, not just sliders - Fix test_engine to skip gracefully when marimo is not installed - Standardize 4 chart heights from 420-450px to 380px Tests: 1,326 passed, 28 skipped, 0 failed (full suite)
130 lines
4.5 KiB
Python
"""
|
|
Level 2: Engine Execution Tests
|
|
================================
|
|
|
|
Runs each lab's cells headlessly via marimo.App.run() to verify:
|
|
- All cells execute without exceptions
|
|
- mlsysim Engine.solve() calls produce valid results
|
|
- Key computed values are within expected ranges
|
|
|
|
These tests are slower (~2-5 sec per lab) but catch runtime errors
|
|
that static analysis misses (e.g., wrong mlsysim API calls, division
|
|
by zero, missing attributes).
|
|
|
|
Usage:
|
|
python3 -m pytest labs/tests/test_engine.py -v
|
|
python3 -m pytest labs/tests/test_engine.py -v -k "vol1"
|
|
python3 -m pytest labs/tests/test_engine.py -v -k "lab_01"
|
|
"""
|
|
|
|
# Stdlib imports; pytest's importorskip below gates the entire module.
import importlib.util
import sys
from pathlib import Path

import pytest

# marimo is an optional dependency: when it is missing, every test in this
# module is skipped (rather than erroring at import time).
marimo = pytest.importorskip("marimo", reason="marimo not installed — skipping engine tests")

# Repository root: two directories above this file (labs/tests/ -> repo).
REPO_ROOT = Path(__file__).resolve().parents[2]


# ── Helpers ──────────────────────────────────────────────────────────────────
def load_app(lab_path: str):
    """Load and return the module-level ``app`` object from a lab's .py file.

    Args:
        lab_path: Filesystem path to a marimo notebook module.

    Returns:
        The module's ``app`` attribute (a marimo.App in the labs).

    Raises:
        ImportError: If the path cannot be loaded as a Python module.
        AttributeError: If the module defines no ``app`` attribute.
    """
    spec = importlib.util.spec_from_file_location("lab_module", lab_path)
    # spec_from_file_location returns None for unloadable paths — fail loudly
    # instead of crashing with AttributeError on ``spec.loader`` below.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load module from {lab_path}")
    mod = importlib.util.module_from_spec(spec)
    # Register before exec_module so intra-module lookups (dataclasses,
    # pickling, relative references) resolve — the pattern recommended by
    # the importlib documentation.
    sys.modules[spec.name] = mod
    try:
        spec.loader.exec_module(mod)
    finally:
        # Don't leave a stale "lab_module" entry behind between labs.
        sys.modules.pop(spec.name, None)
    return mod.app
def run_app_safely(lab_path: str):
    """
    Execute a Marimo app headlessly and report the outcome.

    Returns:
        ``(outputs, defs)`` from ``app.run()`` on success, or
        ``(None, error_str)`` when loading or running raises.
    """
    try:
        outputs, defs = load_app(lab_path).run()
    except Exception as exc:  # any runtime error is exactly what we report
        return None, str(exc)
    return outputs, defs
# ── Test: Cell Execution ─────────────────────────────────────────────────────


class TestCellExecution:
    """Execute every cell of each lab headlessly and surface runtime errors."""

    @pytest.mark.engine
    def test_app_runs_without_error(self, lab_path):
        """
        The core test: execute all cells via app.run().

        Catches ImportError (wrong mlsysim API paths), AttributeError
        (nonexistent hardware/model attributes), TypeError (wrong
        Engine.solve() arguments), ZeroDivisionError (bad formulas), and
        any other runtime exception raised while the cells execute.
        """
        outputs, defs = run_app_safely(lab_path)
        if outputs is not None:
            return  # all cells ran cleanly
        pytest.fail(f"App execution failed: {defs}")

    @pytest.mark.engine
    def test_app_produces_outputs(self, lab_path):
        """A successful app.run() should yield at least one output."""
        outputs, defs = run_app_safely(lab_path)
        if outputs is None:
            pytest.skip(f"App failed to run: {defs}")
        assert len(outputs) > 0, "App produced zero outputs"
# ── Test: mlsysim API Validation ─────────────────────────────────────────────


class TestMlsysimAPI:
    """Exercise the mlsysim entry points the labs depend on."""

    @pytest.mark.engine
    def test_hardware_registry_accessible(self, mlsysim):
        """Every hardware target referenced by a lab must resolve."""
        hw = mlsysim.Hardware
        # Representatives from each tier: cloud, cloud, edge, tiny.
        for accelerator in (
            hw.Cloud.H100,
            hw.Cloud.A100,
            hw.Edge.JetsonOrinNX,
            hw.Tiny.ESP32,
        ):
            assert accelerator is not None

    @pytest.mark.engine
    def test_model_registry_accessible(self, mlsysim):
        """Every model referenced by a lab must resolve."""
        catalog = mlsysim.Models
        for model in (catalog.ResNet50, catalog.GPT2, catalog.MobileNetV2):
            assert model is not None

    @pytest.mark.engine
    def test_engine_solve_basic(self, mlsysim):
        """Engine.solve() works for a basic inference scenario."""
        outcome = mlsysim.Engine.solve(
            model=mlsysim.Models.ResNet50,
            hardware=mlsysim.Hardware.Cloud.H100,
        )
        assert outcome.feasible is True
        assert outcome.latency is not None
        assert outcome.bottleneck in ("Compute", "Memory")

    @pytest.mark.engine
    def test_engine_solve_tiny_oom(self, mlsysim):
        """Large model on tiny device should be infeasible."""
        outcome = mlsysim.Engine.solve(
            model=mlsysim.Models.GPT2,
            hardware=mlsysim.Hardware.Tiny.ESP32,
        )
        assert outcome.feasible is False, "GPT-2 should not fit on ESP32"