Files
cs249r_book/mlsysim/tests/test_engine.py
Vijay Janapa Reddi a78f1bd8b0 feat(mlsysim): add documentation site, typed registries, and 6-solver core
Complete MLSYSIM v0.1.0 implementation with:

- Documentation website (Quarto): landing page with animated hero
  and capability carousel, 4 tutorials (hello world, LLM serving,
  distributed training, sustainability), hardware/model/fleet/infra
  catalogs, solver guide, whitepaper, math foundations, glossary,
  and full quartodoc API reference
- Typed registry system: Hardware (18 devices across 5 tiers),
  Models (15 workloads), Systems (fleets, clusters, fabrics),
  Infrastructure (grid profiles, rack configs, datacenters)
- Core types: Pint-backed Quantity, Metadata provenance tracking,
  custom exception hierarchy (OOMError, SLAViolation)
- SimulationConfig with YAML/JSON loading and pre-validation
- Scenario system tying workloads to systems with SLA constraints
- Multi-level evaluation scorecard (feasibility, performance, macro)
- Examples, tests, and Jetson Orin NX spec fix (100 → 25 TFLOP/s)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-07 15:59:51 -05:00

41 lines
1.3 KiB
Python

import pytest
from mlsysim.core.engine import Engine
from mlsysim.hardware import Hardware
from mlsysim.models import Models
from mlsysim.core.exceptions import OOMError
def test_engine_single_inference():
    """Solve ResNet50 on an A100 at batch size 1 and sanity-check the result."""
    model = Models.ResNet50
    device = Hardware.A100
    profile = Engine.solve(model, device, batch_size=1)

    # The returned profile must be feasible and report positive metrics.
    assert profile.feasible is True
    assert profile.latency.magnitude > 0
    assert profile.throughput.magnitude > 0
    # Only these two bottleneck labels are accepted by this test.
    assert profile.bottleneck in ("Compute", "Memory")
def test_engine_oom_exception():
    """Check both failure modes when solving GPT4 on an ESP32.

    With ``raise_errors=False`` the call is expected to return a profile
    flagged infeasible; with ``raise_errors=True`` it is expected to raise
    ``OOMError``.
    """
    model = Models.GPT4
    device = Hardware.Tiny.ESP32

    # Non-raising mode: infeasibility is reported on the returned profile.
    result = Engine.solve(model, device, batch_size=1, raise_errors=False)
    assert result.feasible is False

    # Raising mode: the same workload/device pair must surface an OOMError.
    with pytest.raises(OOMError):
        Engine.solve(model, device, batch_size=1, raise_errors=True)
def test_engine_precision_switching():
    """Compare fp16 and fp32 solves of ResNet50 on an A100 at batch size 1."""
    model = Models.ResNet50
    device = Hardware.A100

    half_precision = Engine.solve(model, device, batch_size=1, precision="fp16")
    single_precision = Engine.solve(model, device, batch_size=1, precision="fp32")

    # Expectation under test: fp32 reports lower peak FLOPs than fp16
    # (tensor-core path), and consequently a higher latency.
    assert single_precision.peak_flops_actual < half_precision.peak_flops_actual
    assert single_precision.latency > half_precision.latency