mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 00:59:07 -05:00
Complete MLSYSIM v0.1.0 implementation with: - Documentation website (Quarto): landing page with animated hero and capability carousel, 4 tutorials (hello world, LLM serving, distributed training, sustainability), hardware/model/fleet/infra catalogs, solver guide, whitepaper, math foundations, glossary, and full quartodoc API reference - Typed registry system: Hardware (18 devices across 5 tiers), Models (15 workloads), Systems (fleets, clusters, fabrics), Infrastructure (grid profiles, rack configs, datacenters) - Core types: Pint-backed Quantity, Metadata provenance tracking, custom exception hierarchy (OOMError, SLAViolation) - SimulationConfig with YAML/JSON loading and pre-validation - Scenario system tying workloads to systems with SLA constraints - Multi-level evaluation scorecard (feasibility, performance, macro) - Examples, tests, and Jetson Orin NX spec fix (100 → 25 TFLOP/s) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
41 lines
1.3 KiB
Python
41 lines
1.3 KiB
Python
import pytest
|
|
from mlsysim.core.engine import Engine
|
|
from mlsysim.hardware import Hardware
|
|
from mlsysim.models import Models
|
|
from mlsysim.core.exceptions import OOMError
|
|
|
|
def test_engine_single_inference():
    """A small CNN on a datacenter GPU should yield a well-formed, feasible profile."""
    workload = Models.ResNet50
    device = Hardware.A100

    profile = Engine.solve(workload, device, batch_size=1)

    # Sanity-check every field of the performance profile.
    assert profile.feasible is True
    assert profile.latency.magnitude > 0
    assert profile.throughput.magnitude > 0
    assert profile.bottleneck in ["Compute", "Memory"]
|
|
|
|
def test_engine_oom_exception():
    """An impossibly large model on a microcontroller: infeasible, and raises on demand."""
    workload = Models.GPT4
    device = Hardware.Tiny.ESP32

    # With raise_errors=False the solver reports infeasibility instead of raising.
    profile = Engine.solve(workload, device, batch_size=1, raise_errors=False)
    assert profile.feasible is False

    # With raise_errors=True the same scenario must surface an OOMError.
    with pytest.raises(OOMError):
        Engine.solve(workload, device, batch_size=1, raise_errors=True)
|
|
|
|
def test_engine_precision_switching():
    """Switching precision should change effective peak FLOPs and latency accordingly."""
    workload = Models.ResNet50
    device = Hardware.A100

    half = Engine.solve(workload, device, batch_size=1, precision="fp16")
    full = Engine.solve(workload, device, batch_size=1, precision="fp32")

    # FP16 tensor cores deliver higher peak FLOPs than FP32, so FP32 must be
    # both slower in peak throughput and higher in latency.
    assert full.peak_flops_actual < half.peak_flops_actual
    assert full.latency > half.latency
|