mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-09 07:15:51 -05:00
Complete MLSYSIM v0.1.0 implementation with: - Documentation website (Quarto): landing page with animated hero and capability carousel, 4 tutorials (hello world, LLM serving, distributed training, sustainability), hardware/model/fleet/infra catalogs, solver guide, whitepaper, math foundations, glossary, and full quartodoc API reference - Typed registry system: Hardware (18 devices across 5 tiers), Models (15 workloads), Systems (fleets, clusters, fabrics), Infrastructure (grid profiles, rack configs, datacenters) - Core types: Pint-backed Quantity, Metadata provenance tracking, custom exception hierarchy (OOMError, SLAViolation) - SimulationConfig with YAML/JSON loading and pre-validation - Scenario system tying workloads to systems with SLA constraints - Multi-level evaluation scorecard (feasibility, performance, macro) - Examples, tests, and Jetson Orin NX spec fix (100 → 25 TFLOP/s) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
import pytest
|
|
from mlsysim.core.solver import DistributedSolver, ReliabilitySolver, EconomicsSolver
|
|
from mlsysim.models import Models
|
|
from mlsysim.systems import Systems
|
|
from mlsysim.infra import Infra
|
|
|
|
def test_distributed_solver():
    """Solve GPT3 on the Research_256 cluster and sanity-check the result.

    The result must expose the node performance, communication latency and
    scaling efficiency entries, and the scaling efficiency must lie in the
    half-open interval (0, 1].
    """
    distributed = DistributedSolver()
    workload = Models.GPT3
    fleet = Systems.Clusters.Research_256

    report = distributed.solve(workload, fleet, batch_size=32)

    # Every headline metric has to be present before values are inspected.
    for expected_key in (
        "node_performance",
        "communication_latency",
        "scaling_efficiency",
    ):
        assert expected_key in report

    # Efficiency is checked against both bounds separately so a failure
    # message points at the bound that was violated.
    assert report["scaling_efficiency"] > 0.0
    assert report["scaling_efficiency"] <= 1.0
|
|
|
|
def test_reliability_solver():
    """Solve a 100-hour job on the Frontier_8K cluster and check the result.

    The result must expose the fleet MTBF, failure probability and optimal
    checkpoint interval entries, and the failure probability must be
    strictly positive.
    """
    reliability = ReliabilitySolver()
    fleet = Systems.Clusters.Frontier_8K

    report = reliability.solve(fleet, job_duration_hours=100.0)

    # Presence checks come first, in the same order as the original test.
    for expected_key in (
        "fleet_mtbf",
        "failure_probability",
        "optimal_checkpoint_interval",
    ):
        assert expected_key in report

    assert report["failure_probability"] > 0.0
|
|
|
|
def test_economics_solver():
    """Solve a 30-day run of Research_256 on the Quebec grid and check it.

    The result must expose the TCO (USD) and carbon footprint (kg) entries,
    and both values must be strictly positive.
    """
    economics = EconomicsSolver()
    fleet = Systems.Clusters.Research_256
    grid_profile = Infra.Quebec

    report = economics.solve(fleet, duration_days=30, grid=grid_profile)

    # Both cost and carbon entries must exist before their values are read.
    for expected_key in ("tco_usd", "carbon_footprint_kg"):
        assert expected_key in report

    assert report["tco_usd"] > 0
    assert report["carbon_footprint_kg"] > 0
|