mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-11 17:49:25 -05:00
Complete MLSYSIM v0.1.0 implementation with: - Documentation website (Quarto): landing page with animated hero and capability carousel, 4 tutorials (hello world, LLM serving, distributed training, sustainability), hardware/model/fleet/infra catalogs, solver guide, whitepaper, math foundations, glossary, and full quartodoc API reference - Typed registry system: Hardware (18 devices across 5 tiers), Models (15 workloads), Systems (fleets, clusters, fabrics), Infrastructure (grid profiles, rack configs, datacenters) - Core types: Pint-backed Quantity, Metadata provenance tracking, custom exception hierarchy (OOMError, SLAViolation) - SimulationConfig with YAML/JSON loading and pre-validation - Scenario system tying workloads to systems with SLA constraints - Multi-level evaluation scorecard (feasibility, performance, macro) - Examples, tests, and Jetson Orin NX spec fix (100 → 25 TFLOP/s) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
49 lines
1.6 KiB
Python
49 lines
1.6 KiB
Python
"""
Sustainability Lab: Carbon-Aware Fleet Design
=============================================
This lab teaches students how to model the 'Hierarchy of Environment'
by comparing the same GPU fleet across different regional grids.
"""
|
|
|
|
import mlsysim
|
|
from mlsysim.infra.types import Datacenter
|
|
|
|
def main():
    """Print a per-region energy/carbon table for one fixed training fleet.

    A 64-node DGX_H100 fleet is built once. For each regional grid in the
    experiment list, the fleet is placed in a ``Datacenter`` with a PUE
    override of 1.06 and handed to ``SustainabilitySolver.solve`` for a
    30-day run. One row is printed per region showing the datacenter PUE,
    the energy converted to megawatt-hours, and the ``carbon_footprint_kg``
    value divided by 1000. A fixed conclusion line is printed at the end.
    """
    print("Scenario: Training Llama-3-70B on 512 H100 GPUs for 30 days\n")

    # 1. Setup the Fleet
    fleet = mlsysim.Fleet(
        name="Frontier Training Cluster",
        node=mlsysim.Systems.Nodes.DGX_H100,
        count=64,  # 64 nodes * 8 GPUs = 512 GPUs
        fabric=mlsysim.Systems.Fabrics.InfiniBand_NDR,
    )

    # 2. Define our Experimental Regions
    grids = mlsysim.Infra.Grids
    experiments = [
        {"name": "Poland (Coal-Heavy)", "grid": grids.Poland},
        {"name": "Quebec (Hydro-Clean)", "grid": grids.Quebec},
    ]

    # Table header followed by a rule line.
    header = f"{'Region':<25} | {'PUE':<6} | {'Energy (MWh)':<12} | {'Carbon (Tonnes)':<12}"
    print(header)
    print("-" * 65)

    solver = mlsysim.SustainabilitySolver()

    for region in experiments:
        # Same fleet, different grid; the PUE override stands in for an
        # assumed liquid-cooled facility profile.
        site = Datacenter(name="Custom DC", grid=region["grid"], pue_override=1.06)
        report = solver.solve(fleet, duration_days=30, datacenter=site)

        # Convert the solver's outputs into the units shown in the header.
        energy_mwh = report["total_energy_kwh"].m_as("megawatt_hour")
        carbon_tonnes = report["carbon_footprint_kg"] / 1000.0

        print(f"{region['name']:<25} | {site.pue:<6.2f} | {energy_mwh:<12.1f} | {carbon_tonnes:<12.1f}")

    print("\nConclusion: Moving the same hardware to a cleaner grid reduces carbon by >90%.")
|
|
|
|
# Run the lab only when this file is executed as a script, so importing the
# module does not trigger the simulation or its printed output.
if __name__ == "__main__":
    main()
|