mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-29 17:20:21 -05:00
Complete MLSYSIM v0.1.0 implementation with: - Documentation website (Quarto): landing page with animated hero and capability carousel, 4 tutorials (hello world, LLM serving, distributed training, sustainability), hardware/model/fleet/infra catalogs, solver guide, whitepaper, math foundations, glossary, and full quartodoc API reference - Typed registry system: Hardware (18 devices across 5 tiers), Models (15 workloads), Systems (fleets, clusters, fabrics), Infrastructure (grid profiles, rack configs, datacenters) - Core types: Pint-backed Quantity, Metadata provenance tracking, custom exception hierarchy (OOMError, SLAViolation) - SimulationConfig with YAML/JSON loading and pre-validation - Scenario system tying workloads to systems with SLA constraints - Multi-level evaluation scorecard (feasibility, performance, macro) - Examples, tests, and Jetson Orin NX spec fix (100 → 25 TFLOP/s) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
80 lines
2.2 KiB
Python
80 lines
2.2 KiB
Python
from .types import DeploymentTier, Node, Fleet, NetworkFabric
|
|
from ..core.constants import (
|
|
ureg,
|
|
SMARTPHONE_RAM_GB, MCU_RAM_KIB, CLOUD_MEM_GIB, TINY_MEM_KIB,
|
|
INFINIBAND_NDR_BW, INFINIBAND_HDR_BW, NETWORK_10G_BW, NETWORK_100G_BW
|
|
)
|
|
from ..hardware.registry import Hardware
|
|
|
|
class Tiers:
    """Vetted deployment tiers, ordered from largest to smallest footprint."""

    # Datacenter-scale serving: abundant memory/storage, relaxed latency.
    Cloud = DeploymentTier(
        name="Cloud",
        ram=512 * ureg.GB,
        storage=10 * ureg.TB,
        typical_latency_budget=200 * ureg.ms,
    )

    # On-premise edge servers: mid-range resources, tighter latency.
    Edge = DeploymentTier(
        name="Edge",
        ram=32 * ureg.GB,
        storage=1 * ureg.TB,
        typical_latency_budget=50 * ureg.ms,
    )

    # Smartphone-class devices; RAM comes from the shared constant.
    Mobile = DeploymentTier(
        name="Mobile",
        ram=SMARTPHONE_RAM_GB,
        storage=256 * ureg.GB,
        typical_latency_budget=30 * ureg.ms,
    )

    # Microcontroller-class devices; KiB-scale RAM from the shared constant.
    Tiny = DeploymentTier(
        name="TinyML",
        ram=MCU_RAM_KIB,
        storage=4 * ureg.MB,
        typical_latency_budget=100 * ureg.ms,
    )
|
|
|
|
class Nodes:
    """Vetted reference nodes (NVIDIA DGX systems)."""

    # 8x H100 with 900 GB/s NVLink intra-node bandwidth.
    DGX_H100 = Node(
        name="DGX H100",
        accelerator=Hardware.H100,
        accelerators_per_node=8,
        intra_node_bw=900 * ureg.GB / ureg.second,
        nics_per_node=8,
    )

    # 8x A100 with 600 GB/s NVLink intra-node bandwidth.
    DGX_A100 = Node(
        name="DGX A100",
        accelerator=Hardware.A100,
        accelerators_per_node=8,
        intra_node_bw=600 * ureg.GB / ureg.second,
        nics_per_node=8,
    )
|
|
|
|
class Fabrics:
    """Vetted network fabrics, from commodity Ethernet to InfiniBand NDR."""

    # Bandwidths come from shared constants in core.constants.
    Ethernet_10G = NetworkFabric(name="10GbE", bandwidth=NETWORK_10G_BW)
    Ethernet_100G = NetworkFabric(name="100GbE", bandwidth=NETWORK_100G_BW)
    InfiniBand_HDR = NetworkFabric(name="IB HDR", bandwidth=INFINIBAND_HDR_BW)
    InfiniBand_NDR = NetworkFabric(name="IB NDR", bandwidth=INFINIBAND_NDR_BW)
|
|
|
|
class Clusters:
    """Vetted production clusters built from the reference nodes above."""

    # Mid-size research fleet: 32 DGX H100 nodes x 8 GPUs = 256 GPUs,
    # interconnected over 100 GbE.
    Research_256 = Fleet(
        name="Research Cluster (256 GPUs)",
        node=Nodes.DGX_H100,
        count=32,
        fabric=Fabrics.Ethernet_100G,
    )

    # Frontier-scale fleet: 1024 DGX H100 nodes x 8 GPUs = 8192 GPUs,
    # interconnected over InfiniBand NDR.
    Frontier_8K = Fleet(
        name="Frontier Cluster (8192 GPUs)",
        node=Nodes.DGX_H100,
        count=1024,
        fabric=Fabrics.InfiniBand_NDR,
    )
|
|
|
|
class Systems:
    """Single-entry namespace aggregating the vetted system registries.

    Exposes the module's catalogs (Tiers, Nodes, Clusters, Fabrics) as
    class attributes so callers can reach everything through one import,
    e.g. ``Systems.Clusters.Research_256``.
    """

    # Aliases to the sibling registry classes defined in this module.
    Tiers = Tiers
    Nodes = Nodes
    Clusters = Clusters
    Fabrics = Fabrics