mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-01 10:09:18 -05:00
Moves the mlsysim package from book/quarto/mlsysim/ to the repo root so it is importable as a proper top-level package across the codebase. Key changes: - mlsysim/fmt.py: new top-level module for all formatting helpers (fmt, sci, check, md_math, fmt_full, fmt_split, etc.), moved out of viz/ - mlsysim/viz/__init__.py: now exports only plot utilities; dashboard.py (marimo-only) is no longer wildcard-exported and must be imported explicitly by marimo labs - mlsysim/__init__.py: added `from . import fmt` and `from .core import constants`; removed broken `from .viz import plots as viz` alias - execute-env.yml: fixed PYTHONPATH from "../../.." to "../.." so chapters resolve to repo root, not parent of repo - 51 QMD files: updated `from mlsysim.viz import <fmt-fns>` to `from mlsysim.fmt import <fmt-fns>` - book/quarto/mlsys/: legacy shadow package contents cleaned up; stub __init__.py remains for backward compat - All Vol1 and Vol2 chapters verified to build with `binder build pdf`
142 lines
5.8 KiB
Python
142 lines
5.8 KiB
Python
# simulations.py
|
|
"""
|
|
MLSys Analytical Simulations
|
|
============================
|
|
This module provides domain-specific analytical solvers for lab simulations.
|
|
Each simulation class implements the 'Physics' of a specific engineering domain.
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Dict, Any, List, Union, Optional
|
|
from ..core.constants import ureg, Q_, HOURS_PER_DAY
|
|
from .ledger import SystemLedger, PerformanceMetrics, SustainabilityMetrics, EconomicMetrics, ReliabilityMetrics
|
|
from .personas import Persona, Personas
|
|
from ..core.scenarios import ApplicationScenario, ClusterScenario
|
|
from ..core.engine import Engine
|
|
from ..core.systems import SystemArchetype, Systems
|
|
from ..core.datacenters import Datacenters
|
|
|
|
@dataclass
|
|
class SimulationResult:
|
|
"""The complete outcome of a simulation evaluation, including raw data for plots."""
|
|
ledger: SystemLedger
|
|
plots: Dict[str, Any]
|
|
|
|
class BaseSimulation:
|
|
"""
|
|
Abstract Base Class for all Analytical Simulations.
|
|
Provides the standard 'evaluate' interface for student choice processing.
|
|
"""
|
|
def __init__(self, scenario: Union[ApplicationScenario, ClusterScenario], persona: Persona):
|
|
"""Initializes the simulation with a static scenario and a persona.
|
|
|
|
Args:
|
|
scenario: The base model + hardware setup.
|
|
persona: The scaling and narrative context.
|
|
"""
|
|
self.scenario = scenario
|
|
self.persona = persona
|
|
|
|
def _get_system_archetype(self) -> SystemArchetype:
|
|
"""Helper to unify Application and Cluster scenarios for the Engine.
|
|
|
|
Returns:
|
|
A SystemArchetype object compatible with Engine.solve().
|
|
"""
|
|
if hasattr(self.scenario, "system"):
|
|
return self.scenario.system
|
|
|
|
if hasattr(self.scenario, "cluster"):
|
|
cluster = self.scenario.cluster
|
|
return SystemArchetype(
|
|
name=f"Virtual Node ({cluster.node.name})",
|
|
hardware=cluster.node.accelerator,
|
|
tier=Systems.Cloud.tier,
|
|
network_bw=cluster.fabric.bandwidth,
|
|
power_budget=cluster.node.node_tdp or Q_("700 watt")
|
|
)
|
|
|
|
return Systems.Cloud
|
|
|
|
def evaluate(self, choice: Dict[str, Any]) -> SystemLedger:
|
|
"""Processes a student's choice and returns a Ledger.
|
|
|
|
Args:
|
|
choice: A dictionary of parameters from the UI (e.g., {'region': 'Quebec'}).
|
|
|
|
Returns:
|
|
A SystemLedger containing the multi-dimensional results.
|
|
"""
|
|
raise NotImplementedError
|
|
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# RESOURCE SIMULATION: Sustainability, Carbon, and TCO
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
class ResourceSimulation(BaseSimulation):
|
|
"""
|
|
Analyzes energy consumption, carbon footprint, and economic TCO.
|
|
|
|
This simulation handles regional grid math and fleet-wide power scaling.
|
|
"""
|
|
def evaluate(self, choice: Dict[str, Any]) -> SystemLedger:
|
|
# 1. BASE PERFORMANCE
|
|
system = self._get_system_archetype()
|
|
perf_base = Engine.solve(self.scenario.model, system)
|
|
mfu_val = (perf_base.latency_compute / perf_base.latency).to_base_units().m
|
|
|
|
# 2. EXTRACT USER CHOICES
|
|
region_name = choice.get("region", "US_Avg")
|
|
grid = getattr(Datacenters.Grids, region_name, Datacenters.Grids.US_Avg)
|
|
duration_days = float(choice.get("duration_days", 365.0))
|
|
|
|
# 3. SCALE TO FLEET (Persona Context)
|
|
scale = self.persona.scale_factor
|
|
|
|
# IT Energy (kWh) = Power(W) * Time(h) / 1000
|
|
it_power_w = (perf_base.energy / perf_base.latency).to(ureg.watt).m
|
|
total_hours = duration_days * HOURS_PER_DAY
|
|
it_energy_kwh = (it_power_w * total_hours * scale) / 1000.0
|
|
|
|
# 4. APPLY PHYSICAL INVARIANTS (Sustainability)
|
|
total_energy_kwh = it_energy_kwh * grid.pue
|
|
total_carbon_kg = grid.carbon_kg(it_energy_kwh)
|
|
|
|
# 5. ECONOMIC MATH
|
|
electricity_cost = total_energy_kwh * 0.12
|
|
hw_cost_per_unit = 10.0 if system.tier.name == "Tiny" else 30000.0
|
|
total_capex = hw_cost_per_unit * scale
|
|
|
|
# 6. ASSEMBLE UNIVERSAL LEDGER
|
|
ledger = SystemLedger(
|
|
mission_name="Global Efficiency Challenge",
|
|
track_name=self.persona.name,
|
|
choice_summary=f"Region: {grid.name}, Duration: {duration_days} days",
|
|
performance=PerformanceMetrics(
|
|
latency=perf_base.latency,
|
|
throughput=perf_base.throughput * scale,
|
|
mfu=mfu_val,
|
|
hfu=mfu_val * 1.1,
|
|
bottleneck=perf_base.bottleneck
|
|
),
|
|
sustainability=SustainabilityMetrics(
|
|
energy=total_energy_kwh * ureg.kilowatt_hour,
|
|
carbon_kg=total_carbon_kg,
|
|
pue=grid.pue,
|
|
water_liters=total_energy_kwh * grid.wue
|
|
),
|
|
economics=EconomicMetrics(
|
|
capex=total_capex,
|
|
opex=electricity_cost,
|
|
tco=total_capex + electricity_cost,
|
|
cost_per_million=(electricity_cost / (perf_base.throughput.m * total_hours * scale * 3600)) * 1e6
|
|
),
|
|
reliability=ReliabilityMetrics(
|
|
mttf=Q_("100 hours"),
|
|
goodput=0.95,
|
|
recovery_time=Q_("15 minutes")
|
|
)
|
|
)
|
|
ledger.validate()
|
|
return ledger
|