Files
cs249r_book/mlsysim/sim/simulations.py
Vijay Janapa Reddi c30f2a3bfd refactor: move mlsysim to repo root, extract fmt module from viz
Moves the mlsysim package from book/quarto/mlsysim/ to the repo root
so it is importable as a proper top-level package across the codebase.

Key changes:
- mlsysim/fmt.py: new top-level module for all formatting helpers (fmt,
  sci, check, md_math, fmt_full, fmt_split, etc.), moved out of viz/
- mlsysim/viz/__init__.py: now exports only plot utilities; dashboard.py
  (marimo-only) is no longer wildcard-exported and must be imported
  explicitly by marimo labs
- mlsysim/__init__.py: added `from . import fmt` and `from .core import
  constants`; removed broken `from .viz import plots as viz` alias
- execute-env.yml: fixed PYTHONPATH from "../../.." to "../.." so
  chapters resolve to repo root, not parent of repo
- 51 QMD files: updated `from mlsysim.viz import <fmt-fns>` to
  `from mlsysim.fmt import <fmt-fns>`
- book/quarto/mlsys/: legacy shadow package contents cleaned up;
  stub __init__.py remains for backward compat
- All Vol1 and Vol2 chapters verified to build with `binder build pdf`
2026-03-01 17:24:11 -05:00

142 lines
5.8 KiB
Python

# simulations.py
"""
MLSys Analytical Simulations
============================
This module provides domain-specific analytical solvers for lab simulations.
Each simulation class implements the 'Physics' of a specific engineering domain.
"""
from dataclasses import dataclass
from typing import Dict, Any, List, Union, Optional
from ..core.constants import ureg, Q_, HOURS_PER_DAY
from .ledger import SystemLedger, PerformanceMetrics, SustainabilityMetrics, EconomicMetrics, ReliabilityMetrics
from .personas import Persona, Personas
from ..core.scenarios import ApplicationScenario, ClusterScenario
from ..core.engine import Engine
from ..core.systems import SystemArchetype, Systems
from ..core.datacenters import Datacenters
@dataclass
class SimulationResult:
    """Bundle returned by a simulation run: the ledger plus raw plot data.

    Field order is part of the generated constructor signature
    (``SimulationResult(ledger, plots)``) and must not be rearranged.
    """

    # Multi-dimensional result record produced by the simulation.
    ledger: SystemLedger
    # Raw data for plots, keyed by a string identifier. The value type is
    # deliberately open (``Any``); presumably one entry per chart -- confirm
    # against the plotting code that consumes it.
    plots: Dict[str, Any]
class BaseSimulation:
    """
    Common foundation for the analytical simulations in this module.

    Stores the scenario/persona pair that a simulation is evaluated against
    and declares the standard ``evaluate`` entry point used to process a
    student's choice. The base ``evaluate`` only raises
    ``NotImplementedError``; concrete simulations override it.
    """

    def __init__(self, scenario: Union[ApplicationScenario, ClusterScenario], persona: Persona):
        """Bind the simulation to a static scenario and a persona.

        Args:
            scenario: The base model + hardware setup.
            persona: The scaling and narrative context.
        """
        self.persona = persona
        self.scenario = scenario

    def _get_system_archetype(self) -> SystemArchetype:
        """Resolve the scenario into a single archetype the Engine can solve.

        Resolution order:
          1. A scenario exposing ``system`` returns that object as-is.
          2. A scenario exposing ``cluster`` is collapsed into a synthetic
             "Virtual Node" archetype built from the cluster's node and
             fabric.
          3. Anything else falls back to ``Systems.Cloud``.

        Returns:
            A SystemArchetype object compatible with Engine.solve().
        """
        scenario = self.scenario

        # Application-style scenarios already carry a ready-made system.
        if hasattr(scenario, "system"):
            return scenario.system

        # Neither a system nor a cluster: use the generic cloud archetype.
        if not hasattr(scenario, "cluster"):
            return Systems.Cloud

        # Cluster-style scenario: describe one node of the cluster.
        fleet = scenario.cluster
        label = f"Virtual Node ({fleet.node.name})"
        accelerator = fleet.node.accelerator
        cloud_tier = Systems.Cloud.tier
        fabric_bandwidth = fleet.fabric.bandwidth
        # A falsy node_tdp (e.g. None) is replaced by a 700 W default.
        node_power = fleet.node.node_tdp or Q_("700 watt")

        return SystemArchetype(
            name=label,
            hardware=accelerator,
            tier=cloud_tier,
            network_bw=fabric_bandwidth,
            power_budget=node_power,
        )

    def evaluate(self, choice: Dict[str, Any]) -> SystemLedger:
        """Turn a student's choice into a ledger (implemented by subclasses).

        Args:
            choice: A dictionary of parameters from the UI (e.g., {'region': 'Quebec'}).

        Returns:
            A SystemLedger containing the multi-dimensional results.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError
# ─────────────────────────────────────────────────────────────────────────────
# RESOURCE SIMULATION: Sustainability, Carbon, and TCO
# ─────────────────────────────────────────────────────────────────────────────
class ResourceSimulation(BaseSimulation):
    """
    Analyzes energy consumption, carbon footprint, and economic TCO.

    This simulation handles regional grid math and fleet-wide power scaling.

    The pricing assumptions are class attributes so that a subclass can
    override them without re-implementing ``evaluate``.
    """

    # Flat electricity price applied to facility energy, per kWh.
    # NOTE(review): presumably USD/kWh -- the currency is not stated here.
    ELECTRICITY_PRICE_PER_KWH = 0.12
    # Hardware cost per deployed unit when the system tier is named "Tiny".
    TINY_UNIT_COST = 10.0
    # Hardware cost per deployed unit for every other tier.
    DEFAULT_UNIT_COST = 30000.0

    def evaluate(self, choice: Dict[str, Any]) -> SystemLedger:
        """Evaluate energy, carbon, water and cost for a student's choice.

        Args:
            choice: UI parameters. Recognised keys:
                ``"region"``: attribute name looked up on
                ``Datacenters.Grids``; missing or unknown names fall back
                to ``US_Avg``.
                ``"duration_days"``: operating period in days, coerced with
                ``float()``; defaults to 365.0.

        Returns:
            A validated SystemLedger (``ledger.validate()`` is called before
            returning) with performance, sustainability, economics and
            reliability sections filled in.

        Raises:
            ValueError, TypeError: If ``duration_days`` cannot be converted
                to ``float``.
        """
        # 1. BASE PERFORMANCE
        system = self._get_system_archetype()
        perf_base = Engine.solve(self.scenario.model, system)
        # Fraction of total latency spent in compute, as a plain number.
        mfu_val = (perf_base.latency_compute / perf_base.latency).to_base_units().m

        # 2. EXTRACT USER CHOICES
        region_name = choice.get("region", "US_Avg")
        grid = getattr(Datacenters.Grids, region_name, Datacenters.Grids.US_Avg)
        duration_days = float(choice.get("duration_days", 365.0))

        # 3. SCALE TO FLEET (Persona Context)
        scale = self.persona.scale_factor
        # IT Energy (kWh) = Power(W) * Time(h) / 1000
        it_power_w = (perf_base.energy / perf_base.latency).to(ureg.watt).m
        total_hours = duration_days * HOURS_PER_DAY
        it_energy_kwh = (it_power_w * total_hours * scale) / 1000.0

        # 4. APPLY PHYSICAL INVARIANTS (Sustainability)
        total_energy_kwh = it_energy_kwh * grid.pue
        # NOTE(review): carbon is computed from IT energy (pre-PUE). Confirm
        # that grid.carbon_kg() applies PUE internally; otherwise facility
        # overhead is missing from the carbon figure.
        total_carbon_kg = grid.carbon_kg(it_energy_kwh)

        # 5. ECONOMIC MATH
        electricity_cost = total_energy_kwh * self.ELECTRICITY_PRICE_PER_KWH
        if system.tier.name == "Tiny":
            hw_cost_per_unit = self.TINY_UNIT_COST
        else:
            hw_cost_per_unit = self.DEFAULT_UNIT_COST
        total_capex = hw_cost_per_unit * scale

        # Items served by the whole fleet over the period (3600 s per hour).
        # NOTE(review): uses the raw magnitude of throughput, so this assumes
        # throughput is expressed per second -- confirm against Engine.solve().
        items_served = perf_base.throughput.m * total_hours * scale * 3600
        # A zero-length period or zero-sized fleet serves nothing; report 0.0
        # instead of failing with a division by zero.
        if items_served:
            cost_per_million = (electricity_cost / items_served) * 1e6
        else:
            cost_per_million = 0.0

        # 6. ASSEMBLE UNIVERSAL LEDGER
        ledger = SystemLedger(
            mission_name="Global Efficiency Challenge",
            track_name=self.persona.name,
            choice_summary=f"Region: {grid.name}, Duration: {duration_days} days",
            performance=PerformanceMetrics(
                latency=perf_base.latency,
                throughput=perf_base.throughput * scale,
                mfu=mfu_val,
                # HFU is modelled as a fixed 10% uplift over MFU.
                # NOTE(review): not clamped, so it exceeds 1.0 once MFU is
                # above roughly 0.91 -- confirm validate() tolerates that.
                hfu=mfu_val * 1.1,
                bottleneck=perf_base.bottleneck
            ),
            sustainability=SustainabilityMetrics(
                energy=total_energy_kwh * ureg.kilowatt_hour,
                carbon_kg=total_carbon_kg,
                pue=grid.pue,
                water_liters=total_energy_kwh * grid.wue
            ),
            economics=EconomicMetrics(
                capex=total_capex,
                opex=electricity_cost,
                tco=total_capex + electricity_cost,
                # Electricity-only cost per million items (capex excluded).
                cost_per_million=cost_per_million
            ),
            # Reliability is not modelled by this simulation; fixed values.
            reliability=ReliabilityMetrics(
                mttf=Q_("100 hours"),
                goodput=0.95,
                recovery_time=Q_("15 minutes")
            )
        )
        ledger.validate()
        return ledger