mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-03-11 17:49:25 -05:00
Update book quarto mlsys (hardware, validate_inline_refs, engine)
This commit is contained in:
@@ -5,6 +5,7 @@ from .hardware import Hardware
|
||||
from .models import Models
|
||||
from .deployment import Tiers
|
||||
from .systems import Systems, Archetypes
|
||||
from .engine import Engine
|
||||
from .scenarios import Scenarios, Applications
|
||||
|
||||
# Export constants and registry for legacy support
|
||||
|
||||
84
book/quarto/mlsys/engine.py
Normal file
84
book/quarto/mlsys/engine.py
Normal file
@@ -0,0 +1,84 @@
|
||||
# engine.py
|
||||
# The central computational engine for ML Systems analysis.
|
||||
# Ties Models, Systems, and Formulas into a single "Solver".
|
||||
|
||||
from dataclasses import dataclass
|
||||
from .models import ModelSpec
|
||||
from .systems import SystemArchetype
|
||||
from .constants import ureg, Q_, BYTES_FP32, BYTES_FP16, BYTES_INT8
|
||||
from .formulas import calc_bottleneck
|
||||
|
||||
@dataclass(frozen=True)
class PerformanceProfile:
    """The result of a system simulation.

    Frozen (immutable) record. Field order defines the positional
    constructor signature and is therefore left unchanged.
    """

    # Timing: total latency followed by its compute, memory and
    # overhead terms.
    latency: Q_
    latency_compute: Q_
    latency_memory: Q_
    latency_overhead: Q_

    # Rate quantity.
    throughput: Q_

    # Label of the limiting resource and the FLOP-per-byte ratio.
    bottleneck: str
    arithmetic_intensity: Q_

    # Resource cost of the run.
    energy: Q_
    memory_footprint: Q_

    # Hardware rates the run was evaluated against.
    peak_flops_actual: Q_
    peak_bw_actual: Q_

    # Verdict flag for the run.
    feasible: bool
|
||||
|
||||
class Engine:
    """
    Unified solver for ML Systems trade-offs.

    Combines a model description with a system archetype and reports the
    outcome as a PerformanceProfile.
    """

    @staticmethod
    def solve(model: ModelSpec, system: SystemArchetype, batch_size=1, precision="fp16", efficiency=0.5) -> PerformanceProfile:
        """Estimate latency, throughput, energy and feasibility for one run.

        Args:
            model: Supplies ``inference_flops``, ``parameters`` and
                ``size_in_bytes(bytes_per_param)``.
            system: Supplies ``hardware`` (the device spec) and ``ram``.
            batch_size: Multiplier applied to the per-inference operation count.
            precision: ``"fp32"`` or ``"int8"``; any other value is handled
                as fp16.
            efficiency: Factor applied to the device's peak compute rate.

        Returns:
            A PerformanceProfile whose latency is
            ``max(compute time, memory time) + dispatch overhead``.
        """
        device = system.hardware

        # 1. Map Precision: bytes per parameter plus an optional
        #    precision-specific peak rate.
        if precision == "fp32":
            bytes_per_param, dedicated_peak = BYTES_FP32, device.peak_flops_fp32
        elif precision == "int8":
            bytes_per_param, dedicated_peak = BYTES_INT8, device.int8_flops
        else:  # Default fp16
            bytes_per_param, dedicated_peak = BYTES_FP16, None
        # Fall back to the generic peak when no dedicated figure is available.
        peak = dedicated_peak or device.peak_flops

        # 2. Workload
        per_inference_ops = model.inference_flops
        if not per_inference_ops:
            # No explicit FLOP count: use 2 operations per parameter.
            per_inference_ops = 2 * model.parameters.to(ureg.count).magnitude * ureg.flop
        batch_ops = per_inference_ops * batch_size
        footprint = model.size_in_bytes(bytes_per_param)

        # 3. Physics (Iron Law)
        # Note: We use the hardware's memory bandwidth directly.
        sustained_flops = peak * efficiency
        verdict = calc_bottleneck(
            ops=batch_ops,
            model_bytes=footprint,
            device_flops=sustained_flops,
            device_bw=device.memory_bw,
        )
        compute_time = verdict["compute_ms"] * ureg.ms
        memory_time = verdict["memory_ms"] * ureg.ms
        overhead = device.dispatch_tax

        # Total Latency (Pipelined Assumption: overlapping data and compute)
        total_latency = max(compute_time, memory_time) + overhead

        # 4. Feasibility Check: the weights must fit in system RAM.
        fits_in_ram = footprint <= system.ram

        # Energy is only estimated when the device declares a TDP.
        if device.tdp:
            energy_used = (device.tdp * total_latency).to(ureg.joule)
        else:
            energy_used = 0 * ureg.joule

        return PerformanceProfile(
            latency=total_latency,
            latency_compute=compute_time,
            latency_memory=memory_time,
            latency_overhead=overhead,
            throughput=(batch_size / total_latency).to(1/ureg.second),
            bottleneck=verdict["bottleneck"],
            arithmetic_intensity=verdict["intensity"] * (ureg.flop / ureg.byte),
            energy=energy_used,
            memory_footprint=footprint,
            peak_flops_actual=sustained_flops,
            peak_bw_actual=device.memory_bw,
            feasible=fits_in_ram,
        )
|
||||
@@ -27,6 +27,7 @@ class HardwareSpec:
|
||||
memory_capacity: Q_
|
||||
tdp: Optional[Q_] = None
|
||||
battery_capacity: Optional[Q_] = None
|
||||
dispatch_tax: Q_ = 0.01 * ureg.ms # Default 10us
|
||||
|
||||
# Precision-specific FLOPS
|
||||
peak_flops_fp32: Optional[Q_] = None
|
||||
@@ -50,6 +51,7 @@ class HardwareSpec:
|
||||
_validate(self.memory_bw, "memory_bw", ureg.byte/ureg.second, "data/time (e.g. GB/s)")
|
||||
_validate(self.peak_flops, "peak_flops", ureg.flop/ureg.second, "compute rate (e.g. TFLOPs/s)")
|
||||
_validate(self.memory_capacity, "memory_capacity", ureg.byte, "data size (e.g. GiB)")
|
||||
_validate(self.dispatch_tax, "dispatch_tax", ureg.second, "time (e.g. ms)")
|
||||
if self.tdp:
|
||||
_validate(self.tdp, "tdp", ureg.watt, "power (e.g. W)")
|
||||
if self.battery_capacity:
|
||||
@@ -75,34 +77,44 @@ class Networks:
|
||||
class Cloud:
    """Datacenter-scale Accelerators."""
    # Each entry is defined exactly once and sets an explicit dispatch_tax;
    # superseded continuation lines and the duplicate TPUv4 assignment are removed.
    V100 = HardwareSpec("NVIDIA V100", 2017, V100_MEM_BW, V100_FLOPS_FP16_TENSOR, V100_MEM_CAPACITY, V100_TDP,
                        peak_flops_fp32=V100_FLOPS_FP32, dispatch_tax=0.02 * ureg.ms)
    A100 = HardwareSpec("NVIDIA A100", 2020, A100_MEM_BW, A100_FLOPS_FP16_TENSOR, A100_MEM_CAPACITY, A100_TDP,
                        peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8,
                        dispatch_tax=0.015 * ureg.ms)
    H100 = HardwareSpec("NVIDIA H100", 2022, H100_MEM_BW, H100_FLOPS_FP16_TENSOR, H100_MEM_CAPACITY, H100_TDP,
                        tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8,
                        dispatch_tax=0.01 * ureg.ms)
    B200 = HardwareSpec("NVIDIA B200", 2024, B200_MEM_BW, B200_FLOPS_FP16_TENSOR, B200_MEM_CAPACITY, B200_TDP,
                        fp8_flops=B200_FLOPS_FP8_TENSOR, dispatch_tax=0.008 * ureg.ms)
    T4 = HardwareSpec("NVIDIA T4", 2018, T4_MEM_BW, T4_FLOPS_FP16_TENSOR, 16 * ureg.GiB, T4_TDP,
                      int8_flops=T4_FLOPS_INT8, dispatch_tax=0.03 * ureg.ms)

    TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB, dispatch_tax=0.05 * ureg.ms)
|
||||
|
||||
class Edge:
    """Mobile and Robotics Hardware."""
    # Each device is defined exactly once; the earlier assignments without
    # dispatch_tax were overwritten by these and have been removed.
    Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB,
                                 battery_capacity=15 * ureg.Wh, dispatch_tax=1.0 * ureg.ms)  # High OS overhead

    # Specific Edge Devices
    Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W,
                         dispatch_tax=0.5 * ureg.ms)  # 4 TOPS INT8
    JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W,
                                dispatch_tax=0.2 * ureg.ms)  # 100 TOPS INT8
    NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W,
                                dispatch_tax=2.0 * ureg.ms)

    # Servers
    GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W,
                                 dispatch_tax=0.1 * ureg.ms)
|
||||
|
||||
class Tiny:
    """Microcontrollers and Embedded Systems."""
    # Each device is defined exactly once; the earlier ESP32 (0.01 TFLOPs/s)
    # and Generic_MCU assignments were overwritten by these and have been removed.

    # ESP32 at 240MHz is ~240 MIPS, for AI math without FPU it's roughly 100-200 MFLOPS (0.0001-0.0002 TFLOPS)
    ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.0002 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX,
                         dispatch_tax=5.0 * ureg.ms)  # Very high overhead relative to math
    Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB,
                               dispatch_tax=2.0 * ureg.ms)
|
||||
|
||||
class Hardware:
|
||||
Cloud = Cloud
|
||||
|
||||
@@ -5,9 +5,10 @@ Pre-render guardrail for inline Python in QMD files.
|
||||
|
||||
Checks:
|
||||
1. Every `{python} var_name` resolves to a defined variable
|
||||
2. No inline Python inside LaTeX math mode (causes decimal stripping)
|
||||
3. No inline Python adjacent to LaTeX symbols like $\\times$
|
||||
4. No grid tables with inline Python (use pipe tables instead)
|
||||
2. Every `{python} var_name` appears AFTER its definition (Locality)
|
||||
3. No inline Python inside LaTeX math mode (causes decimal stripping)
|
||||
4. No inline Python adjacent to LaTeX symbols like $\\times$
|
||||
5. No grid tables with inline Python (use pipe tables instead)
|
||||
|
||||
Usage:
|
||||
python3 book/quarto/mlsys/validate_inline_refs.py [--verbose] [--check-patterns]
|
||||
@@ -36,8 +37,11 @@ INLINE_REF = re.compile(r'`\{python\}\s+(\w+)`')
|
||||
CELL_START = re.compile(r'^```\{python\}')
|
||||
CELL_END = re.compile(r'^```\s*$')
|
||||
|
||||
# Pattern for variable assignments in compute cells
|
||||
ASSIGNMENT = re.compile(r'^(\w+)\s*=')
|
||||
# Pattern for variable assignments in compute cells (handles tuple unpacking)
|
||||
ASSIGNMENT = re.compile(r'^([a-zA-Z_]\w*(?:\s*,\s*[a-zA-Z_]\w*)*)\s*=')
|
||||
|
||||
# Pattern for Exports: in header block
|
||||
EXPORTS_SECTION = re.compile(r'#\s*.\s*[Ee]xports?:\s*(.*)')
|
||||
|
||||
# Problematic patterns that cause rendering issues
|
||||
# Pattern 1: Inline Python directly inside LaTeX math: $`{python}`$ or $..`{python}`$
|
||||
@@ -60,41 +64,12 @@ INLINE_FSTRING = re.compile(r'`\{python\}\s*f"[^`]+`')
|
||||
# Pattern 5: Inline function calls (should be pre-computed as _str)
|
||||
INLINE_FUNC_CALL = re.compile(r'`\{python\}\s*\w+\([^`]+\)`')
|
||||
|
||||
# Pattern 6: Inline Python in YAML cell options (fig-cap, tbl-cap, etc.)
|
||||
# These NEVER render - Quarto passes YAML options as literal strings
|
||||
# Pattern 6: Inline Python in YAML chunk options (fig-cap, tbl-cap, fig-alt, lst-cap)
|
||||
# These NEVER render — Quarto uses the option value as a literal string (verified by
|
||||
# rendering _test_inline_captions.qmd: body and ": Caption {#tbl-...}" run inline Python;
|
||||
# #| fig-alt and #| fig-cap do not).
|
||||
YAML_OPTION_INLINE = re.compile(r'^#\|\s*(fig-cap|tbl-cap|lst-cap|fig-alt):\s*.*`\{python\}')
|
||||
|
||||
# Pattern 7: Inline Python in Quarto caption syntax (: Caption {#tbl-...} or {#fig-...})
|
||||
# These also NEVER render - the caption line is parsed as metadata
|
||||
CAPTION_SYNTAX_INLINE = re.compile(r'^:\s+.*`\{python\}.*\{#(tbl|fig|lst)-')
|
||||
|
||||
|
||||
def extract_compute_vars(lines):
    """Extract all variable names assigned in ```{python} compute cells.

    Args:
        lines: The document's lines, in order.

    Returns:
        A set of assigned names. A tuple-unpacking target such as
        ``a, b = ...`` contributes ``a`` and ``b`` individually.
    """
    names = set()
    inside_cell = False
    for raw in lines:
        if CELL_START.match(raw):
            inside_cell = True
            continue
        if not inside_cell:
            continue
        if CELL_END.match(raw):
            inside_cell = False
            continue
        found = ASSIGNMENT.match(raw.strip())
        if found:
            # ASSIGNMENT may capture a comma-separated target list ("a, b").
            # Store each name separately so single-name inline references
            # can be matched against the set.
            for target in found.group(1).split(","):
                target = target.strip()
                if target:
                    names.add(target)
    return names
|
||||
|
||||
|
||||
def extract_inline_refs(lines):
    """Return ``(line_number, name)`` for every inline `{python} var` reference.

    Line numbers are 1-based; results follow document order, left to right
    within a line.
    """
    return [
        (lineno, hit.group(1))
        for lineno, text in enumerate(lines, 1)
        for hit in INLINE_REF.finditer(text)
    ]
|
||||
|
||||
|
||||
def check_rendering_patterns(qmd_path, verbose=False):
|
||||
"""Check for patterns that cause rendering issues. Returns list of warnings."""
|
||||
@@ -152,20 +127,13 @@ def check_rendering_patterns(qmd_path, verbose=False):
|
||||
if verbose:
|
||||
print(f" ⚠ {qmd_path.name}:{i} — Inline function call")
|
||||
|
||||
# Check for inline Python in YAML cell options (NEVER renders!)
|
||||
# Check for inline Python in YAML chunk options (fig-cap, fig-alt, tbl-cap, lst-cap) — NEVER renders
|
||||
if YAML_OPTION_INLINE.search(line):
|
||||
warnings.append((filepath, i, "YAML_OPTION",
|
||||
"Inline Python in YAML option (fig-cap/tbl-cap) - NEVER renders! Use hardcoded value or plt.suptitle()"))
|
||||
"Inline Python in #| fig-alt/fig-cap/tbl-cap/lst-cap - NEVER renders! Use hardcoded value or set caption in code."))
|
||||
if verbose:
|
||||
print(f" ✗ {qmd_path.name}:{i} — Python in YAML option (will appear literally)")
|
||||
|
||||
# Check for inline Python in Quarto caption syntax (: Caption {#tbl-...})
|
||||
if CAPTION_SYNTAX_INLINE.search(line):
|
||||
warnings.append((filepath, i, "CAPTION_SYNTAX",
|
||||
"Inline Python in caption (: ... {#tbl/fig-}) - NEVER renders! Use hardcoded value."))
|
||||
if verbose:
|
||||
print(f" ✗ {qmd_path.name}:{i} — Python in caption syntax (will appear literally)")
|
||||
|
||||
return warnings
|
||||
|
||||
|
||||
@@ -174,17 +142,68 @@ def validate_file(qmd_path, verbose=False, check_patterns=False):
|
||||
text = qmd_path.read_text(encoding="utf-8")
|
||||
lines = text.splitlines()
|
||||
|
||||
inline_refs = extract_inline_refs(lines)
|
||||
if not inline_refs:
|
||||
return [], [] # No inline refs, nothing to validate
|
||||
|
||||
compute_vars = extract_compute_vars(lines)
|
||||
errors = []
|
||||
for lineno, var in inline_refs:
|
||||
if var not in compute_vars:
|
||||
errors.append((str(qmd_path.relative_to(BOOK_ROOT)), lineno, var))
|
||||
if verbose:
|
||||
print(f" ✗ {qmd_path.name}:{lineno} — `{{python}} {var}` not defined")
|
||||
defined_vars = set()
|
||||
in_cell = False
|
||||
in_exports = False
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
# 1. Track variable definitions in cells
|
||||
if CELL_START.match(line):
|
||||
in_cell = True
|
||||
continue
|
||||
if in_cell and CELL_END.match(line):
|
||||
in_cell = False
|
||||
in_exports = False
|
||||
continue
|
||||
|
||||
if in_cell:
|
||||
# Check for assignments: var = ... or var1, var2 = ...
|
||||
m = ASSIGNMENT.match(line.strip())
|
||||
if m:
|
||||
vars_part = m.group(1)
|
||||
for v in re.split(r'[,\s]+', vars_part):
|
||||
if v.strip():
|
||||
defined_vars.add(v.strip())
|
||||
|
||||
# Check for Exports: in header
|
||||
m = EXPORTS_SECTION.match(line.strip())
|
||||
if m:
|
||||
in_exports = True
|
||||
vars_raw = m.group(1)
|
||||
# Remove unit parentheticals like (MB, GB)
|
||||
vars_raw = re.sub(r'\(.*?\)', '', vars_raw)
|
||||
for v in re.split(r'[,\s]+', vars_raw):
|
||||
v = v.strip().rstrip(',')
|
||||
if v:
|
||||
defined_vars.add(v)
|
||||
elif in_exports:
|
||||
# Continuation of exports
|
||||
m = re.match(r'#\s*.\s*(.*)', line.strip())
|
||||
if m:
|
||||
content = m.group(1).strip()
|
||||
# If content starts with a section like 'Goal:', stop
|
||||
if re.match(r'^[A-Z][a-z]+:', content):
|
||||
in_exports = False
|
||||
elif content == "" or "──" in content:
|
||||
in_exports = False
|
||||
else:
|
||||
vars_raw = re.sub(r'\(.*?\)', '', content)
|
||||
for v in re.split(r'[,\s]+', vars_raw):
|
||||
v = v.strip().rstrip(',')
|
||||
if v:
|
||||
defined_vars.add(v)
|
||||
else:
|
||||
in_exports = False
|
||||
continue # Don't check for refs inside compute cells
|
||||
|
||||
# 2. Check inline references for Locality
|
||||
for m in INLINE_REF.finditer(line):
|
||||
var = m.group(1)
|
||||
if var not in defined_vars:
|
||||
errors.append((str(qmd_path.relative_to(BOOK_ROOT)), i, var))
|
||||
if verbose:
|
||||
print(f" ✗ {qmd_path.name}:{i} — `{{python}} {var}` used before definition (Locality Violation)")
|
||||
|
||||
warnings = []
|
||||
if check_patterns:
|
||||
@@ -198,8 +217,17 @@ def main():
|
||||
|
||||
verbose = "--verbose" in sys.argv or "-v" in sys.argv
|
||||
check_patterns = "--check-patterns" in sys.argv or "-p" in sys.argv
|
||||
|
||||
qmd_files = sorted(CONTENTS.rglob("*.qmd"))
|
||||
|
||||
# Check for path argument
|
||||
args = [a for a in sys.argv[1:] if not a.startswith("-")]
|
||||
if args:
|
||||
target_path = Path(args[0]).resolve()
|
||||
if target_path.is_file():
|
||||
qmd_files = [target_path]
|
||||
else:
|
||||
qmd_files = sorted(target_path.rglob("*.qmd"))
|
||||
else:
|
||||
qmd_files = sorted(CONTENTS.rglob("*.qmd"))
|
||||
total_files = 0
|
||||
total_refs = 0
|
||||
all_errors = []
|
||||
@@ -230,7 +258,7 @@ def main():
|
||||
print(f"\n{'─'*60}")
|
||||
print("ERRORS (will break render):")
|
||||
for filepath, lineno, var in all_errors:
|
||||
print(f" {filepath}:{lineno} — `{{python}} {var}` undefined")
|
||||
print(f" {filepath}:{lineno} — `{{python}} {var}` undefined/locality violation")
|
||||
exit_code = 1
|
||||
|
||||
if all_warnings:
|
||||
|
||||
Reference in New Issue
Block a user