Update book quarto mlsys (hardware, validate_inline_refs, engine)

This commit is contained in:
Vijay Janapa Reddi
2026-02-26 15:23:07 -05:00
parent 73e39a0b8e
commit 5e0c9a2f5d
4 changed files with 198 additions and 73 deletions

View File

@@ -5,6 +5,7 @@ from .hardware import Hardware
from .models import Models
from .deployment import Tiers
from .systems import Systems, Archetypes
from .engine import Engine
from .scenarios import Scenarios, Applications
# Export constants and registry for legacy support

View File

@@ -0,0 +1,84 @@
# engine.py
# The central computational engine for ML Systems analysis.
# Ties Models, Systems, and Formulas into a single "Solver".
from dataclasses import dataclass
from .models import ModelSpec
from .systems import SystemArchetype
from .constants import ureg, Q_, BYTES_FP32, BYTES_FP16, BYTES_INT8
from .formulas import calc_bottleneck
@dataclass(frozen=True)
class PerformanceProfile:
    """The result of a system simulation.

    Immutable record returned by ``Engine.solve``. The per-field notes below
    describe how ``Engine.solve`` fills each value.
    """
    # End-to-end latency: max(latency_compute, latency_memory) + latency_overhead.
    latency: Q_
    # Compute time reported by calc_bottleneck ("compute_ms"), tagged as milliseconds.
    latency_compute: Q_
    # Memory-transfer time reported by calc_bottleneck ("memory_ms"), tagged as milliseconds.
    latency_memory: Q_
    # Fixed per-dispatch overhead, taken from the hardware's dispatch_tax.
    latency_overhead: Q_
    # batch_size / latency, converted to 1/second.
    throughput: Q_
    # Bottleneck label exactly as returned by calc_bottleneck ("bottleneck").
    bottleneck: str
    # calc_bottleneck's "intensity" value, tagged as flop/byte.
    arithmetic_intensity: Q_
    # hw.tdp * latency in joules; 0 J when the hardware declares no TDP.
    energy: Q_
    # model.size_in_bytes(...) at the bytes-per-parameter of the chosen precision.
    memory_footprint: Q_
    # Peak compute rate for the chosen precision, scaled by the efficiency factor.
    peak_flops_actual: Q_
    # The hardware's memory bandwidth, used unscaled.
    peak_bw_actual: Q_
    # True when memory_footprint <= system.ram.
    feasible: bool
class Engine:
    """
    Unified solver for ML Systems trade-offs.

    Combines a model's workload with a system's hardware limits and returns
    the outcome as a PerformanceProfile through one static entry point,
    ``solve``.
    """

    @staticmethod
    def solve(model: ModelSpec, system: SystemArchetype, batch_size=1, precision="fp16", efficiency=0.5) -> PerformanceProfile:
        """Estimate latency, throughput, energy and feasibility of a workload.

        Args:
            model: Workload description. Reads ``inference_flops``,
                ``parameters`` and ``size_in_bytes(bytes_per_param)``.
            system: Deployment target. Reads ``hardware`` and ``ram``.
            batch_size: Inferences per pass; must be positive. It scales the
                operation count only -- the memory footprint is the model
                size at the chosen precision and does not grow with it.
            precision: ``"fp32"`` or ``"int8"`` select the matching byte
                width and peak rate (falling back to the generic
                ``peak_flops`` when the hardware has no precision-specific
                figure). Any other value is treated as fp16.
            efficiency: Multiplier applied to the peak compute rate; must be
                positive.

        Returns:
            A PerformanceProfile describing the simulated run.

        Raises:
            ValueError: If ``batch_size`` or ``efficiency`` is not positive.
        """
        # Fail early with a clear message: a zero efficiency would hand a zero
        # compute rate to calc_bottleneck, and a non-positive batch yields a
        # zero or negative throughput.
        if batch_size <= 0:
            raise ValueError(f"batch_size must be positive, got {batch_size!r}")
        if efficiency <= 0:
            raise ValueError(f"efficiency must be positive, got {efficiency!r}")

        hw = system.hardware

        # 1. Map Precision
        if precision == "fp32":
            bpp = BYTES_FP32
            peak_flops = hw.peak_flops_fp32 or hw.peak_flops
        elif precision == "int8":
            bpp = BYTES_INT8
            peak_flops = hw.int8_flops or hw.peak_flops
        else:  # Default fp16 (also reached for any unrecognised precision string)
            bpp = BYTES_FP16
            peak_flops = hw.peak_flops

        # Achievable compute rate; reused for the solver call and the report.
        effective_flops = peak_flops * efficiency

        # 2. Workload
        # When the model gives no explicit FLOP count, estimate 2 operations
        # per parameter.
        ops_per_inference = model.inference_flops or (2 * model.parameters.to(ureg.count).magnitude * ureg.flop)
        total_ops = ops_per_inference * batch_size
        memory_bytes = model.size_in_bytes(bpp)

        # 3. Physics (Iron Law)
        # Note: We use the hardware's memory bandwidth directly.
        results = calc_bottleneck(
            ops=total_ops,
            model_bytes=memory_bytes,
            device_flops=effective_flops,
            device_bw=hw.memory_bw,
        )

        # NOTE(review): assumes calc_bottleneck returns unitless millisecond
        # magnitudes under "compute_ms" / "memory_ms" -- confirm in formulas.
        t_comp = results["compute_ms"] * ureg.ms
        t_mem = results["memory_ms"] * ureg.ms
        t_overhead = hw.dispatch_tax

        # Total Latency (Pipelined Assumption: overlapping data and compute)
        latency = max(t_comp, t_mem) + t_overhead

        # 4. Feasibility Check
        feasible = memory_bytes <= system.ram

        return PerformanceProfile(
            latency=latency,
            latency_compute=t_comp,
            latency_memory=t_mem,
            latency_overhead=t_overhead,
            throughput=(batch_size / latency).to(1/ureg.second),
            bottleneck=results["bottleneck"],
            arithmetic_intensity=results["intensity"] * (ureg.flop / ureg.byte),
            # Hardware without a declared TDP reports zero energy.
            energy=(hw.tdp * latency).to(ureg.joule) if hw.tdp else 0 * ureg.joule,
            memory_footprint=memory_bytes,
            peak_flops_actual=effective_flops,
            peak_bw_actual=hw.memory_bw,
            feasible=feasible,
        )

View File

@@ -27,6 +27,7 @@ class HardwareSpec:
memory_capacity: Q_
tdp: Optional[Q_] = None
battery_capacity: Optional[Q_] = None
dispatch_tax: Q_ = 0.01 * ureg.ms # Default 10us
# Precision-specific FLOPS
peak_flops_fp32: Optional[Q_] = None
@@ -50,6 +51,7 @@ class HardwareSpec:
_validate(self.memory_bw, "memory_bw", ureg.byte/ureg.second, "data/time (e.g. GB/s)")
_validate(self.peak_flops, "peak_flops", ureg.flop/ureg.second, "compute rate (e.g. TFLOPs/s)")
_validate(self.memory_capacity, "memory_capacity", ureg.byte, "data size (e.g. GiB)")
_validate(self.dispatch_tax, "dispatch_tax", ureg.second, "time (e.g. ms)")
if self.tdp:
_validate(self.tdp, "tdp", ureg.watt, "power (e.g. W)")
if self.battery_capacity:
@@ -75,34 +77,44 @@ class Networks:
class Cloud:
"""Datacenter-scale Accelerators."""
V100 = HardwareSpec("NVIDIA V100", 2017, V100_MEM_BW, V100_FLOPS_FP16_TENSOR, V100_MEM_CAPACITY, V100_TDP,
peak_flops_fp32=V100_FLOPS_FP32)
peak_flops_fp32=V100_FLOPS_FP32, dispatch_tax=0.02 * ureg.ms)
A100 = HardwareSpec("NVIDIA A100", 2020, A100_MEM_BW, A100_FLOPS_FP16_TENSOR, A100_MEM_CAPACITY, A100_TDP,
peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8)
peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8,
dispatch_tax=0.015 * ureg.ms)
H100 = HardwareSpec("NVIDIA H100", 2022, H100_MEM_BW, H100_FLOPS_FP16_TENSOR, H100_MEM_CAPACITY, H100_TDP,
tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8)
tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8,
dispatch_tax=0.01 * ureg.ms)
B200 = HardwareSpec("NVIDIA B200", 2024, B200_MEM_BW, B200_FLOPS_FP16_TENSOR, B200_MEM_CAPACITY, B200_TDP,
fp8_flops=B200_FLOPS_FP8_TENSOR)
fp8_flops=B200_FLOPS_FP8_TENSOR, dispatch_tax=0.008 * ureg.ms)
T4 = HardwareSpec("NVIDIA T4", 2018, T4_MEM_BW, T4_FLOPS_FP16_TENSOR, 16 * ureg.GiB, T4_TDP,
int8_flops=T4_FLOPS_INT8)
int8_flops=T4_FLOPS_INT8, dispatch_tax=0.03 * ureg.ms)
TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB)
TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB, dispatch_tax=0.05 * ureg.ms)
class Edge:
"""Mobile and Robotics Hardware."""
Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB, battery_capacity=15 * ureg.Wh)
Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB,
battery_capacity=15 * ureg.Wh, dispatch_tax=1.0 * ureg.ms) # High OS overhead
# Specific Edge Devices
Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W) # 4 TOPS INT8
JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W) # 100 TOPS INT8
NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W)
Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W,
dispatch_tax=0.5 * ureg.ms)
JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W,
dispatch_tax=0.2 * ureg.ms)
NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W,
dispatch_tax=2.0 * ureg.ms)
# Servers
GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W)
GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W,
dispatch_tax=0.1 * ureg.ms)
class Tiny:
"""Microcontrollers and Embedded Systems."""
ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.01 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX)
Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB)
# ESP32 at 240MHz is ~240 MIPS, for AI math without FPU it's roughly 100-200 MFLOPS (0.0001-0.0002 TFLOPS)
ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.0002 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX,
dispatch_tax=5.0 * ureg.ms) # Very high overhead relative to math
Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB,
dispatch_tax=2.0 * ureg.ms)
class Hardware:
Cloud = Cloud

View File

@@ -5,9 +5,10 @@ Pre-render guardrail for inline Python in QMD files.
Checks:
1. Every `{python} var_name` resolves to a defined variable
2. No inline Python inside LaTeX math mode (causes decimal stripping)
3. No inline Python adjacent to LaTeX symbols like $\\times$
4. No grid tables with inline Python (use pipe tables instead)
2. Every `{python} var_name` appears AFTER its definition (Locality)
3. No inline Python inside LaTeX math mode (causes decimal stripping)
4. No inline Python adjacent to LaTeX symbols like $\\times$
5. No grid tables with inline Python (use pipe tables instead)
Usage:
python3 book/quarto/mlsys/validate_inline_refs.py [--verbose] [--check-patterns]
@@ -36,8 +37,11 @@ INLINE_REF = re.compile(r'`\{python\}\s+(\w+)`')
CELL_START = re.compile(r'^```\{python\}')
CELL_END = re.compile(r'^```\s*$')
# Pattern for variable assignments in compute cells
ASSIGNMENT = re.compile(r'^(\w+)\s*=')
# Pattern for variable assignments in compute cells (handles tuple unpacking)
ASSIGNMENT = re.compile(r'^([a-zA-Z_]\w*(?:\s*,\s*[a-zA-Z_]\w*)*)\s*=')
# Pattern for Exports: in header block
EXPORTS_SECTION = re.compile(r'#\s*.\s*[Ee]xports?:\s*(.*)')
# Problematic patterns that cause rendering issues
# Pattern 1: Inline Python directly inside LaTeX math: $`{python}`$ or $..`{python}`$
@@ -60,41 +64,12 @@ INLINE_FSTRING = re.compile(r'`\{python\}\s*f"[^`]+`')
# Pattern 5: Inline function calls (should be pre-computed as _str)
INLINE_FUNC_CALL = re.compile(r'`\{python\}\s*\w+\([^`]+\)`')
# Pattern 6: Inline Python in YAML cell options (fig-cap, tbl-cap, etc.)
# These NEVER render - Quarto passes YAML options as literal strings
# Pattern 6: Inline Python in YAML chunk options (fig-cap, tbl-cap, fig-alt, lst-cap)
# These NEVER render — Quarto uses the option value as a literal string (verified by
# rendering _test_inline_captions.qmd: body and ": Caption {#tbl-...}" run inline Python;
# #| fig-alt and #| fig-cap do not).
YAML_OPTION_INLINE = re.compile(r'^#\|\s*(fig-cap|tbl-cap|lst-cap|fig-alt):\s*.*`\{python\}')
# Pattern 7: Inline Python in Quarto caption syntax (: Caption {#tbl-...} or {#fig-...})
# These also NEVER render - the caption line is parsed as metadata
CAPTION_SYNTAX_INLINE = re.compile(r'^:\s+.*`\{python\}.*\{#(tbl|fig|lst)-')
def extract_compute_vars(lines):
    """Collect the names assigned inside ```{python} compute cells.

    Walks the lines once, tracking whether the cursor is inside a compute
    cell. Each in-cell line is stripped and matched against ASSIGNMENT; the
    first capture group of every match is recorded.

    Returns:
        A set of the captured assignment targets.
    """
    names = set()
    inside_cell = False
    for raw in lines:
        if CELL_START.match(raw):
            # An opening fence always (re)enters cell mode.
            inside_cell = True
        elif inside_cell and CELL_END.match(raw):
            inside_cell = False
        elif inside_cell:
            hit = ASSIGNMENT.match(raw.strip())
            if hit:
                names.add(hit.group(1))
    return names
def extract_inline_refs(lines):
    """List every inline `{python} var` reference with its line number.

    Returns:
        A list of ``(line_number, variable_name)`` tuples in document order,
        with line numbers starting at 1. A line holding several references
        contributes one tuple per reference.
    """
    return [
        (lineno, hit.group(1))
        for lineno, text in enumerate(lines, 1)
        for hit in INLINE_REF.finditer(text)
    ]
def check_rendering_patterns(qmd_path, verbose=False):
"""Check for patterns that cause rendering issues. Returns list of warnings."""
@@ -152,20 +127,13 @@ def check_rendering_patterns(qmd_path, verbose=False):
if verbose:
print(f"{qmd_path.name}:{i} — Inline function call")
# Check for inline Python in YAML cell options (NEVER renders!)
# Check for inline Python in YAML chunk options (fig-cap, fig-alt, tbl-cap, lst-cap) — NEVER renders
if YAML_OPTION_INLINE.search(line):
warnings.append((filepath, i, "YAML_OPTION",
"Inline Python in YAML option (fig-cap/tbl-cap) - NEVER renders! Use hardcoded value or plt.suptitle()"))
"Inline Python in #| fig-alt/fig-cap/tbl-cap/lst-cap - NEVER renders! Use hardcoded value or set caption in code."))
if verbose:
print(f"{qmd_path.name}:{i} — Python in YAML option (will appear literally)")
# Check for inline Python in Quarto caption syntax (: Caption {#tbl-...})
if CAPTION_SYNTAX_INLINE.search(line):
warnings.append((filepath, i, "CAPTION_SYNTAX",
"Inline Python in caption (: ... {#tbl/fig-}) - NEVER renders! Use hardcoded value."))
if verbose:
print(f"{qmd_path.name}:{i} — Python in caption syntax (will appear literally)")
return warnings
@@ -174,17 +142,68 @@ def validate_file(qmd_path, verbose=False, check_patterns=False):
text = qmd_path.read_text(encoding="utf-8")
lines = text.splitlines()
inline_refs = extract_inline_refs(lines)
if not inline_refs:
return [], [] # No inline refs, nothing to validate
compute_vars = extract_compute_vars(lines)
errors = []
for lineno, var in inline_refs:
if var not in compute_vars:
errors.append((str(qmd_path.relative_to(BOOK_ROOT)), lineno, var))
if verbose:
print(f"{qmd_path.name}:{lineno} — `{{python}} {var}` not defined")
defined_vars = set()
in_cell = False
in_exports = False
for i, line in enumerate(lines, 1):
# 1. Track variable definitions in cells
if CELL_START.match(line):
in_cell = True
continue
if in_cell and CELL_END.match(line):
in_cell = False
in_exports = False
continue
if in_cell:
# Check for assignments: var = ... or var1, var2 = ...
m = ASSIGNMENT.match(line.strip())
if m:
vars_part = m.group(1)
for v in re.split(r'[,\s]+', vars_part):
if v.strip():
defined_vars.add(v.strip())
# Check for Exports: in header
m = EXPORTS_SECTION.match(line.strip())
if m:
in_exports = True
vars_raw = m.group(1)
# Remove unit parentheticals like (MB, GB)
vars_raw = re.sub(r'\(.*?\)', '', vars_raw)
for v in re.split(r'[,\s]+', vars_raw):
v = v.strip().rstrip(',')
if v:
defined_vars.add(v)
elif in_exports:
# Continuation of exports
m = re.match(r'#\s*.\s*(.*)', line.strip())
if m:
content = m.group(1).strip()
# If content starts with a section like 'Goal:', stop
if re.match(r'^[A-Z][a-z]+:', content):
in_exports = False
elif content == "" or "──" in content:
in_exports = False
else:
vars_raw = re.sub(r'\(.*?\)', '', content)
for v in re.split(r'[,\s]+', vars_raw):
v = v.strip().rstrip(',')
if v:
defined_vars.add(v)
else:
in_exports = False
continue # Don't check for refs inside compute cells
# 2. Check inline references for Locality
for m in INLINE_REF.finditer(line):
var = m.group(1)
if var not in defined_vars:
errors.append((str(qmd_path.relative_to(BOOK_ROOT)), i, var))
if verbose:
print(f"{qmd_path.name}:{i} — `{{python}} {var}` used before definition (Locality Violation)")
warnings = []
if check_patterns:
@@ -198,8 +217,17 @@ def main():
verbose = "--verbose" in sys.argv or "-v" in sys.argv
check_patterns = "--check-patterns" in sys.argv or "-p" in sys.argv
qmd_files = sorted(CONTENTS.rglob("*.qmd"))
# Check for path argument
args = [a for a in sys.argv[1:] if not a.startswith("-")]
if args:
target_path = Path(args[0]).resolve()
if target_path.is_file():
qmd_files = [target_path]
else:
qmd_files = sorted(target_path.rglob("*.qmd"))
else:
qmd_files = sorted(CONTENTS.rglob("*.qmd"))
total_files = 0
total_refs = 0
all_errors = []
@@ -230,7 +258,7 @@ def main():
print(f"\n{''*60}")
print("ERRORS (will break render):")
for filepath, lineno, var in all_errors:
print(f" {filepath}:{lineno} — `{{python}} {var}` undefined")
print(f" {filepath}:{lineno} — `{{python}} {var}` undefined/locality violation")
exit_code = 1
if all_warnings: