From 5e0c9a2f5d7ff391a3da72829f5bf8962ae0ae76 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Thu, 26 Feb 2026 15:23:07 -0500 Subject: [PATCH] Update book quarto mlsys (hardware, validate_inline_refs, engine) --- book/quarto/mlsys/__init__.py | 1 + book/quarto/mlsys/engine.py | 84 ++++++++++++ book/quarto/mlsys/hardware.py | 38 ++++-- book/quarto/mlsys/validate_inline_refs.py | 148 +++++++++++++--------- 4 files changed, 198 insertions(+), 73 deletions(-) create mode 100644 book/quarto/mlsys/engine.py diff --git a/book/quarto/mlsys/__init__.py b/book/quarto/mlsys/__init__.py index 23d4767c1..39e513864 100644 --- a/book/quarto/mlsys/__init__.py +++ b/book/quarto/mlsys/__init__.py @@ -5,6 +5,7 @@ from .hardware import Hardware from .models import Models from .deployment import Tiers from .systems import Systems, Archetypes +from .engine import Engine from .scenarios import Scenarios, Applications # Export constants and registry for legacy support diff --git a/book/quarto/mlsys/engine.py b/book/quarto/mlsys/engine.py new file mode 100644 index 000000000..8ee4ac127 --- /dev/null +++ b/book/quarto/mlsys/engine.py @@ -0,0 +1,84 @@ +# engine.py +# The central computational engine for ML Systems analysis. +# Ties Models, Systems, and Formulas into a single "Solver". + +from dataclasses import dataclass +from .models import ModelSpec +from .systems import SystemArchetype +from .constants import ureg, Q_, BYTES_FP32, BYTES_FP16, BYTES_INT8 +from .formulas import calc_bottleneck + +@dataclass(frozen=True) +class PerformanceProfile: + """The result of a system simulation.""" + latency: Q_ + latency_compute: Q_ + latency_memory: Q_ + latency_overhead: Q_ + throughput: Q_ + bottleneck: str + arithmetic_intensity: Q_ + energy: Q_ + memory_footprint: Q_ + peak_flops_actual: Q_ + peak_bw_actual: Q_ + feasible: bool + +class Engine: + """ + Unified solver for ML Systems trade-offs. + """ + + @staticmethod + def solve(model: ModelSpec, system: SystemArchetype, batch_size=1, precision="fp16", efficiency=0.5) -> PerformanceProfile: + hw = system.hardware + + # 1. Map Precision + if precision == "fp32": + bpp = BYTES_FP32 + peak_flops = hw.peak_flops_fp32 or hw.peak_flops + elif precision == "int8": + bpp = BYTES_INT8 + peak_flops = hw.int8_flops or hw.peak_flops + else: # Default fp16 + bpp = BYTES_FP16 + peak_flops = hw.peak_flops + + # 2. Workload + ops_per_inference = model.inference_flops or (2 * model.parameters.to(ureg.count).magnitude * ureg.flop) + total_ops = ops_per_inference * batch_size + memory_bytes = model.size_in_bytes(bpp) + + # 3. Physics (Iron Law) + # Note: We use the hardware's memory bandwidth directly. + results = calc_bottleneck( + ops=total_ops, + model_bytes=memory_bytes, + device_flops=peak_flops * efficiency, + device_bw=hw.memory_bw + ) + + t_comp = results["compute_ms"] * ureg.ms + t_mem = results["memory_ms"] * ureg.ms + t_overhead = hw.dispatch_tax + + # Total Latency (Pipelined Assumption: overlapping data and compute) + latency = max(t_comp, t_mem) + t_overhead + + # 4. Feasibility Check + feasible = memory_bytes <= system.ram + + return PerformanceProfile( + latency=latency, + latency_compute=t_comp, + latency_memory=t_mem, + latency_overhead=t_overhead, + throughput=(batch_size / latency).to(1/ureg.second), + bottleneck=results["bottleneck"], + arithmetic_intensity=results["intensity"] * (ureg.flop / ureg.byte), + energy=(hw.tdp * latency).to(ureg.joule) if hw.tdp else 0 * ureg.joule, + memory_footprint=memory_bytes, + peak_flops_actual=peak_flops * efficiency, + peak_bw_actual=hw.memory_bw, + feasible=feasible + ) diff --git a/book/quarto/mlsys/hardware.py b/book/quarto/mlsys/hardware.py index 9be49faf0..fc582e3f5 100644 --- a/book/quarto/mlsys/hardware.py +++ b/book/quarto/mlsys/hardware.py @@ -27,6 +27,7 @@ class HardwareSpec: memory_capacity: Q_ tdp: Optional[Q_] = None battery_capacity: Optional[Q_] = None + dispatch_tax: Q_ = 0.01 * ureg.ms # Default 10us # Precision-specific FLOPS peak_flops_fp32: Optional[Q_] = None @@ -50,6 +51,7 @@ class HardwareSpec: _validate(self.memory_bw, "memory_bw", ureg.byte/ureg.second, "data/time (e.g. GB/s)") _validate(self.peak_flops, "peak_flops", ureg.flop/ureg.second, "compute rate (e.g. TFLOPs/s)") _validate(self.memory_capacity, "memory_capacity", ureg.byte, "data size (e.g. GiB)") + _validate(self.dispatch_tax, "dispatch_tax", ureg.second, "time (e.g. ms)") if self.tdp: _validate(self.tdp, "tdp", ureg.watt, "power (e.g. W)") if self.battery_capacity: @@ -75,34 +77,44 @@ class Networks: class Cloud: """Datacenter-scale Accelerators.""" V100 = HardwareSpec("NVIDIA V100", 2017, V100_MEM_BW, V100_FLOPS_FP16_TENSOR, V100_MEM_CAPACITY, V100_TDP, - peak_flops_fp32=V100_FLOPS_FP32) + peak_flops_fp32=V100_FLOPS_FP32, dispatch_tax=0.02 * ureg.ms) A100 = HardwareSpec("NVIDIA A100", 2020, A100_MEM_BW, A100_FLOPS_FP16_TENSOR, A100_MEM_CAPACITY, A100_TDP, - peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8) + peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8, + dispatch_tax=0.015 * ureg.ms) H100 = HardwareSpec("NVIDIA H100", 2022, H100_MEM_BW, H100_FLOPS_FP16_TENSOR, H100_MEM_CAPACITY, H100_TDP, - tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8) + tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8, + dispatch_tax=0.01 * ureg.ms) B200 = HardwareSpec("NVIDIA B200", 2024, B200_MEM_BW, B200_FLOPS_FP16_TENSOR, B200_MEM_CAPACITY, B200_TDP, - fp8_flops=B200_FLOPS_FP8_TENSOR) + fp8_flops=B200_FLOPS_FP8_TENSOR, dispatch_tax=0.008 * ureg.ms) T4 = HardwareSpec("NVIDIA T4", 2018, T4_MEM_BW, T4_FLOPS_FP16_TENSOR, 16 * ureg.GiB, T4_TDP, - int8_flops=T4_FLOPS_INT8) + int8_flops=T4_FLOPS_INT8, dispatch_tax=0.03 * ureg.ms) - TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB) + TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB, dispatch_tax=0.05 * ureg.ms) class Edge: """Mobile and Robotics Hardware.""" - Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB, battery_capacity=15 * ureg.Wh) + Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB, + battery_capacity=15 * ureg.Wh, dispatch_tax=1.0 * ureg.ms) # High OS overhead # Specific Edge Devices - Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W) # 4 TOPS INT8 - JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W) # 100 TOPS INT8 - NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W) + Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W, + dispatch_tax=0.5 * ureg.ms) + JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W, + dispatch_tax=0.2 * ureg.ms) + NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W, + dispatch_tax=2.0 * ureg.ms) # Servers - GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W) + GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W, + dispatch_tax=0.1 * ureg.ms) class Tiny: """Microcontrollers and Embedded Systems.""" - ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.01 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX) - Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB) + # ESP32 at 240MHz is ~240 MIPS, for AI math without FPU it's roughly 100-200 MFLOPS (0.0001-0.0002 TFLOPS) + ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.0002 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX, + dispatch_tax=5.0 * ureg.ms) # Very high overhead relative to math + Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB, + dispatch_tax=2.0 * ureg.ms) class Hardware: Cloud = Cloud diff --git a/book/quarto/mlsys/validate_inline_refs.py b/book/quarto/mlsys/validate_inline_refs.py index eaec0fdfd..9d4fdda95 100644 --- a/book/quarto/mlsys/validate_inline_refs.py +++ b/book/quarto/mlsys/validate_inline_refs.py @@ -5,9 +5,10 @@ Pre-render guardrail for inline Python in QMD files. Checks: 1. Every `{python} var_name` resolves to a defined variable -2. No inline Python inside LaTeX math mode (causes decimal stripping) -3. No inline Python adjacent to LaTeX symbols like $\\times$ -4. No grid tables with inline Python (use pipe tables instead) +2. Every `{python} var_name` appears AFTER its definition (Locality) +3. No inline Python inside LaTeX math mode (causes decimal stripping) +4. No inline Python adjacent to LaTeX symbols like $\\times$ +5. No grid tables with inline Python (use pipe tables instead) Usage: python3 book/quarto/mlsys/validate_inline_refs.py [--verbose] [--check-patterns] @@ -36,8 +37,11 @@ INLINE_REF = re.compile(r'`\{python\}\s+(\w+)`') CELL_START = re.compile(r'^```\{python\}') CELL_END = re.compile(r'^```\s*$') -# Pattern for variable assignments in compute cells -ASSIGNMENT = re.compile(r'^(\w+)\s*=') +# Pattern for variable assignments in compute cells (handles tuple unpacking) +ASSIGNMENT = re.compile(r'^([a-zA-Z_]\w*(?:\s*,\s*[a-zA-Z_]\w*)*)\s*=') + +# Pattern for Exports: in header block +EXPORTS_SECTION = re.compile(r'#\s*.\s*[Ee]xports?:\s*(.*)') # Problematic patterns that cause rendering issues # Pattern 1: Inline Python directly inside LaTeX math: $`{python}`$ or $..`{python}`$ @@ -60,41 +64,12 @@ INLINE_FSTRING = re.compile(r'`\{python\}\s*f"[^`]+`') # Pattern 5: Inline function calls (should be pre-computed as _str) INLINE_FUNC_CALL = re.compile(r'`\{python\}\s*\w+\([^`]+\)`') -# Pattern 6: Inline Python in YAML cell options (fig-cap, tbl-cap, etc.) -# These NEVER render - Quarto passes YAML options as literal strings +# Pattern 6: Inline Python in YAML chunk options (fig-cap, tbl-cap, fig-alt, lst-cap) +# These NEVER render — Quarto uses the option value as a literal string (verified by +# rendering _test_inline_captions.qmd: body and ": Caption {#tbl-...}" run inline Python; +# #| fig-alt and #| fig-cap do not). YAML_OPTION_INLINE = re.compile(r'^#\|\s*(fig-cap|tbl-cap|lst-cap|fig-alt):\s*.*`\{python\}') -# Pattern 7: Inline Python in Quarto caption syntax (: Caption {#tbl-...} or {#fig-...}) -# These also NEVER render - the caption line is parsed as metadata -CAPTION_SYNTAX_INLINE = re.compile(r'^:\s+.*`\{python\}.*\{#(tbl|fig|lst)-') - - -def extract_compute_vars(lines): - """Extract all variable names assigned in ```{python} compute cells.""" - variables = set() - in_cell = False - for line in lines: - if CELL_START.match(line): - in_cell = True - continue - if in_cell and CELL_END.match(line): - in_cell = False - continue - if in_cell: - m = ASSIGNMENT.match(line.strip()) - if m: - variables.add(m.group(1)) - return variables - - -def extract_inline_refs(lines): - """Extract all inline `{python} var` references with line numbers.""" - refs = [] - for i, line in enumerate(lines, 1): - for m in INLINE_REF.finditer(line): - refs.append((i, m.group(1))) - return refs - def check_rendering_patterns(qmd_path, verbose=False): """Check for patterns that cause rendering issues. Returns list of warnings.""" @@ -152,20 +127,13 @@ def check_rendering_patterns(qmd_path, verbose=False): if verbose: print(f" ⚠ {qmd_path.name}:{i} — Inline function call") - # Check for inline Python in YAML cell options (NEVER renders!) + # Check for inline Python in YAML chunk options (fig-cap, fig-alt, tbl-cap, lst-cap) — NEVER renders if YAML_OPTION_INLINE.search(line): warnings.append((filepath, i, "YAML_OPTION", - "Inline Python in YAML option (fig-cap/tbl-cap) - NEVER renders! Use hardcoded value or plt.suptitle()")) + "Inline Python in #| fig-alt/fig-cap/tbl-cap/lst-cap - NEVER renders! Use hardcoded value or set caption in code.")) if verbose: print(f" ✗ {qmd_path.name}:{i} — Python in YAML option (will appear literally)") - # Check for inline Python in Quarto caption syntax (: Caption {#tbl-...}) - if CAPTION_SYNTAX_INLINE.search(line): - warnings.append((filepath, i, "CAPTION_SYNTAX", - "Inline Python in caption (: ... {#tbl/fig-}) - NEVER renders! Use hardcoded value.")) - if verbose: - print(f" ✗ {qmd_path.name}:{i} — Python in caption syntax (will appear literally)") - return warnings @@ -174,17 +142,68 @@ def validate_file(qmd_path, verbose=False, check_patterns=False): text = qmd_path.read_text(encoding="utf-8") lines = text.splitlines() - inline_refs = extract_inline_refs(lines) - if not inline_refs: - return [], [] # No inline refs, nothing to validate - - compute_vars = extract_compute_vars(lines) errors = [] - for lineno, var in inline_refs: - if var not in compute_vars: - errors.append((str(qmd_path.relative_to(BOOK_ROOT)), lineno, var)) - if verbose: - print(f" ✗ {qmd_path.name}:{lineno} — `{{python}} {var}` not defined") + defined_vars = set() + in_cell = False + in_exports = False + + for i, line in enumerate(lines, 1): + # 1. Track variable definitions in cells + if CELL_START.match(line): + in_cell = True + continue + if in_cell and CELL_END.match(line): + in_cell = False + in_exports = False + continue + + if in_cell: + # Check for assignments: var = ... or var1, var2 = ... + m = ASSIGNMENT.match(line.strip()) + if m: + vars_part = m.group(1) + for v in re.split(r'[,\s]+', vars_part): + if v.strip(): + defined_vars.add(v.strip()) + + # Check for Exports: in header + m = EXPORTS_SECTION.match(line.strip()) + if m: + in_exports = True + vars_raw = m.group(1) + # Remove unit parentheticals like (MB, GB) + vars_raw = re.sub(r'\(.*?\)', '', vars_raw) + for v in re.split(r'[,\s]+', vars_raw): + v = v.strip().rstrip(',') + if v: + defined_vars.add(v) + elif in_exports: + # Continuation of exports + m = re.match(r'#\s*.\s*(.*)', line.strip()) + if m: + content = m.group(1).strip() + # If content starts with a section like 'Goal:', stop + if re.match(r'^[A-Z][a-z]+:', content): + in_exports = False + elif content == "" or "──" in content: + in_exports = False + else: + vars_raw = re.sub(r'\(.*?\)', '', content) + for v in re.split(r'[,\s]+', vars_raw): + v = v.strip().rstrip(',') + if v: + defined_vars.add(v) + else: + in_exports = False + continue # Don't check for refs inside compute cells + + # 2. Check inline references for Locality + for m in INLINE_REF.finditer(line): + var = m.group(1) + if var not in defined_vars: + errors.append((str(qmd_path.relative_to(BOOK_ROOT)), i, var)) + if verbose: + print(f" ✗ {qmd_path.name}:{i} — `{{python}} {var}` used before definition (Locality Violation)") warnings = [] if check_patterns: @@ -198,8 +217,17 @@ def main(): verbose = "--verbose" in sys.argv or "-v" in sys.argv check_patterns = "--check-patterns" in sys.argv or "-p" in sys.argv - - qmd_files = sorted(CONTENTS.rglob("*.qmd")) + + # Check for path argument + args = [a for a in sys.argv[1:] if not a.startswith("-")] + if args: + target_path = Path(args[0]).resolve() + if target_path.is_file(): + qmd_files = [target_path] + else: + qmd_files = sorted(target_path.rglob("*.qmd")) + else: + qmd_files = sorted(CONTENTS.rglob("*.qmd")) total_files = 0 total_refs = 0 all_errors = [] @@ -230,7 +258,7 @@ def main(): print(f"\n{'─'*60}") print("ERRORS (will break render):") for filepath, lineno, var in all_errors: - print(f" {filepath}:{lineno} — `{{python}} {var}` undefined") + print(f" {filepath}:{lineno} — `{{python}} {var}` undefined/locality violation") exit_code = 1 if all_warnings: