From 5e0c9a2f5d7ff391a3da72829f5bf8962ae0ae76 Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi <vj@eecs.harvard.edu>
Date: Thu, 26 Feb 2026 15:23:07 -0500
Subject: [PATCH] Update book quarto mlsys (hardware, validate_inline_refs,
 engine)

---
 book/quarto/mlsys/__init__.py             |   1 +
 book/quarto/mlsys/engine.py               |  84 ++++++++++++
 book/quarto/mlsys/hardware.py             |  38 ++++--
 book/quarto/mlsys/validate_inline_refs.py | 148 +++++++++++++---------
 4 files changed, 198 insertions(+), 73 deletions(-)
 create mode 100644 book/quarto/mlsys/engine.py

diff --git a/book/quarto/mlsys/__init__.py b/book/quarto/mlsys/__init__.py
index 23d4767c1..39e513864 100644
--- a/book/quarto/mlsys/__init__.py
+++ b/book/quarto/mlsys/__init__.py
@@ -5,6 +5,7 @@ from .hardware import Hardware
 from .models import Models
 from .deployment import Tiers
 from .systems import Systems, Archetypes
+from .engine import Engine
 from .scenarios import Scenarios, Applications
 
 # Export constants and registry for legacy support
diff --git a/book/quarto/mlsys/engine.py b/book/quarto/mlsys/engine.py
new file mode 100644
index 000000000..8ee4ac127
--- /dev/null
+++ b/book/quarto/mlsys/engine.py
@@ -0,0 +1,84 @@
+# engine.py
+# The central computational engine for ML Systems analysis.
+# Ties Models, Systems, and Formulas into a single "Solver".
+
+from dataclasses import dataclass
+from .models import ModelSpec
+from .systems import SystemArchetype
+from .constants import ureg, Q_, BYTES_FP32, BYTES_FP16, BYTES_INT8
+from .formulas import calc_bottleneck
+
+@dataclass(frozen=True)
+class PerformanceProfile:
+    """The result of a system simulation."""
+    latency: Q_  # end-to-end time; Engine.solve sets max(compute, memory) + overhead
+    latency_compute: Q_  # compute-side time, tagged in ms by Engine.solve
+    latency_memory: Q_  # memory-side time, tagged in ms by Engine.solve
+    latency_overhead: Q_  # fixed per-call overhead; Engine.solve copies hardware.dispatch_tax
+    throughput: Q_  # batch_size / latency, converted to 1/second by Engine.solve
+    bottleneck: str  # label taken verbatim from calc_bottleneck's "bottleneck" entry
+    arithmetic_intensity: Q_  # calc_bottleneck's "intensity" entry tagged as flop/byte
+    energy: Q_  # tdp * latency in joules; 0 J when the hardware's tdp is unset/falsy
+    memory_footprint: Q_  # model.size_in_bytes(...) at the selected precision
+    peak_flops_actual: Q_  # precision-specific peak FLOPS multiplied by `efficiency`
+    peak_bw_actual: Q_  # hardware.memory_bw, passed through unscaled
+    feasible: bool  # True when memory_footprint <= system.ram
+
+class Engine:
+    """
+    Unified solver for ML Systems trade-offs.
+    """
+    # solve() maps (model, system, batch_size, precision, efficiency) to a PerformanceProfile.
+    @staticmethod
+    def solve(model: ModelSpec, system: SystemArchetype, batch_size=1, precision="fp16", efficiency=0.5) -> PerformanceProfile:
+        hw = system.hardware
+        # NOTE: `efficiency` derates peak FLOPS only; hw.memory_bw is passed through unscaled.
+        # 1. Map Precision: choose bytes-per-parameter (bpp) and the matching peak FLOPS.
+        if precision == "fp32":
+            bpp = BYTES_FP32
+            peak_flops = hw.peak_flops_fp32 or hw.peak_flops  # falls back when no fp32 figure is set
+        elif precision == "int8":
+            bpp = BYTES_INT8
+            peak_flops = hw.int8_flops or hw.peak_flops  # falls back when no int8 figure is set
+        else: # Default fp16 -- any other precision string (match is exact, case-sensitive) also lands here
+            bpp = BYTES_FP16
+            peak_flops = hw.peak_flops
+
+        # 2. Workload: use model.inference_flops when truthy, else estimate 2 FLOPs per parameter.
+        ops_per_inference = model.inference_flops or (2 * model.parameters.to(ureg.count).magnitude * ureg.flop)
+        total_ops = ops_per_inference * batch_size
+        memory_bytes = model.size_in_bytes(bpp)  # not multiplied by batch_size, unlike total_ops
+        
+        # 3. Physics (Iron Law)
+        # Note: We use the hardware's memory bandwidth directly.
+        results = calc_bottleneck(
+            ops=total_ops, 
+            model_bytes=memory_bytes, 
+            device_flops=peak_flops * efficiency,  # derated peak; same value reported as peak_flops_actual
+            device_bw=hw.memory_bw
+        )
+        
+        t_comp = results["compute_ms"] * ureg.ms  # presumably a bare number of ms -- confirm against calc_bottleneck
+        t_mem = results["memory_ms"] * ureg.ms  # presumably a bare number of ms -- confirm against calc_bottleneck
+        t_overhead = hw.dispatch_tax
+        
+        # Total Latency (Pipelined Assumption: overlapping data and compute)
+        latency = max(t_comp, t_mem) + t_overhead
+        
+        # 4. Feasibility Check: the model at this precision must fit in system.ram.
+        feasible = memory_bytes <= system.ram
+        
+        return PerformanceProfile(
+            latency=latency,
+            latency_compute=t_comp,
+            latency_memory=t_mem,
+            latency_overhead=t_overhead,
+            throughput=(batch_size / latency).to(1/ureg.second),
+            bottleneck=results["bottleneck"],
+            arithmetic_intensity=results["intensity"] * (ureg.flop / ureg.byte),
+            energy=(hw.tdp * latency).to(ureg.joule) if hw.tdp else 0 * ureg.joule,
+            memory_footprint=memory_bytes,
+            peak_flops_actual=peak_flops * efficiency,
+            peak_bw_actual=hw.memory_bw,
+            feasible=feasible
+        )
diff --git a/book/quarto/mlsys/hardware.py b/book/quarto/mlsys/hardware.py
index 9be49faf0..fc582e3f5 100644
--- a/book/quarto/mlsys/hardware.py
+++ b/book/quarto/mlsys/hardware.py
@@ -27,6 +27,7 @@ class HardwareSpec:
     memory_capacity: Q_
     tdp: Optional[Q_] = None
     battery_capacity: Optional[Q_] = None
+    dispatch_tax: Q_ = 0.01 * ureg.ms # Default 10us
     
     # Precision-specific FLOPS
     peak_flops_fp32: Optional[Q_] = None
@@ -50,6 +51,7 @@ class HardwareSpec:
         _validate(self.memory_bw,       "memory_bw",       ureg.byte/ureg.second, "data/time (e.g. GB/s)")
         _validate(self.peak_flops,      "peak_flops",      ureg.flop/ureg.second, "compute rate (e.g. TFLOPs/s)")
         _validate(self.memory_capacity, "memory_capacity", ureg.byte,             "data size (e.g. GiB)")
+        _validate(self.dispatch_tax,    "dispatch_tax",    ureg.second,           "time (e.g. ms)")
         if self.tdp:
             _validate(self.tdp,         "tdp",             ureg.watt,             "power (e.g. W)")
         if self.battery_capacity:
@@ -75,34 +77,44 @@ class Networks:
 class Cloud:
     """Datacenter-scale Accelerators."""
     V100 = HardwareSpec("NVIDIA V100", 2017, V100_MEM_BW, V100_FLOPS_FP16_TENSOR, V100_MEM_CAPACITY, V100_TDP, 
-                        peak_flops_fp32=V100_FLOPS_FP32)
+                        peak_flops_fp32=V100_FLOPS_FP32, dispatch_tax=0.02 * ureg.ms)
     A100 = HardwareSpec("NVIDIA A100", 2020, A100_MEM_BW, A100_FLOPS_FP16_TENSOR, A100_MEM_CAPACITY, A100_TDP,
-                        peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8)
+                        peak_flops_fp32=A100_FLOPS_FP32, tf32_flops=A100_FLOPS_TF32, int8_flops=A100_FLOPS_INT8,
+                        dispatch_tax=0.015 * ureg.ms)
     H100 = HardwareSpec("NVIDIA H100", 2022, H100_MEM_BW, H100_FLOPS_FP16_TENSOR, H100_MEM_CAPACITY, H100_TDP,
-                        tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8)
+                        tf32_flops=H100_FLOPS_TF32, fp8_flops=H100_FLOPS_FP8_TENSOR, int8_flops=H100_FLOPS_INT8,
+                        dispatch_tax=0.01 * ureg.ms)
     B200 = HardwareSpec("NVIDIA B200", 2024, B200_MEM_BW, B200_FLOPS_FP16_TENSOR, B200_MEM_CAPACITY, B200_TDP,
-                        fp8_flops=B200_FLOPS_FP8_TENSOR)
+                        fp8_flops=B200_FLOPS_FP8_TENSOR, dispatch_tax=0.008 * ureg.ms)
     T4   = HardwareSpec("NVIDIA T4",   2018, T4_MEM_BW,   T4_FLOPS_FP16_TENSOR,   16 * ureg.GiB,     T4_TDP,
-                        int8_flops=T4_FLOPS_INT8)
+                        int8_flops=T4_FLOPS_INT8, dispatch_tax=0.03 * ureg.ms)
     
-    TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB)
+    TPUv4 = HardwareSpec("Google TPU v4", 2021, TPUV4_MEM_BW, TPUV4_FLOPS_BF16, 32 * ureg.GiB, dispatch_tax=0.05 * ureg.ms)
 
 class Edge:
     """Mobile and Robotics Hardware."""
-    Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB, battery_capacity=15 * ureg.Wh)
+    Generic_Phone = HardwareSpec("Smartphone", 2024, MOBILE_NPU_MEM_BW, MOBILE_NPU_TOPS_INT8, 8 * ureg.GiB, 
+                                 battery_capacity=15 * ureg.Wh, dispatch_tax=1.0 * ureg.ms) # High OS overhead
     
     # Specific Edge Devices
-    Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W) # 4 TOPS INT8
-    JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W) # 100 TOPS INT8
-    NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W)
+    Coral = HardwareSpec("Google Coral Dev", 2019, 25 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 1 * ureg.GB, 2 * ureg.W, 
+                         dispatch_tax=0.5 * ureg.ms)
+    JetsonOrinNX = HardwareSpec("NVIDIA Jetson Orin NX", 2023, 102 * ureg.GB/ureg.s, 100 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 25 * ureg.W, 
+                                dispatch_tax=0.2 * ureg.ms)
+    NUC_Movidius = HardwareSpec("Intel NUC + Movidius", 2020, 50 * ureg.GB/ureg.s, 4 * ureg.TFLOPs/ureg.s, 16 * ureg.GB, 15 * ureg.W, 
+                                dispatch_tax=2.0 * ureg.ms)
     
     # Servers
-    GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W)
+    GenericServer = HardwareSpec("Edge Server", 2024, 100 * ureg.GB/ureg.s, 1 * ureg.TFLOPs/ureg.s, 128 * ureg.GB, 300 * ureg.W, 
+                                 dispatch_tax=0.1 * ureg.ms)
 
 class Tiny:
     """Microcontrollers and Embedded Systems."""
-    ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.01 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX)
-    Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB)
+    # ESP32 at 240MHz is ~240 MIPS, for AI math without FPU it's roughly 100-200 MFLOPS (0.0001-0.0002 TFLOPS)
+    ESP32 = HardwareSpec("ESP32-CAM", 2019, 0.1 * ureg.GB/ureg.second, 0.0002 * ureg.TFLOPs/ureg.second, ESP32_RAM, ESP32_POWER_MAX, 
+                         dispatch_tax=5.0 * ureg.ms) # Very high overhead relative to math
+    Generic_MCU = HardwareSpec("Cortex-M7", 2020, 0.05 * ureg.GB/ureg.second, 0.001 * ureg.TFLOPs/ureg.second, MCU_RAM_KIB, 
+                               dispatch_tax=2.0 * ureg.ms)
 
 class Hardware:
     Cloud = Cloud
diff --git a/book/quarto/mlsys/validate_inline_refs.py b/book/quarto/mlsys/validate_inline_refs.py
index eaec0fdfd..9d4fdda95 100644
--- a/book/quarto/mlsys/validate_inline_refs.py
+++ b/book/quarto/mlsys/validate_inline_refs.py
@@ -5,9 +5,10 @@ Pre-render guardrail for inline Python in QMD files.
 
 Checks:
 1. Every `{python} var_name` resolves to a defined variable
-2. No inline Python inside LaTeX math mode (causes decimal stripping)
-3. No inline Python adjacent to LaTeX symbols like $\\times$
-4. No grid tables with inline Python (use pipe tables instead)
+2. Every `{python} var_name` appears AFTER its definition (Locality)
+3. No inline Python inside LaTeX math mode (causes decimal stripping)
+4. No inline Python adjacent to LaTeX symbols like $\\times$
+5. No grid tables with inline Python (use pipe tables instead)
 
 Usage:
     python3 book/quarto/mlsys/validate_inline_refs.py [--verbose] [--check-patterns]
@@ -36,8 +37,11 @@ INLINE_REF = re.compile(r'`\{python\}\s+(\w+)`')
 CELL_START = re.compile(r'^```\{python\}')
 CELL_END = re.compile(r'^```\s*$')
 
-# Pattern for variable assignments in compute cells
-ASSIGNMENT = re.compile(r'^(\w+)\s*=')
+# Pattern for variable assignments in compute cells (handles tuple unpacking)
+ASSIGNMENT = re.compile(r'^([a-zA-Z_]\w*(?:\s*,\s*[a-zA-Z_]\w*)*)\s*=')
+
+# Pattern for Exports: in header block
+EXPORTS_SECTION = re.compile(r'#\s*.\s*[Ee]xports?:\s*(.*)')
 
 # Problematic patterns that cause rendering issues
 # Pattern 1: Inline Python directly inside LaTeX math: $`{python}`$ or $..`{python}`$
@@ -60,41 +64,12 @@ INLINE_FSTRING = re.compile(r'`\{python\}\s*f"[^`]+`')
 # Pattern 5: Inline function calls (should be pre-computed as _str)
 INLINE_FUNC_CALL = re.compile(r'`\{python\}\s*\w+\([^`]+\)`')
 
-# Pattern 6: Inline Python in YAML cell options (fig-cap, tbl-cap, etc.)
-# These NEVER render - Quarto passes YAML options as literal strings
+# Pattern 6: Inline Python in YAML chunk options (fig-cap, tbl-cap, fig-alt, lst-cap)
+# These NEVER render — Quarto uses the option value as a literal string (verified by
+# rendering _test_inline_captions.qmd: body and ": Caption {#tbl-...}" run inline Python;
+# #| fig-alt and #| fig-cap do not).
 YAML_OPTION_INLINE = re.compile(r'^#\|\s*(fig-cap|tbl-cap|lst-cap|fig-alt):\s*.*`\{python\}')
 
-# Pattern 7: Inline Python in Quarto caption syntax (: Caption {#tbl-...} or {#fig-...})
-# These also NEVER render - the caption line is parsed as metadata
-CAPTION_SYNTAX_INLINE = re.compile(r'^:\s+.*`\{python\}.*\{#(tbl|fig|lst)-')
-
-
-def extract_compute_vars(lines):
-    """Extract all variable names assigned in ```{python} compute cells."""
-    variables = set()
-    in_cell = False
-    for line in lines:
-        if CELL_START.match(line):
-            in_cell = True
-            continue
-        if in_cell and CELL_END.match(line):
-            in_cell = False
-            continue
-        if in_cell:
-            m = ASSIGNMENT.match(line.strip())
-            if m:
-                variables.add(m.group(1))
-    return variables
-
-
-def extract_inline_refs(lines):
-    """Extract all inline `{python} var` references with line numbers."""
-    refs = []
-    for i, line in enumerate(lines, 1):
-        for m in INLINE_REF.finditer(line):
-            refs.append((i, m.group(1)))
-    return refs
-
 
 def check_rendering_patterns(qmd_path, verbose=False):
     """Check for patterns that cause rendering issues. Returns list of warnings."""
@@ -152,20 +127,13 @@ def check_rendering_patterns(qmd_path, verbose=False):
             if verbose:
                 print(f"  ⚠ {qmd_path.name}:{i} — Inline function call")
 
-        # Check for inline Python in YAML cell options (NEVER renders!)
+        # Check for inline Python in YAML chunk options (fig-cap, fig-alt, tbl-cap, lst-cap) — NEVER renders
         if YAML_OPTION_INLINE.search(line):
             warnings.append((filepath, i, "YAML_OPTION",
-                "Inline Python in YAML option (fig-cap/tbl-cap) - NEVER renders! Use hardcoded value or plt.suptitle()"))
+                "Inline Python in #| fig-alt/fig-cap/tbl-cap/lst-cap - NEVER renders! Use hardcoded value or set caption in code."))
             if verbose:
                 print(f"  ✗ {qmd_path.name}:{i} — Python in YAML option (will appear literally)")
 
-        # Check for inline Python in Quarto caption syntax (: Caption {#tbl-...})
-        if CAPTION_SYNTAX_INLINE.search(line):
-            warnings.append((filepath, i, "CAPTION_SYNTAX",
-                "Inline Python in caption (: ... {#tbl/fig-}) - NEVER renders! Use hardcoded value."))
-            if verbose:
-                print(f"  ✗ {qmd_path.name}:{i} — Python in caption syntax (will appear literally)")
-
     return warnings
 
 
@@ -174,17 +142,68 @@ def validate_file(qmd_path, verbose=False, check_patterns=False):
     text = qmd_path.read_text(encoding="utf-8")
     lines = text.splitlines()
 
-    inline_refs = extract_inline_refs(lines)
-    if not inline_refs:
-        return [], []  # No inline refs, nothing to validate
-
-    compute_vars = extract_compute_vars(lines)
     errors = []
-    for lineno, var in inline_refs:
-        if var not in compute_vars:
-            errors.append((str(qmd_path.relative_to(BOOK_ROOT)), lineno, var))
-            if verbose:
-                print(f"  ✗ {qmd_path.name}:{lineno} — `{{python}} {var}` not defined")
+    defined_vars = set()
+    in_cell = False
+    in_exports = False
+
+    for i, line in enumerate(lines, 1):
+        # 1. Track variable definitions in cells
+        if CELL_START.match(line):
+            in_cell = True
+            continue
+        if in_cell and CELL_END.match(line):
+            in_cell = False
+            in_exports = False
+            continue
+        
+        if in_cell:
+            # Check for assignments: var = ... or var1, var2 = ...
+            m = ASSIGNMENT.match(line.strip())
+            if m:
+                vars_part = m.group(1)
+                for v in re.split(r'[,\s]+', vars_part):
+                    if v.strip():
+                        defined_vars.add(v.strip())
+            
+            # Check for Exports: in header
+            m = EXPORTS_SECTION.match(line.strip())
+            if m:
+                in_exports = True
+                vars_raw = m.group(1)
+                # Remove unit parentheticals like (MB, GB)
+                vars_raw = re.sub(r'\(.*?\)', '', vars_raw)
+                for v in re.split(r'[,\s]+', vars_raw):
+                    v = v.strip().rstrip(',')
+                    if v:
+                        defined_vars.add(v)
+            elif in_exports:
+                # Continuation of exports
+                m = re.match(r'#\s*.\s*(.*)', line.strip())
+                if m:
+                    content = m.group(1).strip()
+                    # If content starts with a section like 'Goal:', stop
+                    if re.match(r'^[A-Z][a-z]+:', content):
+                        in_exports = False
+                    elif content == "" or "──" in content:
+                        in_exports = False
+                    else:
+                        vars_raw = re.sub(r'\(.*?\)', '', content)
+                        for v in re.split(r'[,\s]+', vars_raw):
+                            v = v.strip().rstrip(',')
+                            if v:
+                                defined_vars.add(v)
+                else:
+                    in_exports = False
+            continue # Don't check for refs inside compute cells
+
+        # 2. Check inline references for Locality
+        for m in INLINE_REF.finditer(line):
+            var = m.group(1)
+            if var not in defined_vars:
+                errors.append((str(qmd_path.relative_to(BOOK_ROOT)), i, var))
+                if verbose:
+                    print(f"  ✗ {qmd_path.name}:{i} — `{{python}} {var}` used before definition (Locality Violation)")
 
     warnings = []
     if check_patterns:
@@ -198,8 +217,17 @@ def main():
 
     verbose = "--verbose" in sys.argv or "-v" in sys.argv
     check_patterns = "--check-patterns" in sys.argv or "-p" in sys.argv
-
-    qmd_files = sorted(CONTENTS.rglob("*.qmd"))
+    
+    # Check for path argument
+    args = [a for a in sys.argv[1:] if not a.startswith("-")]
+    if args:
+        target_path = Path(args[0]).resolve()
+        if target_path.is_file():
+            qmd_files = [target_path]
+        else:
+            qmd_files = sorted(target_path.rglob("*.qmd"))
+    else:
+        qmd_files = sorted(CONTENTS.rglob("*.qmd"))
     total_files = 0
     total_refs = 0
     all_errors = []
@@ -230,7 +258,7 @@ def main():
         print(f"\n{'─'*60}")
         print("ERRORS (will break render):")
         for filepath, lineno, var in all_errors:
-            print(f"  {filepath}:{lineno} — `{{python}} {var}` undefined")
+            print(f"  {filepath}:{lineno} — `{{python}} {var}` undefined/locality violation")
         exit_code = 1
 
     if all_warnings: