refactor: inline QMD plots and slim viz helpers

Move remaining plot logic into QMD blocks and keep physx/viz styling-only. Update preview scripts to use local plot code.
2026-04-30 09:38:38 -05:00 · 2026-02-04 16:34:31 -05:00
parent ab9d9b49a5
commit 668cc25030
5 changed files with 342 additions and 1633 deletions
--- a/book/tools/scripts/preview_diagrams.py
+++ b/book/tools/scripts/preview_diagrams.py
@@ -11,16 +11,134 @@ from physx import viz
 OUTPUT_DIR = "book/quarto/assets/preview_plots"
 os.makedirs(OUTPUT_DIR, exist_ok=True)

+def plot_ml_lifecycle(ax=None):
+    """Draw the 'ML System Lifecycle' diagram as a closed loop of stages.
+
+    Six rounded boxes are laid out on a 3x2 grid and joined clockwise by
+    solid arrows. A dashed arrow from evaluation back to data preparation,
+    labelled "Needs Improvement", marks the feedback path.
+
+    Args:
+        ax: Matplotlib axes to draw on. When None, a new figure with
+            figsize=(10, 6) is created.
+
+    Returns:
+        The axes holding the diagram.
+    """
+    from matplotlib.patches import ConnectionPatch, FancyBboxPatch
+
+    if ax is None:
+        _, ax = plt.subplots(figsize=(10, 6))
+
+    # Hidden axes over a fixed 12x8 data canvas; every coordinate below uses it.
+    ax.axis('off')
+    ax.set_xlim(0, 12)
+    ax.set_ylim(0, 8)
+
+    # (name, centre x, centre y, label, fill colour key, outline colour key),
+    # listed in the order the main cycle visits them.
+    stages = [
+        ('Collection', 2, 6, 'Data\nCollection', 'BlueL', 'BlueLine'),
+        ('Prep', 6, 6, 'Data\nPreparation', 'GreenL', 'GreenLine'),
+        ('Train', 10, 6, 'Model\nTraining', 'OrangeL', 'OrangeLine'),
+        ('Eval', 10, 2, 'Model\nEvaluation', 'RedL', 'RedLine'),
+        ('Deploy', 6, 2, 'Model\nDeployment', 'VioletL', 'VioletLine'),
+        ('Monitor', 2, 2, 'Model\nMonitoring', 'OrangeL', 'OrangeLine'),
+    ]
+
+    # One 1.8 x 1.2 rounded box per stage, centred on (cx, cy).
+    centers = {}
+    for name, cx, cy, label, fill_key, edge_key in stages:
+        centers[name] = (cx, cy)
+        ax.add_patch(FancyBboxPatch(
+            (cx - 0.9, cy - 0.6), 1.8, 1.2, boxstyle="round,pad=0.1",
+            fc=viz.COLORS[fill_key], ec=viz.COLORS[edge_key], linewidth=2,
+        ))
+        ax.text(cx, cy, label, ha='center', va='center', fontsize=9, fontweight='bold')
+
+    def connect(src, dst, **style):
+        # Arrows run centre to centre; shrinkA/shrinkB pull both ends back
+        # so the line does not start or stop at the box centres.
+        ax.add_artist(ConnectionPatch(
+            xyA=centers[src], xyB=centers[dst],
+            coordsA="data", coordsB="data", axesA=ax, axesB=ax,
+            arrowstyle="-|>", lw=1.5, shrinkA=20, shrinkB=20, **style,
+        ))
+
+    # Main cycle: each stage points at the next one, the last wraps to the first.
+    order = [stage[0] for stage in stages]
+    primary = viz.COLORS['primary']
+    for src, dst in zip(order, order[1:] + order[:1]):
+        connect(src, dst, connectionstyle="arc3,rad=0.0", color=primary)
+
+    # Feedback loop: curved, dashed arrow from evaluation back to preparation.
+    feedback = viz.COLORS['RedLine']
+    connect('Eval', 'Prep', connectionstyle="arc3,rad=-0.2", color=feedback, linestyle='--')
+    ax.text(8, 4, "Needs Improvement", ha='center', va='center', fontsize=8,
+            color=feedback, rotation=-25, backgroundcolor='white')
+
+    return ax
+
+
+def plot_distributed_training(ax=None):
+    """Draw the 'Data Parallel Training Flow' diagram.
+
+    An "Input Data" box at the top fans out to two dashed GPU tracks. Each
+    track shows a batch, a forward/backward step and its gradients. Both
+    tracks feed a "Gradient Aggregation (AllReduce)" box at the bottom, from
+    which a dashed "Model Update" arrow points back up.
+
+    Args:
+        ax: Matplotlib axes to draw on. When None, a new figure with
+            figsize=(10, 6) is created.
+
+    Returns:
+        The axes holding the diagram.
+    """
+    from matplotlib.patches import FancyBboxPatch, Rectangle
+
+    if ax is None:
+        _, ax = plt.subplots(figsize=(10, 6))
+
+    # Hidden axes over a fixed 12x8 data canvas; every coordinate below uses it.
+    ax.axis('off')
+    ax.set_xlim(0, 12)
+    ax.set_ylim(0, 8)
+
+    primary = viz.COLORS['primary']
+    track_edge = viz.COLORS['BlueLine']
+    update_color = viz.COLORS['RedLine']
+
+    def arrow(tail, head, **extra):
+        # Text-less annotation: only the arrow from `tail` to `head` is drawn.
+        ax.annotate("", xy=head, xytext=tail,
+                    arrowprops=dict(arrowstyle="->", color=primary, **extra))
+
+    # Input data box.
+    ax.add_patch(FancyBboxPatch(
+        (4.5, 7), 3, 0.8, boxstyle="round,pad=0.1",
+        fc=viz.COLORS['GreenL'], ec=viz.COLORS['GreenLine'], lw=2,
+    ))
+    ax.text(6, 7.4, "Input Data", ha='center', va='center', fontsize=10, fontweight='bold')
+
+    # (centre x, left edge of the dashed frame, device label, batch label)
+    tracks = [
+        (3, 1.5, "GPU 1", "Batch 1"),
+        (9, 7.5, "GPU 2", "Batch 2"),
+    ]
+
+    # Split arrows from the input box towards the top of each track.
+    for cx, _, _, _ in tracks:
+        arrow((6, 7), (cx, 6), lw=1.5)
+
+    # The two device tracks are identical apart from position and labels.
+    for cx, left, device, batch in tracks:
+        ax.add_patch(Rectangle((left, 2.5), 3, 3.5, fill=False,
+                               edgecolor=track_edge, linestyle='--', lw=1))
+        ax.text(cx, 6.2, device, ha='center', fontweight='bold', color=track_edge)
+
+        ax.text(cx, 5.5, batch, ha='center', fontsize=9,
+                bbox=dict(facecolor='white', edgecolor=primary))
+        arrow((cx, 5.2), (cx, 4.5))
+        ax.text(cx, 4.0, "Forward/\nBackward", ha='center', va='center', fontsize=9,
+                bbox=dict(facecolor=viz.COLORS['BlueL'], edgecolor='none'))
+        arrow((cx, 3.5), (cx, 3.0))
+        ax.text(cx, 2.8, "Gradients", ha='center', fontsize=9, style='italic')
+
+    # Synchronization box shared by both tracks.
+    ax.add_patch(FancyBboxPatch(
+        (4, 1), 4, 1, boxstyle="round,pad=0.1",
+        fc=viz.COLORS['VioletL'], ec=viz.COLORS['VioletLine'], lw=2,
+    ))
+    ax.text(6, 1.5, "Gradient Aggregation\n(AllReduce)", ha='center', va='center',
+            fontsize=10, fontweight='bold')
+
+    # Arrows from the bottom of each track into the synchronization box.
+    arrow((3, 2.5), (5, 2), lw=1.5)
+    arrow((9, 2.5), (7, 2), lw=1.5)
+
+    # Update arrow: the label sits at xytext and the dashed arrow points up to xy.
+    ax.annotate(
+        "Model Update",
+        xy=(6, 4.0),
+        xytext=(6, 2.0),
+        arrowprops=dict(arrowstyle="->", color=update_color, lw=2, linestyle='dashed'),
+        ha='center',
+        va='center',
+        fontsize=9,
+        color=update_color,
+        backgroundcolor='white',
+    )
+
+    return ax
+
+
 # Set style
 viz.set_book_style()

 print("Generating ML Lifecycle...")
-viz.plot_ml_lifecycle()
+plot_ml_lifecycle()
 plt.savefig(f"{OUTPUT_DIR}/ml_lifecycle.png")
 plt.close('all')

 print("Generating Distributed Training...")
-viz.plot_distributed_training()
+plot_distributed_training()
 plt.savefig(f"{OUTPUT_DIR}/distributed_training.png")
 plt.close('all')

--- a/book/tools/scripts/preview_systems_gap.py
+++ b/book/tools/scripts/preview_systems_gap.py
@@ -1,6 +1,7 @@
 import sys
 import os
 import matplotlib.pyplot as plt
+import numpy as np

 # Add physx directory to path
 project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -11,11 +12,120 @@ from physx import viz
 OUTPUT_DIR = "book/quarto/assets/preview_plots"
 os.makedirs(OUTPUT_DIR, exist_ok=True)

+def plot_systems_gap(ax=None):
+    """Plot CPU, GPU and model-demand growth trends and label the gap.
+
+    Three exponential trends, each normalised to 1.0 in 2012, are drawn on a
+    log-scaled y axis over 2012-2024.5. The region where demand exceeds the
+    GPU trend is shaded and labelled "THE SYSTEMS GAP", and named models and
+    GPUs are marked on their respective curves.
+
+    Args:
+        ax: Matplotlib axes to draw on. When None, a new default-sized
+            figure is created.
+
+    Returns:
+        The axes holding the plot.
+    """
+    if ax is None:
+        _, ax = plt.subplots()
+
+    def growth(slope, when):
+        # Relative growth since 2012 for a trend rising `slope` decades per year.
+        return 10 ** (slope * (when - 2012))
+
+    years = np.linspace(2012, 2024.5, 100)
+
+    # 1. Moore's Law (CPU Baseline)
+    # 2012 (Xeon E5-2690): ~0.37 TF -> 2022 (Xeon 8480+): ~7 TF. Growth ~19x in 10y.
+    cpu_slope = np.log10(19) / 10
+    # 2. Huang's Law (GPU Peak)
+    # 2012 (K20X): 3.95 TF -> 2022 (H100): 989 TF. Growth ~250x in 10y.
+    gpu_slope = np.log10(250) / 10
+    # 3. Model Demand
+    # 2012 (AlexNet): 4.3e16 -> 2023 (GPT-4): 2e25. Growth ~4.6e8x in 11y.
+    demand_slope = np.log10(4.6e8) / 11
+
+    moore = growth(cpu_slope, years)
+    huang = growth(gpu_slope, years)
+    demand = growth(demand_slope, years)
+
+    gpu_color = viz.COLORS['BlueLine']
+    demand_color = viz.COLORS['RedLine']
+
+    ax.plot(years, moore, ':', color=viz.COLORS['grid'], label="CPU Performance Trend", linewidth=2)
+    ax.plot(years, huang, '--', color=gpu_color, label="GPU Peak (Huang's Law)", linewidth=2.5)
+    ax.plot(years, demand, '-', color=demand_color, label="Model Demand (Scaling Laws)", linewidth=3)
+
+    # Shade only where demand is above the GPU trend.
+    ax.fill_between(years, huang, demand, where=(demand > huang),
+                    color=viz.COLORS['VioletL'], alpha=0.3)
+
+    ax.set_yscale('log')
+    ax.set_xlabel('Year')
+    ax.set_ylabel('Relative Growth (2012 = 1.0)')
+    ax.set_xlim(2012, 2024.5)
+    ax.set_ylim(0.5, 1e10)
+
+    # Place the gap label at the geometric mean of the two curves at 2020,
+    # which is the visual midpoint between them on the log axis.
+    gap_x = 2020.0
+    gap_y = np.sqrt(growth(gpu_slope, gap_x) * growth(demand_slope, gap_x))
+    ax.text(
+        gap_x,
+        gap_y,
+        "THE SYSTEMS GAP\n(Closed by Parallelism,\nArchitecture & Co-design)",
+        ha='center',
+        va='center',
+        fontweight='bold',
+        color=viz.COLORS['VioletLine'],
+        fontsize=8,
+        bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2),
+    )
+
+    def mark(series, offsets, fallback, color):
+        # Dot plus bold label per entry; labels are shifted by a per-name
+        # offset in points, or by `fallback` when the name has no entry.
+        for year, value, name in series:
+            ax.scatter(year, value, color=color, s=25, zorder=5, edgecolors='white')
+            ax.annotate(
+                name,
+                (year, value),
+                xytext=offsets.get(name, fallback),
+                textcoords='offset points',
+                fontsize=8,
+                ha='center',
+                color=color,
+                fontweight='bold',
+            )
+
+    # Markers sit exactly on their trend line at the given year.
+    model_years = [
+        (2012, "AlexNet"),
+        (2015, "ResNet"),
+        (2017, "Transformer"),
+        (2020, "GPT-3"),
+        (2023, "GPT-4"),
+    ]
+    model_offsets = {
+        "AlexNet": (0, 10),
+        "Transformer": (-15, 10),
+        "GPT-3": (-15, 8),
+        "GPT-4": (0, 8),
+    }
+    mark(
+        [(year, growth(demand_slope, year), name) for year, name in model_years],
+        model_offsets,
+        (0, 8),
+        demand_color,
+    )
+
+    hw_years = [
+        (2012, "K20X"),
+        (2016, "P100"),
+        (2022, "H100"),
+    ]
+    hw_offsets = {
+        "K20X": (0, -15),
+        "P100": (0, -15),
+        "H100": (0, -15),
+    }
+    mark(
+        [(year, growth(gpu_slope, year), name) for year, name in hw_years],
+        hw_offsets,
+        (0, -12),
+        gpu_color,
+    )
+
+    ax.legend(loc='lower right', fontsize=8)
+    return ax
+
+
 # Set style
 viz.set_book_style()

 print("Generating Systems Gap...")
-viz.plot_systems_gap()
+plot_systems_gap()
 plt.savefig(f"{OUTPUT_DIR}/systems_gap.png")
 plt.close('all')