mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-04-30 09:38:38 -05:00
refactor: inline QMD plots and slim viz helpers
Move remaining plot logic into QMD blocks and keep physx/viz styling-only. Update preview scripts to use local plot code.
This commit is contained in:
@@ -11,16 +11,134 @@ from physx import viz
|
||||
# Relative path of the directory that receives the generated preview PNGs.
OUTPUT_DIR = "book/quarto/assets/preview_plots"
|
||||
# Create the output directory up front; exist_ok=True keeps reruns from failing
# when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
def plot_ml_lifecycle(ax=None):
    """Draw the 'ML System Lifecycle' diagram as a closed loop of six stages.

    Six rounded boxes are laid out on a 3x2 grid and joined by straight
    arrows that run clockwise around the grid and back to the first stage.
    One additional dashed, curved arrow goes from evaluation back to data
    preparation and is labelled "Needs Improvement".

    Args:
        ax: Axes to draw on. When None, a new 10x6 figure is created with
            ``plt.subplots``.

    Returns:
        The axes that were drawn on.
    """
    from matplotlib.patches import ConnectionPatch, FancyBboxPatch

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    palette = viz.COLORS

    # Fixed 12x8 canvas with the axis decorations hidden; every coordinate
    # below is expressed in these data units.
    ax.axis('off')
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 8)

    # stage key -> (x, y, label, fill colour, outline colour).
    # Insertion order is also the order in which the main cycle is traversed.
    stages = {
        'Collection': (2, 6, 'Data\nCollection', palette['BlueL'], palette['BlueLine']),
        'Prep': (6, 6, 'Data\nPreparation', palette['GreenL'], palette['GreenLine']),
        'Train': (10, 6, 'Model\nTraining', palette['OrangeL'], palette['OrangeLine']),
        'Eval': (10, 2, 'Model\nEvaluation', palette['RedL'], palette['RedLine']),
        'Deploy': (6, 2, 'Model\nDeployment', palette['VioletL'], palette['VioletLine']),
        'Monitor': (2, 2, 'Model\nMonitoring', palette['OrangeL'], palette['OrangeLine']),
    }

    # Boxes: 1.8 x 1.2 rounded rectangles centred on each stage position.
    for cx, cy, caption, fill, outline in stages.values():
        box = FancyBboxPatch(
            (cx - 0.9, cy - 0.6), 1.8, 1.2,
            boxstyle="round,pad=0.1", fc=fill, ec=outline, linewidth=2,
        )
        ax.add_patch(box)
        ax.text(cx, cy, caption, ha='center', va='center', fontsize=9, fontweight='bold')

    def connect(src, dst, **style):
        """Add an arrow between the centres of two stages.

        shrinkA/shrinkB pull both ends back so the arrow does not start or
        end at the box centres.
        """
        sx, sy = stages[src][:2]
        ex, ey = stages[dst][:2]
        link = ConnectionPatch(
            xyA=(sx, sy), xyB=(ex, ey),
            coordsA="data", coordsB="data",
            axesA=ax, axesB=ax,
            arrowstyle="-|>", **style,
            shrinkA=20, shrinkB=20,
        )
        ax.add_artist(link)

    # Main cycle: each stage points at the next one, the last wraps around
    # to the first.
    order = list(stages)
    for src, dst in zip(order, order[1:] + order[:1]):
        connect(src, dst, connectionstyle="arc3,rad=0.0", color=palette['primary'], lw=1.5)

    # Feedback loop: dashed, curved arrow from evaluation back to preparation.
    connect('Eval', 'Prep', connectionstyle="arc3,rad=-0.2",
            color=palette['RedLine'], lw=1.5, linestyle='--')
    ax.text(8, 4, "Needs Improvement", ha='center', va='center', fontsize=8,
            color=palette['RedLine'], rotation=-25, backgroundcolor='white')

    return ax
|
||||
|
||||
|
||||
def plot_distributed_training(ax=None):
    """Draw the 'Data Parallel Training Flow' diagram.

    Top to bottom the figure shows an "Input Data" box, two arrows splitting
    towards two dashed GPU tracks (each containing a batch label, a
    forward/backward step and a gradients label), a "Gradient Aggregation
    (AllReduce)" box fed by both tracks, and a dashed "Model Update" arrow
    pointing back up from the aggregation box.

    Args:
        ax: Axes to draw on. When None, a new 10x6 figure is created with
            ``plt.subplots``.

    Returns:
        The axes that were drawn on.
    """
    from matplotlib.patches import FancyBboxPatch, Rectangle

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    palette = viz.COLORS
    ink = palette['primary']

    # Fixed 12x8 canvas with the axis decorations hidden.
    ax.axis('off')
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 8)

    def arrow(tail, head, **extra):
        """Draw an unlabelled '->' arrow in the primary colour from tail to head."""
        ax.annotate("", xy=head, xytext=tail,
                    arrowprops=dict(arrowstyle="->", color=ink, **extra))

    # Input data box.
    source_box = FancyBboxPatch((4.5, 7), 3, 0.8, boxstyle="round,pad=0.1",
                                fc=palette['GreenL'], ec=palette['GreenLine'], lw=2)
    ax.add_patch(source_box)
    ax.text(6, 7.4, "Input Data", ha='center', va='center', fontsize=10, fontweight='bold')

    # Split arrows from the input box towards the top of each GPU track.
    for track_x in (3, 9):
        arrow((6, 7), (track_x, 6), lw=1.5)

    # GPU tracks: (centre x, frame left edge, track title, batch label).
    tracks = (
        (3, 1.5, "GPU 1", "Batch 1"),
        (9, 7.5, "GPU 2", "Batch 2"),
    )
    for cx, left, title, batch in tracks:
        frame = Rectangle((left, 2.5), 3, 3.5, fill=False,
                          edgecolor=palette['BlueLine'], linestyle='--', lw=1)
        ax.add_patch(frame)
        ax.text(cx, 6.2, title, ha='center', fontweight='bold', color=palette['BlueLine'])

        ax.text(cx, 5.5, batch, ha='center', fontsize=9,
                bbox=dict(facecolor='white', edgecolor=ink))
        arrow((cx, 5.2), (cx, 4.5))
        ax.text(cx, 4.0, "Forward/\nBackward", ha='center', va='center', fontsize=9,
                bbox=dict(facecolor=palette['BlueL'], edgecolor='none'))
        arrow((cx, 3.5), (cx, 3.0))
        ax.text(cx, 2.8, "Gradients", ha='center', fontsize=9, style='italic')

    # Synchronization box.
    sync_box = FancyBboxPatch((4, 1), 4, 1, boxstyle="round,pad=0.1",
                              fc=palette['VioletL'], ec=palette['VioletLine'], lw=2)
    ax.add_patch(sync_box)
    ax.text(6, 1.5, "Gradient Aggregation\n(AllReduce)", ha='center', va='center',
            fontsize=10, fontweight='bold')

    # Arrows from the bottom of each track into the synchronization box.
    arrow((3, 2.5), (5, 2), lw=1.5)
    arrow((9, 2.5), (7, 2), lw=1.5)

    # Update arrow: dashed, labelled at its tail, pointing up from the
    # aggregation box.
    update_arrow = dict(arrowstyle="->", color=palette['RedLine'], lw=2, linestyle='dashed')
    ax.annotate(
        "Model Update",
        xy=(6, 4.0),
        xytext=(6, 2.0),
        arrowprops=update_arrow,
        ha='center',
        va='center',
        fontsize=9,
        color=palette['RedLine'],
        backgroundcolor='white',
    )

    return ax
|
||||
|
||||
|
||||
# Set style
viz.set_book_style()

# Each preview is rendered by the plot function defined in this script.
# The earlier viz.plot_*() calls were dropped: they only produced an extra
# figure that was never written, because plt.savefig() saves the current
# (most recently created) figure.
print("Generating ML Lifecycle...")
plot_ml_lifecycle()
plt.savefig(f"{OUTPUT_DIR}/ml_lifecycle.png")
plt.close('all')  # release the figure before drawing the next preview

print("Generating Distributed Training...")
plot_distributed_training()
plt.savefig(f"{OUTPUT_DIR}/distributed_training.png")
plt.close('all')
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import sys
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
# Add physx directory to path
|
||||
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
@@ -11,11 +12,120 @@ from physx import viz
|
||||
# Relative path of the directory that receives the generated preview PNGs.
OUTPUT_DIR = "book/quarto/assets/preview_plots"
|
||||
# Create the output directory up front; exist_ok=True keeps reruns from failing
# when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
def plot_systems_gap(ax=None):
    """Plot CPU, GPU and model-demand growth relative to 2012 on a log axis.

    Three exponential trend lines (each normalised to 1.0 in 2012) are drawn
    for 2012-2024.5, the region where demand exceeds the GPU trend is shaded
    and captioned "THE SYSTEMS GAP", and selected models and GPUs are marked
    on their respective trend lines.

    Args:
        ax: Axes to draw on. When None, a new figure is created with
            ``plt.subplots``.

    Returns:
        The axes that were drawn on.
    """
    if ax is None:
        _, ax = plt.subplots()

    palette = viz.COLORS
    base_year = 2012
    years = np.linspace(2012, 2024.5, 100)
    elapsed = years - base_year

    def growth(slope, dt):
        """Relative growth after dt years for a log10-per-year slope."""
        return 10 ** (slope * dt)

    # 1. Moore's Law (CPU Baseline)
    # 2012 (Xeon E5-2690): ~0.37 TF -> 2022 (Xeon 8480+): ~7 TF. Growth ~19x in 10y.
    cpu_slope = np.log10(19) / 10
    # 2. Huang's Law (GPU Peak)
    # 2012 (K20X): 3.95 TF -> 2022 (H100): 989 TF. Growth ~250x in 10y.
    gpu_slope = np.log10(250) / 10
    # 3. Model Demand
    # 2012 (AlexNet): 4.3e16 -> 2023 (GPT-4): 2e25. Growth ~4.6e8x in 11y.
    demand_slope = np.log10(4.6e8) / 11

    moore = growth(cpu_slope, elapsed)
    huang = growth(gpu_slope, elapsed)
    demand = growth(demand_slope, elapsed)

    # (series, line format, colour key, legend label, line width)
    curves = (
        (moore, ':', 'grid', "CPU Performance Trend", 2),
        (huang, '--', 'BlueLine', "GPU Peak (Huang's Law)", 2.5),
        (demand, '-', 'RedLine', "Model Demand (Scaling Laws)", 3),
    )
    for series, fmt, colour_key, legend_label, width in curves:
        ax.plot(years, series, fmt, color=palette[colour_key], label=legend_label, linewidth=width)

    # Shade the band between the GPU trend and demand wherever demand is higher.
    ax.fill_between(years, huang, demand, where=(demand > huang), color=palette['VioletL'], alpha=0.3)

    ax.set_yscale('log')
    ax.set_xlabel('Year')
    ax.set_ylabel('Relative Growth (2012 = 1.0)')
    ax.set_xlim(2012, 2024.5)
    ax.set_ylim(0.5, 1e10)

    # Caption for the shaded band, placed at the geometric mean of the two
    # curves at 2020 so it sits midway between them on the log axis.
    gap_x = 2020.0
    h_val = growth(gpu_slope, gap_x - 2012)
    d_val = growth(demand_slope, gap_x - 2012)
    gap_y = np.sqrt(h_val * d_val)
    ax.text(
        gap_x,
        gap_y,
        "THE SYSTEMS GAP\n(Closed by Parallelism,\nArchitecture & Co-design)",
        ha='center',
        va='center',
        fontweight='bold',
        color=palette['VioletLine'],
        fontsize=8,
        bbox=dict(facecolor='white', alpha=0.7, edgecolor='none', pad=2),
    )

    def mark(events, slope, offsets, fallback, colour):
        """Scatter and label each (year, name) event on the trend with the given slope."""
        for year, name in events:
            value = growth(slope, year - base_year)
            ax.scatter(year, value, color=colour, s=25, zorder=5, edgecolors='white')
            ax.annotate(
                name,
                (year, value),
                xytext=offsets.get(name, fallback),
                textcoords='offset points',
                fontsize=8,
                ha='center',
                color=colour,
                fontweight='bold',
            )

    # Models sit on the demand curve; names without an explicit label offset
    # (here ResNet) use the fallback.
    models = [(2012, "AlexNet"), (2015, "ResNet"), (2017, "Transformer"),
              (2020, "GPT-3"), (2023, "GPT-4")]
    model_offsets = {
        "AlexNet": (0, 10),
        "Transformer": (-15, 10),
        "GPT-3": (-15, 8),
        "GPT-4": (0, 8),
    }
    mark(models, demand_slope, model_offsets, (0, 8), palette['RedLine'])

    # GPUs sit on the GPU trend, labelled below their markers.
    gpus = [(2012, "K20X"), (2016, "P100"), (2022, "H100")]
    hw_offsets = {
        "K20X": (0, -15),
        "P100": (0, -15),
        "H100": (0, -15),
    }
    mark(gpus, gpu_slope, hw_offsets, (0, -12), palette['BlueLine'])

    ax.legend(loc='lower right', fontsize=8)
    return ax
|
||||
|
||||
|
||||
# Set style
viz.set_book_style()

# The preview is rendered by the plot function defined in this script.
# The earlier viz.plot_systems_gap() call was dropped: it only produced an
# extra figure that was never written, because plt.savefig() saves the
# current (most recently created) figure.
print("Generating Systems Gap...")
plot_systems_gap()
plt.savefig(f"{OUTPUT_DIR}/systems_gap.png")
plt.close('all')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user