mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-08 02:28:25 -05:00
ARCHITECTURE.md establishes that visuals are a property of any question, not a separate category. Three supported formats let the layout engine do the work: DOT for graph topology, matplotlib for curves and Gantt charts, and hand-written SVG for custom layouts.

render_visuals.py is the single entry point: it dispatches on visual.kind, runs the appropriate tool, and normalizes the rendered SVG to the book's font stack. It is idempotent and supports --dry-run.

Three new exemplars cover the DOT and matplotlib formats:
- cloud-2846 (DOT): Tree AllReduce on 8 ranks — auto-laid-out topology
- cloud-2847 (matplotlib): Queueing hockey-stick curve with SLO line
- cloud-2848 (matplotlib): Pipeline-bubble Gantt for GPipe schedule

All three are status:draft pending math review and promotion in a later batch. The existing cloud-visual-001 remains unchanged as the canonical hand-SVG exemplar.
117 lines
4.1 KiB
Python
117 lines
4.1 KiB
Python
#!/usr/bin/env python3
"""Pipeline-parallelism bubble timeline (GPipe-style, 4 stages × 4 micro-batches).

Draws a Gantt chart of a simplified GPipe-like schedule on 4 GPUs: each
stage runs all of its forward passes first, then all of its backward
passes. Every forward or backward cell takes one time unit.

Every idle interval of every stage is shaded as bubble (before the first
forward, between the last forward and the first backward, and after the
last backward), so the shaded area agrees with the annotated
bubble fraction = (P-1) / (P-1+M), where P is the number of pipeline
stages and M is the number of micro-batches.

Renders to $VISUAL_OUT_PATH.
"""

import os

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

OUT = os.environ.get("VISUAL_OUT_PATH", "cloud-2848.svg")

P = 4  # stages (GPUs)
M = 4  # micro-batches

# Colors
COL_FW = "#cfe2f3"      # forward — compute blue
COL_BW = "#d4edda"      # backward — data-flow green
COL_BUBBLE = "#f9d6d5"  # bubble — error red

# Bar height in axis units; rows are 1 unit apart, leaving a 0.2 gap.
ROW_HEIGHT = 0.8


def _row_y(stage):
    """Return the bottom y-coordinate of a stage's row (stage 0 is on top)."""
    return P - 1 - stage


def _draw_cell(t_start, stage, label, facecolor, edgecolor):
    """Draw one unit-wide forward/backward cell with a centered label."""
    y = _row_y(stage)
    ax.add_patch(mpatches.Rectangle(
        (t_start, y), 1, ROW_HEIGHT,
        facecolor=facecolor, edgecolor=edgecolor, lw=0.8,
    ))
    ax.text(t_start + 0.5, y + ROW_HEIGHT / 2, label,
            ha="center", va="center", fontsize=8)


def _shade_idle(t_start, t_end, stage):
    """Shade the span [t_start, t_end) of a stage's row as bubble.

    Empty or negative spans are skipped so no zero-width patch is emitted.
    """
    if t_end <= t_start:
        return
    ax.add_patch(mpatches.Rectangle(
        (t_start, _row_y(stage)), t_end - t_start, ROW_HEIGHT,
        facecolor=COL_BUBBLE, alpha=0.4, edgecolor="none",
    ))


# Gantt layout: y-axis = stage (top = stage 0), x-axis = time.
fig, ax = plt.subplots(figsize=(7.5, 3.5))

# Simplified GPipe-like schedule, chosen because it makes the bubble
# visually obvious: all forwards first, then all backwards. The
# accompanying question prose contrasts this with 1F1B.
fw_end = (P - 1) + M                # time when the last forward finishes on stage P-1
total_time = fw_end + (P - 1) + M   # time when stage 0 finishes its last backward

for s in range(P):
    # Forward: stage s starts at time s and runs M micro-batches back to back.
    fw_start = s
    # Backward: the wave starts on stage P-1 at fw_end and reaches stage s
    # after (P-1-s) further time units.
    bw_start = fw_end + (P - 1 - s)

    for m in range(M):
        _draw_cell(fw_start + m, s, f"F{m}", COL_FW, "#4a90c4")
        _draw_cell(bw_start + m, s, f"B{m}", COL_BW, "#3d9e5a")

    # Idle spans of this stage. Together they always total 2*(P-1) units,
    # which is what makes the overall bubble fraction (P-1)/(P-1+M).
    _shade_idle(0, fw_start, s)               # waiting for the first forward
    _shade_idle(fw_start + M, bw_start, s)    # forwards done, backward not yet arrived
    _shade_idle(bw_start + M, total_time, s)  # backwards done, pipeline draining

# Annotate bubble fraction
bubble_frac = (P - 1) / (P - 1 + M)
ax.text(
    total_time / 2, P + 0.4,
    f"Bubble fraction = (P-1)/(P-1+M) = {bubble_frac:.2f}  "
    f"(P={P} stages, M={M} micro-batches)",
    ha="center", fontsize=9, color="#555",
)

ax.set_xlim(0, total_time + 0.5)
ax.set_ylim(-0.4, P + 1.0)
ax.set_yticks([_row_y(s) + ROW_HEIGHT / 2 for s in range(P)])
ax.set_yticklabels([f"GPU {s}\n(stage {s})" for s in range(P)], fontsize=9)
ax.set_xlabel("Time (units)", fontsize=10)
ax.set_title("Pipeline parallelism: bubble structure (GPipe-style schedule)",
             fontsize=11, loc="left")

# Legend
legend_handles = [
    mpatches.Patch(facecolor=COL_FW, edgecolor="#4a90c4", label="Forward"),
    mpatches.Patch(facecolor=COL_BW, edgecolor="#3d9e5a", label="Backward"),
    mpatches.Patch(facecolor=COL_BUBBLE, alpha=0.4, label="Bubble (idle)"),
]
ax.legend(handles=legend_handles, loc="lower right", fontsize=9, frameon=False)

# Clean spines
for spine in ("top", "right", "left"):
    ax.spines[spine].set_visible(False)
ax.tick_params(axis="y", length=0)

fig.tight_layout()
fig.savefig(OUT, format="svg", bbox_inches="tight")
# Release the figure in case the script is run inside a long-lived process.
plt.close(fig)