#!/usr/bin/env python3
"""Pipeline-parallelism bubble timeline (1F1B variant, 4 stages × 4 micro-batches).

Visualizes the warm-up + steady-state + cool-down regions of a 1F1B
schedule on 4 GPUs. Bubble fraction = (P-1) / (P-1+M) where P is the
number of pipeline stages and M is the number of micro-batches.

Renders to $VISUAL_OUT_PATH.
"""

import os

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

OUT = os.environ.get("VISUAL_OUT_PATH", "cloud-2848.svg")

P = 4   # stages (GPUs)
M = 4   # micro-batches

# Each cell = 1 unit of time. Forward and backward each take 1 unit per stage.
# 1F1B: warm-up sends P-1 forwards, then alternates F/B in steady state,
# cool-down has P-1 backwards.

# We'll lay out a Gantt: y-axis = stage (top = stage 0), x-axis = time.
fig, ax = plt.subplots(figsize=(7.5, 3.5))

# Colors
COL_FW = "#cfe2f3"   # forward — compute blue
COL_BW = "#d4edda"   # backward — data-flow green
COL_BUBBLE = "#f9d6d5"  # bubble — error red

# Schedule generator (1F1B-naive, just for visualization)
# For visualization clarity, we use a simplified GPipe-like schedule:
# all forwards first, then all backwards. Bubble = (P-1) at start + (P-1) at end.
# This makes the bubble visually obvious; the question prose will then
# show how 1F1B reduces this.

# Forward pass: stage s starts at time s, processes M microbatches consecutively
for s in range(P):
    for m in range(M):
        t_start = s + m
        ax.add_patch(mpatches.Rectangle(
            (t_start, P - 1 - s), 1, 0.8,
            facecolor=COL_FW, edgecolor="#4a90c4", lw=0.8,
        ))
        ax.text(t_start + 0.5, P - 1 - s + 0.4, f"F{m}",
                ha="center", va="center", fontsize=8)

# Backward pass: stage s starts at time (P-1) + (M-1) + (P-1-s) + 1 = ...
# Simpler: backward from stage P-1 down to stage 0
fw_end = (P - 1) + M  # time when last forward finishes on stage P-1
for s in reversed(range(P)):
    rev_offset = (P - 1) - s
    for m in range(M):
        t_start = fw_end + rev_offset + m
        ax.add_patch(mpatches.Rectangle(
            (t_start, P - 1 - s), 1, 0.8,
            facecolor=COL_BW, edgecolor="#3d9e5a", lw=0.8,
        ))
        ax.text(t_start + 0.5, P - 1 - s + 0.4, f"B{m}",
                ha="center", va="center", fontsize=8)

# Bubble shading: idle time on each stage at start (warm-up)
for s in range(P):
    if s > 0:
        ax.add_patch(mpatches.Rectangle(
            (0, P - 1 - s), s, 0.8,
            facecolor=COL_BUBBLE, alpha=0.4, edgecolor="none",
        ))

# Bubble shading: idle time on each stage at end (cool-down)
total_time = fw_end + (P - 1) + M
for s in range(P):
    cool_start = fw_end + ((P - 1) - s) + M  # when this stage finishes backward
    cool_dur = total_time - cool_start
    if cool_dur > 0:
        ax.add_patch(mpatches.Rectangle(
            (cool_start, P - 1 - s), cool_dur, 0.8,
            facecolor=COL_BUBBLE, alpha=0.4, edgecolor="none",
        ))

# Annotate bubble fraction
bubble_units = P * (P - 1)  # warm-up + cool-down per stage
total_units = P * total_time
bubble_frac = bubble_units / total_units
ax.text(
    total_time / 2, P + 0.4,
    f"Bubble fraction = (P-1)/(P-1+M) = {(P-1)/(P-1+M):.2f}  "
    f"(P={P} stages, M={M} micro-batches)",
    ha="center", fontsize=9, color="#555",
)

ax.set_xlim(0, total_time + 0.5)
ax.set_ylim(-0.4, P + 1.0)
ax.set_yticks([P - 1 - s + 0.4 for s in range(P)])
ax.set_yticklabels([f"GPU {s}\n(stage {s})" for s in range(P)], fontsize=9)
ax.set_xlabel("Time (units)", fontsize=10)
ax.set_title("Pipeline parallelism: bubble structure (GPipe-style schedule)",
             fontsize=11, loc="left")

# Legend
legend_handles = [
    mpatches.Patch(facecolor=COL_FW, edgecolor="#4a90c4", label="Forward"),
    mpatches.Patch(facecolor=COL_BW, edgecolor="#3d9e5a", label="Backward"),
    mpatches.Patch(facecolor=COL_BUBBLE, alpha=0.4, label="Bubble (idle)"),
]
ax.legend(handles=legend_handles, loc="lower right", fontsize=9, frameon=False)

# Clean spines
for spine in ("top", "right", "left"):
    ax.spines[spine].set_visible(False)
ax.tick_params(axis="y", length=0)

fig.tight_layout()
fig.savefig(OUT, format="svg", bbox_inches="tight")