Files
cs249r_book/mlperf-edu/reference/cloud/micro_diffusion.py
Vijay Janapa Reddi a9878ad6bd feat: import mlperf-edu pedagogical benchmark suite
Snapshot of the standalone /Users/VJ/GitHub/mlperf-edu/ repo as of
2026-04-16, brought into MLSysBook as a parked feature branch for
backup and iteration. Not for merge to dev.

Contents (88 files, ~2.3 MB):
- 16 reference workloads (cloud / edge / tiny / agent divisions)
- LoadGen proxy harness + SUT plugin protocol
- Compliance checker, autograder, hardware fingerprint
- Paper draft (paper.tex) with TikZ/SVG figure sources
- Three lab examples + practitioner workflow configs
- Workload + dataset YAML registries (single source of truth)

Excluded (per mlperf-edu/.gitignore + size constraints):
- Datasets (6.6 GB), checkpoints (260 MB), gpt2 weights (523 MB)
- Generated PDFs, .venv, build artifacts
2026-04-16 14:15:05 -04:00

126 lines
3.9 KiB
Python

"""
MLPerf EDU: Micro-Diffusion U-Net (Cloud Division)
A simplified U-Net denoising autoencoder for image generation,
mapping the MLPerf Training Stable Diffusion benchmark to laptop scale.
Architecture:
Input image → Encoder (conv → downsample) → Bottleneck
→ Decoder (upsample + skip connections) → Reconstructed image
For training, the model learns to reconstruct clean images from
noisy inputs (denoising autoencoder objective). The time-embedding
MLP is only a placeholder for the diffusion timestep conditioning that
a full DDPM pipeline would use: it is constructed, but forward() never
applies it and ignores the `t` argument.
Quality Target: MSE < 0.001 on CIFAR-10 reconstruction
Provenance: Ho et al. 2020, "Denoising Diffusion Probabilistic Models"
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class DownConv(nn.Module):
    """Encoder stage: 2x2 max-pool followed by two Conv-BatchNorm-ReLU units.

    The pooling halves the spatial resolution (rounding down for odd sizes);
    both 3x3 convolutions use padding 1, so they keep that resolution.

    Args:
        in_channels: channel count of the incoming feature map.
        out_channels: channel count produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Layers are created in execution order so parameter initialisation
        # (and the Sequential indices used as state_dict keys) stay stable.
        stages = [nn.MaxPool2d(2)]
        for stage_in in (in_channels, out_channels):
            stages += [
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Return the pooled and convolved feature map for ``x``."""
        features = self.conv(x)
        return features
class UpConv(nn.Module):
    """Decoder stage: transposed-conv upsampling, skip concat, two conv units.

    The transposed convolution doubles the spatial size and halves the channel
    count. The result is concatenated with the skip tensor along the channel
    axis, so the skip tensor must carry ``in_channels - in_channels // 2``
    channels for the first convolution to see ``in_channels`` inputs.

    Args:
        in_channels: channels of the coarse input, and of the concatenated
            tensor fed to the first convolution.
        out_channels: channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(
            in_channels, in_channels // 2, kernel_size=2, stride=2
        )
        fuse = []
        for fuse_in in (in_channels, out_channels):
            fuse.extend(
                (
                    nn.Conv2d(fuse_in, out_channels, kernel_size=3, padding=1),
                    nn.BatchNorm2d(out_channels),
                    nn.ReLU(inplace=True),
                )
            )
        self.conv = nn.Sequential(*fuse)

    def forward(self, x1, x2):
        """Upsample ``x1``, align it with skip tensor ``x2``, and fuse them.

        Args:
            x1: coarse feature map coming from the deeper layer.
            x2: skip-connection feature map from the encoder.

        Returns:
            Fused feature map with the spatial size of ``x2``.
        """
        upsampled = self.up(x1)

        # Height/width shortfall of the upsampled map relative to the skip
        # tensor; split it as evenly as possible, extra pixel on the far side.
        gap_h = x2.size(2) - upsampled.size(2)
        gap_w = x2.size(3) - upsampled.size(3)
        pad_left, pad_top = gap_w // 2, gap_h // 2
        # F.pad orders the amounts as (left, right, top, bottom).
        upsampled = F.pad(
            upsampled,
            [pad_left, gap_w - pad_left, pad_top, gap_h - pad_top],
        )

        # Skip features first, upsampled features second.
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)
class MicroDiffusionUNet(nn.Module):
    """
    Two-level U-Net used as a denoising autoencoder (about 2.0M parameters
    with the default arguments).

    Layout: a full-resolution stem (64 channels), two DownConv stages
    (128 then 256 channels), two UpConv stages that consume the matching
    encoder outputs as skip connections, and a 1x1 output projection.

    The ``time_embed`` MLP is registered as a submodule, but ``forward`` never
    calls it; it is a placeholder. Turning this into a full DDPM would still
    require:
    - Sinusoidal time embeddings
    - Time-conditioned residual blocks
    - An iterative sampling loop

    Args:
        n_channels: channels of the input image.
        n_classes: channels of the output produced by the final projection.
    """

    def __init__(self, n_channels=3, n_classes=3):
        super().__init__()

        # Placeholder timestep MLP (128 -> 256 -> 256); unused by forward().
        self.time_embed = nn.Sequential(
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
        )

        # Encoder: full-resolution stem followed by two downsampling stages.
        stem = []
        for stem_in in (n_channels, 64):
            stem += [
                nn.Conv2d(stem_in, 64, kernel_size=3, padding=1),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
            ]
        self.inc = nn.Sequential(*stem)
        self.down1 = DownConv(64, 128)
        self.down2 = DownConv(128, 256)

        # Decoder: mirrors the encoder, each stage fed by a skip connection.
        self.up1 = UpConv(256, 128)
        self.up2 = UpConv(128, 64)

        # 1x1 projection from the 64 decoder channels to the output channels.
        self.outc = nn.Conv2d(64, n_classes, kernel_size=1)

    def forward(self, x, t=None):
        """
        Run the encoder-decoder pass.

        Args:
            x: (B, n_channels, H, W) input image batch
            t: optional timestep; accepted for interface compatibility but
               ignored by this implementation
        Returns:
            (B, n_classes, H, W) reconstructed image batch
        """
        stem_features = self.inc(x)
        mid_features = self.down1(stem_features)
        bottleneck = self.down2(mid_features)

        decoded = self.up1(bottleneck, mid_features)
        decoded = self.up2(decoded, stem_features)
        return self.outc(decoded)