Files
cs249r_book/mlperf-edu/reference/vision_data.py
Vijay Janapa Reddi a9878ad6bd feat: import mlperf-edu pedagogical benchmark suite
Snapshot of the standalone /Users/VJ/GitHub/mlperf-edu/ repo as of
2026-04-16, brought into MLSysBook as a parked feature branch for
backup and iteration. Not for merge to dev.

Contents (88 files, ~2.3 MB):
- 16 reference workloads (cloud / edge / tiny / agent divisions)
- LoadGen proxy harness + SUT plugin protocol
- Compliance checker, autograder, hardware fingerprint
- Paper draft (paper.tex) with TikZ/SVG figure sources
- Three lab examples + practitioner workflow configs
- Workload + dataset YAML registries (single source of truth)

Excluded (per mlperf-edu/.gitignore + size constraints):
- Datasets (6.6 GB), checkpoints (260 MB), gpt2 weights (523 MB)
- Generated PDFs, .venv, build artifacts
2026-04-16 14:15:05 -04:00

53 lines
2.5 KiB
Python

import os
import torch
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR100
from torch.utils.data import DataLoader
class RealVisionTranslators:
    """Build CIFAR-100 train/test ``DataLoader`` objects from a local cache.

    The constructor prepares the cache directory and the two torchvision
    transform pipelines. :meth:`get_cifar100_loaders` constructs the datasets
    (downloading them only when the cache cannot be read) and wraps them in
    loaders.
    """

    # Per-channel statistics handed to ``transforms.Normalize`` by both
    # pipelines. NOTE(review): presumably the CIFAR-100 training-set RGB
    # mean/std -- confirm against the dataset registry.
    NORM_MEAN = (0.5071, 0.4867, 0.4408)
    NORM_STD = (0.2675, 0.2565, 0.2761)

    def __init__(self, batch_size=32, *, data_dir=None, num_workers=2):
        """Prepare the cache directory and the train/test transforms.

        Args:
            batch_size: Batch size forwarded to both loaders. ``None`` is
                passed through to ``DataLoader`` untouched.
            data_dir: Dataset root directory. Defaults to
                ``<this file's directory>/../.data/vision``.
            num_workers: Worker-process count forwarded to both loaders.

        Raises:
            ValueError: If ``batch_size`` is neither ``None`` nor a positive
                ``int``.
        """
        # Fail fast: DataLoader is expected to reject these values anyway,
        # but only after the datasets were built (and possibly downloaded).
        if batch_size is not None and (
            isinstance(batch_size, bool)
            or not isinstance(batch_size, int)
            or batch_size <= 0
        ):
            raise ValueError(
                f"batch_size should be a positive integer value, but got batch_size={batch_size!r}"
            )
        self.batch_size = batch_size
        self.num_workers = num_workers

        if data_dir is None:
            data_dir = os.path.join(os.path.dirname(__file__), "..", ".data", "vision")
        self.data_dir = data_dir
        # Created eagerly so the dataset constructors always see an existing root.
        os.makedirs(self.data_dir, exist_ok=True)

        # Training pipeline: random 32-pixel crop (4 pixels of padding) and a
        # random horizontal flip, then tensor conversion and normalization.
        self.transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(self.NORM_MEAN, self.NORM_STD),
        ])
        # Test pipeline: no augmentation, same normalization.
        self.transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(self.NORM_MEAN, self.NORM_STD),
        ])

    def _build_datasets(self, download):
        """Return ``(trainset, testset)`` rooted at ``self.data_dir``."""
        trainset = CIFAR100(
            root=self.data_dir,
            train=True,
            download=download,
            transform=self.transform_train,
        )
        testset = CIFAR100(
            root=self.data_dir,
            train=False,
            download=download,
            transform=self.transform_test,
        )
        return trainset, testset

    def get_cifar100_loaders(self):
        """Return ``(trainloader, testloader)`` for CIFAR-100.

        The datasets are first opened from the local cache without any
        download. If that fails, a hint is printed and both datasets are
        rebuilt with ``download=True`` so the call can still succeed.

        Returns:
            A 2-tuple of ``DataLoader`` objects: the training loader
            (shuffled) and the test loader (not shuffled).
        """
        try:
            trainset, testset = self._build_datasets(download=False)
        except (RuntimeError, OSError) as e:
            # Only cache-miss style failures reach the download fallback;
            # anything else propagates instead of triggering a network fetch.
            # NOTE(review): torchvision is expected to raise RuntimeError for a
            # missing or corrupt cache -- confirm for the pinned version.
            # NOTE(review): the [yellow]/[dim] tags look like Rich markup, but
            # builtin print emits them verbatim -- confirm the intended console.
            print(f"[yellow]⚠️ True Datasets not tracked natively: {e}[/yellow]")
            print("[dim]Run `mlperf fetch --task resnet-train` to organically populate the `.data/` cache bounds remotely![/dim]")
            # Automatic fallback so a bare run still works without a pre-filled cache.
            trainset, testset = self._build_datasets(download=True)

        trainloader = DataLoader(
            trainset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )
        testloader = DataLoader(
            testset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )
        return trainloader, testloader