mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-08 18:01:20 -05:00
Snapshot of the standalone /Users/VJ/GitHub/mlperf-edu/ repo as of 2026-04-16, brought into MLSysBook as a parked feature branch for backup and iteration. Not for merge to dev. Contents (88 files, ~2.3 MB): - 16 reference workloads (cloud / edge / tiny / agent divisions) - LoadGen proxy harness + SUT plugin protocol - Compliance checker, autograder, hardware fingerprint - Paper draft (paper.tex) with TikZ/SVG figure sources - Three lab examples + practitioner workflow configs - Workload + dataset YAML registries (single source of truth) Excluded (per mlperf-edu/.gitignore + size constraints): - Datasets (6.6 GB), checkpoints (260 MB), gpt2 weights (523 MB) - Generated PDFs, .venv, build artifacts
109 lines
3.9 KiB
Python
109 lines
3.9 KiB
Python
"""
|
|
MLPerf EDU: Micro-DLRM (Cloud Division)
|
|
|
|
A scaled-down Deep Learning Recommendation Model for click-through rate
|
|
prediction, mapping the MLPerf Training DLRM benchmark to laptop scale.
|
|
|
|
Architecture:
|
|
Dense features → Bottom MLP → dense embedding
|
|
Sparse features → EmbeddingBag tables → sparse embeddings
|
|
[dense_emb, sparse_embs] → concatenate → Top MLP → sigmoid → CTR
|
|
|
|
The model demonstrates the unique memory access pattern of recommendation:
|
|
- Sparse embeddings → memory bandwidth bound (random lookups)
|
|
- Dense MLP → compute bound (matrix multiplications)
|
|
|
|
Dataset: MovieLens-100K (Harper & Konstan, 2015)
|
|
- 100,000 ratings from 943 users on 1,682 movies
|
|
- Binarized at threshold 4: rating >= 4 → positive click
|
|
- Ships locally in data/movielens/ml-100k/ (5 MB)
|
|
|
|
Quality Target: Acc > 0.70 on MovieLens binary click prediction (best val ~71%)
|
|
|
|
Provenance: Naumov et al. 2019, "Deep Learning Recommendation Model"
|
|
"""
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
|
|
|
|
class MicroDLRMWhiteBox(nn.Module):
    """
    Micro-scale DLRM for MovieLens-100K recommendation.

    Implements the core DLRM pattern: separate processing of dense
    (continuous) and sparse (categorical) features, followed by
    feature interaction and CTR prediction.

    Default embedding sizes match MovieLens-100K:
      - user_id: 943 users
      - item_id: 1682 items
      - occupation: 21 categories

    Submodules:
      - emb_l: one ``nn.EmbeddingBag`` (mode="sum") per categorical feature
      - bot_l: bottom MLP (Linear + ReLU pairs) applied to dense features
      - top_l: top MLP (Linear + ReLU pairs, then a final Linear + Sigmoid)
    """

    def __init__(self,
                 m_spa=8,
                 num_embeddings=(943, 1682, 21),
                 ln_bot=(16, 8, 8),
                 ln_top=(32, 16, 1)):
        """
        Args:
            m_spa: embedding dimension shared by every sparse table.
            num_embeddings: sequence with the number of rows of each
                embedding table (one entry per categorical feature).
            ln_bot: layer widths of the bottom MLP; ``ln_bot[0]`` is the
                number of dense input features, ``ln_bot[-1]`` the width
                of the dense embedding.
            ln_top: widths of the top MLP layers. The input width is
                derived from the concatenated features, so every entry
                is an output width; ``ln_top[-1]`` is the model output
                width.

        Raises:
            ValueError: if ``ln_bot`` or ``ln_top`` is empty.
        """
        super().__init__()

        # Defaults are tuples (not lists) so they cannot be shared and
        # mutated across instances; callers may still pass lists.
        if len(ln_bot) == 0:
            raise ValueError("ln_bot must contain at least the dense input width")
        if len(ln_top) == 0:
            raise ValueError("ln_top must contain at least the output width")

        # Sparse: embedding tables for categorical features.
        # Created first so parameter initialisation order is stable.
        self.emb_l = nn.ModuleList([
            nn.EmbeddingBag(n, m_spa, mode="sum", sparse=False)
            for n in num_embeddings
        ])

        # Dense: bottom MLP for continuous features. Each consecutive pair
        # of widths becomes Linear -> ReLU. A single-entry ln_bot yields an
        # empty Sequential, i.e. the dense features pass through unchanged.
        bot_layers = []
        for fan_in, fan_out in zip(ln_bot, ln_bot[1:]):
            bot_layers.append(nn.Linear(fan_in, fan_out))
            bot_layers.append(nn.ReLU())
        self.bot_l = nn.Sequential(*bot_layers)

        # Feature interaction: concat dense output + all sparse embeddings
        cross_dim = ln_bot[-1] + len(num_embeddings) * m_spa

        # Top MLP: CTR prediction. Hidden layers use ReLU; the last layer
        # is followed by a Sigmoid so the output is a probability.
        top_layers = []
        in_dim = cross_dim
        for out_dim in ln_top[:-1]:
            top_layers.append(nn.Linear(in_dim, out_dim))
            top_layers.append(nn.ReLU())
            in_dim = out_dim
        top_layers.append(nn.Linear(in_dim, ln_top[-1]))
        top_layers.append(nn.Sigmoid())
        self.top_l = nn.Sequential(*top_layers)

    def forward(self, dense_x, sparse_indices, sparse_offsets):
        """
        Args:
            dense_x: (B, ln_bot[0]) continuous features; (B, 16) with the
                default configuration.
            sparse_indices: list of (B,) index tensors for each embedding table
            sparse_offsets: list of (B,) offset tensors for EmbeddingBag

        Only the first ``len(self.emb_l)`` entries of ``sparse_indices`` and
        ``sparse_offsets`` are used; any extra entries are ignored.

        Returns:
            (B, ln_top[-1]) click-through probability; (B, 1) with the
            default configuration.

        Raises:
            ValueError: if fewer index or offset entries are supplied than
                there are embedding tables.
        """
        n_tables = len(self.emb_l)
        if len(sparse_indices) < n_tables or len(sparse_offsets) < n_tables:
            raise ValueError(
                f"expected at least {n_tables} sparse index/offset entries, "
                f"got {len(sparse_indices)} indices and "
                f"{len(sparse_offsets)} offsets"
            )

        # Process dense features through bottom MLP
        x_dense = self.bot_l(dense_x)

        # Lookup sparse embeddings: one pooled (sum) vector per table
        x_sparse = [
            emb(sparse_indices[i], sparse_offsets[i])
            for i, emb in enumerate(self.emb_l)
        ]

        # Feature interaction: concatenate dense + sparse
        # NOTE: The official DLRM uses dot-product interaction:
        #   T = stack([x_dense] + x_sparse)   # (B, n_features, embed_dim)
        #   Z = bmm(T, T.transpose(1,2))      # (B, n, n) pairwise interactions
        #   flat = Z[triu_indices]            # upper triangle features
        # We use concat for simplicity. Switching to dot-product interaction
        # is a pedagogical exercise that exposes feature crossing and the
        # compute vs. memory tradeoff in sparse-dense architectures.
        interaction = torch.cat([x_dense] + x_sparse, dim=1)

        # Predict CTR
        return self.top_l(interaction)
|