Files
Vijay Janapa Reddi a9878ad6bd feat: import mlperf-edu pedagogical benchmark suite
Snapshot of the standalone /Users/VJ/GitHub/mlperf-edu/ repo as of
2026-04-16, brought into MLSysBook as a parked feature branch for
backup and iteration. Not for merge to dev.

Contents (88 files, ~2.3 MB):
- 16 reference workloads (cloud / edge / tiny / agent divisions)
- LoadGen proxy harness + SUT plugin protocol
- Compliance checker, autograder, hardware fingerprint
- Paper draft (paper.tex) with TikZ/SVG figure sources
- Three lab examples + practitioner workflow configs
- Workload + dataset YAML registries (single source of truth)

Excluded (per mlperf-edu/.gitignore + size constraints):
- Datasets (6.6 GB), checkpoints (260 MB), gpt2 weights (523 MB)
- Generated PDFs, .venv, build artifacts
2026-04-16 14:15:05 -04:00

60 lines
2.5 KiB
Python

import torch
import torch.nn as nn
import numpy as np
class NativeFaissEngine:
    """
    Brute-force dense vector retrieval engine (the "R" of a RAG pipeline).

    The document store is a single ``(capacity, embedding_dim)`` tensor of
    random rows, each scaled to unit L2 norm. Because queries are normalized
    the same way, the inner product of a query with a row is their cosine
    similarity. Every search scores the query against every stored row, which
    is the baseline students are expected to beat (e.g. with an approximate
    index such as HNSW).

    Attributes:
        embedding_dim: Width of each stored vector.
        index: The ``(capacity, embedding_dim)`` tensor of unit-norm rows.
    """

    def __init__(self, embedding_dim=768, capacity=100000):
        """
        Build a synthetic document store.

        Args:
            embedding_dim: Number of features per stored vector.
            capacity: Number of stored vectors (rows of ``index``).
        """
        self.embedding_dim = embedding_dim
        # Random rows stand in for real passage embeddings.
        self.index = torch.randn(capacity, embedding_dim)
        # Unit-normalize each row so a dot product equals cosine similarity.
        self.index = torch.nn.functional.normalize(self.index, p=2, dim=1)

    def search(self, query_vectors: torch.Tensor, top_k=5):
        """
        Return the ``top_k`` most similar stored rows for each query.

        Args:
            query_vectors: Floating-point tensor of shape
                ``(num_queries, embedding_dim)``.
            top_k: Number of neighbours to return per query. Values larger
                than the number of stored rows are clamped to that number.

        Returns:
            A ``(top_indices, top_scores)`` pair, each of shape
            ``(num_queries, k)`` with ``k = min(top_k, capacity)``.
            ``top_indices`` holds row numbers into ``index`` and
            ``top_scores`` the matching cosine similarities, best first.
        """
        query_vectors = torch.nn.functional.normalize(query_vectors, p=2, dim=1)

        # This engine is a plain object, not an nn.Module, so nothing moves
        # the store when the surrounding model is moved. Relocate it once,
        # on first use from a new device, and keep it there.
        if self.index.device != query_vectors.device:
            self.index = self.index.to(query_vectors.device)
        # Match the query dtype for this call only; the stored copy keeps its
        # original precision. This is a no-op when the dtypes already agree.
        index = self.index.to(dtype=query_vectors.dtype)

        # Exhaustive scan: one dot product per (query, stored row) pair.
        similarity_scores = torch.matmul(query_vectors, index.T)

        # torch.topk rejects k larger than the dimension being ranked.
        k = min(top_k, similarity_scores.shape[1])
        top_scores, top_indices = torch.topk(similarity_scores, k=k, dim=1)
        return top_indices, top_scores
class RagWhiteBox(nn.Module):
    """
    Minimal retrieval-augmented generation model.

    A forward pass mean-pools token embeddings into one query vector per
    sequence, looks up the nearest stored document vectors with a
    ``NativeFaissEngine``, mixes those vectors into the query, and maps the
    result to vocabulary logits with a two-layer MLP.

    The retriever's index is not a parameter or buffer of this module, so it
    is not part of ``state_dict()`` and is not moved by ``.to(...)``.
    """

    def __init__(self, n_embd=768, vocab_size=32000, top_k=3, capacity=100000):
        """
        Args:
            n_embd: Embedding width shared by tokens and stored documents.
            vocab_size: Number of token ids; also the width of the logits.
            top_k: Number of documents retrieved per query in ``forward``.
            capacity: Number of vectors held by the retriever's store.
        """
        super().__init__()
        self.top_k = top_k
        self.retriever = NativeFaissEngine(embedding_dim=n_embd, capacity=capacity)
        # Token embedding table used to build the query vector.
        self.embed = nn.Embedding(vocab_size, n_embd)
        # Stand-in "generator": a single MLP from embedding space to logits.
        self.generator = nn.Sequential(
            nn.Linear(n_embd, n_embd * 4),
            nn.GELU(),
            nn.Linear(n_embd * 4, vocab_size),
        )

    def forward(self, input_ids):
        """
        Compute vocabulary logits for a batch of token sequences.

        Args:
            input_ids: Integer tensor of token ids, shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, vocab_size)``.
        """
        # 1. Query embedding: mean-pool the token embeddings over the sequence.
        query_embeds = self.embed(input_ids).mean(dim=1)

        # 2. Retrieval. Only the (non-differentiable) indices are used, so the
        #    query is detached to avoid building an autograd graph for scores
        #    that are thrown away. It is also moved to wherever the store
        #    lives, because the store does not follow this module's `.to()`.
        doc_index = self.retriever.index
        retrieval_query = query_embeds.detach().to(
            device=doc_index.device, dtype=doc_index.dtype
        )
        retrieved_idx, _ = self.retriever.search(retrieval_query, top_k=self.top_k)

        # 3. Augmentation: mix in the *content* of the retrieved documents.
        #    Gather gives (batch, top_k, n_embd); averaging over top_k yields
        #    one context vector per query. (Row ids themselves carry no
        #    information about the documents and must not be used as values.)
        retrieved_vectors = doc_index[retrieved_idx]
        context = retrieved_vectors.mean(dim=1).to(
            device=query_embeds.device, dtype=query_embeds.dtype
        )
        augmented_embeds = query_embeds + context

        # 4. Generation.
        return self.generator(augmented_embeds)