mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-23 15:35:52 -05:00
Snapshot of the standalone /Users/VJ/GitHub/mlperf-edu/ repo as of 2026-04-16, brought into MLSysBook as a parked feature branch for backup and iteration. Not for merge to dev. Contents (88 files, ~2.3 MB): - 16 reference workloads (cloud / edge / tiny / agent divisions) - LoadGen proxy harness + SUT plugin protocol - Compliance checker, autograder, hardware fingerprint - Paper draft (paper.tex) with TikZ/SVG figure sources - Three lab examples + practitioner workflow configs - Workload + dataset YAML registries (single source of truth) Excluded (per mlperf-edu/.gitignore + size constraints): - Datasets (6.6 GB), checkpoints (260 MB), gpt2 weights (523 MB) - Generated PDFs, .venv, build artifacts
60 lines
2.5 KiB
Python
60 lines
2.5 KiB
Python
import torch
|
|
import torch.nn as nn
|
|
import numpy as np
|
|
|
|
class NativeFaissEngine:
    """Brute-force vector retrieval engine used as a teaching stand-in for a vector DB.

    The "document store" is a matrix of random unit-length vectors. ``search``
    scores every stored vector against each query with a single dense matrix
    multiply, so the work grows linearly with ``capacity``. Students are meant
    to replace that exhaustive scan with an approximate index such as HNSW.
    """

    def __init__(self, embedding_dim=768, capacity=100000):
        """Create a synthetic index of ``capacity`` vectors of size ``embedding_dim``."""
        self.embedding_dim = embedding_dim
        # Rows are L2-normalised once up front so that a plain dot product with
        # a normalised query equals cosine similarity.
        self.index = torch.nn.functional.normalize(
            torch.randn(capacity, embedding_dim), p=2, dim=1
        )

    def search(self, query_vectors: torch.Tensor, top_k=5):
        """Return the ``top_k`` most similar index rows for each query.

        Args:
            query_vectors: 2-D tensor of shape ``(num_queries, embedding_dim)``.
                It may live on any device and use any floating dtype.
            top_k: Number of neighbours to return per query.

        Returns:
            ``(top_indices, top_scores)``, each of shape ``(num_queries, top_k)``.
            Scores are cosine similarities in the index's dtype, sorted from
            most to least similar.
        """
        # The index is a plain tensor rather than a registered buffer, so
        # calling ``.to(device)`` on a module that owns this engine does not
        # move it. Follow the queries' device lazily; the moved copy is kept so
        # the transfer happens only once per device change.
        if self.index.device != query_vectors.device:
            self.index = self.index.to(query_vectors.device)

        # matmul refuses mixed dtypes; casting the (small) query batch is far
        # cheaper than re-casting the whole index on every call.
        queries = query_vectors.to(self.index.dtype)
        queries = torch.nn.functional.normalize(queries, p=2, dim=1)

        # Exhaustive scan: one dot product per (query, stored vector) pair.
        similarity_scores = torch.matmul(queries, self.index.T)
        top_scores, top_indices = torch.topk(similarity_scores, k=top_k, dim=1)

        return top_indices, top_scores
|
|
|
|
class RagWhiteBox(nn.Module):
    """Toy retrieval-augmented generation model: embed, retrieve, augment, generate.

    Args:
        n_embd: Width of the token embeddings and of the retrieval vectors.
        vocab_size: Size of the token vocabulary; also the width of the output.
        capacity: Number of synthetic vectors held by the retriever index.
        top_k: Number of neighbours retrieved per query in ``forward``.
    """

    def __init__(self, n_embd=768, vocab_size=32000, capacity=100000, top_k=3):
        super().__init__()
        self.top_k = top_k
        self.retriever = NativeFaissEngine(embedding_dim=n_embd, capacity=capacity)

        # Deliberately small "generator": token embedding plus a two-layer MLP
        # that projects the pooled representation onto the vocabulary.
        self.embed = nn.Embedding(vocab_size, n_embd)
        self.generator = nn.Sequential(
            nn.Linear(n_embd, n_embd * 4),
            nn.GELU(),
            nn.Linear(n_embd * 4, vocab_size),
        )

    def forward(self, input_ids):
        """Run one retrieve-then-generate pass.

        Args:
            input_ids: Integer token ids, expected as ``(batch, seq_len)`` so
                that mean pooling over dim 1 yields one vector per sequence.

        Returns:
            Tensor of shape ``(batch, vocab_size)`` produced by the generator.
        """
        # 1. Mean-pool the token embeddings into a single query vector per row.
        query_embeds = self.embed(input_ids).mean(dim=1)

        # 2. Retrieval phase: only the neighbour indices are used downstream.
        retrieved_idx, _ = self.retriever.search(query_embeds, top_k=self.top_k)

        # 3. Stand-in for context augmentation: fold the retrieved indices into
        #    a small scalar offset per row. The scaling is done in float32
        #    first (index sums can exceed the fp16 range), then cast to the
        #    query dtype so reduced-precision models are not silently promoted
        #    to float32 ahead of the generator's Linear layers.
        index_signal = retrieved_idx.sum(dim=1, keepdim=True).float() * 0.001
        augmented_embeds = query_embeds + index_signal.to(query_embeds.dtype)

        # 4. Generation.
        return self.generator(augmented_embeds)
|