Reset package and export modules 01-07 only (skip broken spatial module)

This commit is contained in:
Vijay Janapa Reddi
2025-09-30 13:41:00 -04:00
parent a0aef7d52e
commit caff73a75b
24 changed files with 807 additions and 647 deletions


@@ -1,148 +0,0 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb.
# %% auto 0
__all__ = ['Tensor', 'Linear', 'MultiHeadAttention', 'Embedding', 'gelu']
# %% ../../modules/source/13_transformers/transformers_dev.ipynb 1
import numpy as np
import math
from typing import Optional, List
# Minimal implementations for development - in practice these import from previous modules
class Tensor:
    """Minimal Tensor class for transformer development - imports from Module 01 in practice."""

    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.shape = self.data.shape
        self.size = self.data.size
        self.requires_grad = requires_grad
        self.grad = None

    def __add__(self, other):
        if isinstance(other, Tensor):
            return Tensor(self.data + other.data)
        return Tensor(self.data + other)

    def __mul__(self, other):
        if isinstance(other, Tensor):
            return Tensor(self.data * other.data)
        return Tensor(self.data * other)

    def matmul(self, other):
        return Tensor(np.dot(self.data, other.data))

    def sum(self, axis=None, keepdims=False):
        return Tensor(self.data.sum(axis=axis, keepdims=keepdims))

    def mean(self, axis=None, keepdims=False):
        return Tensor(self.data.mean(axis=axis, keepdims=keepdims))

    def reshape(self, *shape):
        return Tensor(self.data.reshape(shape))

    def __repr__(self):
        return f"Tensor(data={self.data}, shape={self.shape})"

class Linear:
    """Minimal Linear layer - imports from Module 03 in practice."""

    def __init__(self, in_features, out_features, bias=True):
        # Xavier/Glorot initialization
        std = math.sqrt(2.0 / (in_features + out_features))
        self.weight = Tensor(np.random.normal(0, std, (in_features, out_features)))
        self.bias = Tensor(np.zeros(out_features)) if bias else None

    def forward(self, x):
        output = x.matmul(self.weight)
        if self.bias is not None:
            output = output + self.bias
        return output

    def parameters(self):
        params = [self.weight]
        if self.bias is not None:
            params.append(self.bias)
        return params

class MultiHeadAttention:
    """Minimal MultiHeadAttention - imports from Module 12 in practice."""

    def __init__(self, embed_dim, num_heads):
        assert embed_dim % num_heads == 0
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.head_dim = embed_dim // num_heads
        self.q_proj = Linear(embed_dim, embed_dim)
        self.k_proj = Linear(embed_dim, embed_dim)
        self.v_proj = Linear(embed_dim, embed_dim)
        self.out_proj = Linear(embed_dim, embed_dim)

    def forward(self, x, mask=None):
        batch_size, seq_len, embed_dim = x.shape
        # Linear projections
        Q = self.q_proj.forward(x)
        K = self.k_proj.forward(x)
        V = self.v_proj.forward(x)
        # Reshape for multi-head attention
        Q = Q.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        K = K.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        V = V.reshape(batch_size, seq_len, self.num_heads, self.head_dim)
        # Transpose to (batch_size, num_heads, seq_len, head_dim)
        Q = Tensor(np.transpose(Q.data, (0, 2, 1, 3)))
        K = Tensor(np.transpose(K.data, (0, 2, 1, 3)))
        V = Tensor(np.transpose(V.data, (0, 2, 1, 3)))
        # Scaled dot-product attention
        scores = Tensor(np.matmul(Q.data, np.transpose(K.data, (0, 1, 3, 2))))
        scores = scores * (1.0 / math.sqrt(self.head_dim))
        # Apply causal mask for autoregressive generation
        if mask is not None:
            scores = Tensor(scores.data + mask.data)
        # Softmax
        attention_weights = self._softmax(scores)
        # Apply attention to values
        out = Tensor(np.matmul(attention_weights.data, V.data))
        # Transpose back and reshape
        out = Tensor(np.transpose(out.data, (0, 2, 1, 3)))
        out = out.reshape(batch_size, seq_len, embed_dim)
        # Final linear projection
        return self.out_proj.forward(out)

    def _softmax(self, x):
        """Numerically stable softmax."""
        exp_x = Tensor(np.exp(x.data - np.max(x.data, axis=-1, keepdims=True)))
        return Tensor(exp_x.data / np.sum(exp_x.data, axis=-1, keepdims=True))

    def parameters(self):
        params = []
        params.extend(self.q_proj.parameters())
        params.extend(self.k_proj.parameters())
        params.extend(self.v_proj.parameters())
        params.extend(self.out_proj.parameters())
        return params

class Embedding:
    """Minimal Embedding layer - imports from Module 11 in practice."""

    def __init__(self, vocab_size, embed_dim):
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        # Initialize with small random values
        self.weight = Tensor(np.random.normal(0, 0.02, (vocab_size, embed_dim)))

    def forward(self, indices):
        # Simple embedding lookup
        return Tensor(self.weight.data[indices.data])

    def parameters(self):
        return [self.weight]

def gelu(x):
    """GELU activation function."""
    return Tensor(0.5 * x.data * (1 + np.tanh(np.sqrt(2 / np.pi) * (x.data + 0.044715 * x.data**3))))
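
For reference, a minimal sketch of how the deleted helpers fit together: token ids are mapped to embeddings, passed through one self-attention block with a causal mask, and run through the GELU activation. The vocabulary size, embedding width, head count, and sequence length below are illustrative values, not taken from the module.

np.random.seed(0)
vocab_size, embed_dim, num_heads, seq_len = 100, 16, 4, 8

# Token ids -> embeddings, batch of one sequence
token_ids = Tensor(np.random.randint(0, vocab_size, (1, seq_len)))
embed = Embedding(vocab_size, embed_dim)
x = embed.forward(token_ids)  # shape (1, seq_len, embed_dim)

# Causal mask: zeros on/below the diagonal, large negative values above it
mask = Tensor(np.triu(np.full((seq_len, seq_len), -1e9), k=1))

# One self-attention block followed by GELU
attn = MultiHeadAttention(embed_dim, num_heads)
out = gelu(attn.forward(x, mask=mask))
print(out.shape)  # (1, seq_len, embed_dim)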