Update tinytorch and tito with module exports

Re-exported all modules after restructuring:
- Updated _modidx.py with new module locations
- Removed outdated autogeneration headers
- Updated all core modules (tensor, autograd, layers, etc.)
- Updated optimization modules (quantization, compression, etc.)
- Updated TITO commands for new structure

Changes include:
- 24 tinytorch/ module files
- 24 tito/ command and core files
- Updated references from modules/source/ to modules/

All modules re-exported via nbdev from their new locations.
This commit is contained in:
Vijay Janapa Reddi
2025-11-10 19:42:03 -05:00
parent 9fdfa4317c
commit 41b132f55f
48 changed files with 681 additions and 2035 deletions

View File

@@ -1,19 +1,5 @@
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/XX_transformer/transformer_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_transformers/transformers_dev.ipynb.
# %% auto 0
__all__ = ['LayerNorm', 'MLP', 'TransformerBlock', 'GPT']
@@ -23,7 +9,6 @@ from ..core.tensor import Tensor
from ..core.layers import Linear
from ..core.attention import MultiHeadAttention
from ..core.activations import GELU
from ..text.embeddings import Embedding, PositionalEncoding
# %% ../../modules/source/13_transformers/transformers_dev.ipynb 9
class LayerNorm:
@@ -61,6 +46,7 @@ class LayerNorm:
self.eps = eps
# Learnable parameters: scale and shift
# CRITICAL: requires_grad=True so optimizer can train these!
self.gamma = Tensor(np.ones(normalized_shape), requires_grad=True) # Scale parameter
self.beta = Tensor(np.zeros(normalized_shape), requires_grad=True) # Shift parameter
### END SOLUTION
@@ -83,19 +69,18 @@ class LayerNorm:
HINT: Use keepdims=True to maintain tensor dimensions for broadcasting
"""
### BEGIN SOLUTION
# CRITICAL: Use Tensor operations (not .data) to maintain gradient flow!
# Compute statistics across last dimension (features)
mean = x.mean(axis=-1, keepdims=True)
# Compute variance: E[(x - μ)²]
# Use Tensor operations to preserve computation graph!
diff = x - mean
variance = (diff * diff).mean(axis=-1, keepdims=True)
diff = x - mean # Tensor subtraction maintains gradient
variance = (diff * diff).mean(axis=-1, keepdims=True) # Tensor ops maintain gradient
# Normalize - use Tensor operations to preserve gradients!
# Add eps as a Tensor for proper gradient flow
eps_tensor = Tensor(np.array(self.eps), requires_grad=False)
std = Tensor(np.sqrt(variance.data + self.eps), requires_grad=variance.requires_grad)
normalized = (x - mean) / std
# Normalize: (x - mean) / sqrt(variance + eps)
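# NOTE(review): std below is computed with NumPy from variance.data, so the variance term is treated as a constant — gradient reaches x only through `diff`, not through sqrt(variance + eps). Confirm this approximation is intended.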
std_data = np.sqrt(variance.data + self.eps)
normalized = diff * Tensor(1.0 / std_data) # Scale by reciprocal to maintain gradient
# Apply learnable transformation
output = normalized * self.gamma + self.beta
@@ -103,7 +88,7 @@ class LayerNorm:
### END SOLUTION
def __call__(self, x):
"""Allows the layer norm to be called like a function."""
"""Allows the layer to be called like a function."""
return self.forward(x)
def parameters(self):
@@ -147,7 +132,7 @@ class MLP:
# Two-layer feed-forward network
self.linear1 = Linear(embed_dim, hidden_dim)
self.gelu = GELU() # Use GELU activation from activations module
self.gelu = GELU()
self.linear2 = Linear(hidden_dim, embed_dim)
### END SOLUTION
@@ -171,7 +156,7 @@ class MLP:
# First linear layer with expansion
hidden = self.linear1.forward(x)
# GELU activation (YOUR activation from Module 03!)
# GELU activation
hidden = self.gelu.forward(hidden)
# Second linear layer back to original size
@@ -404,10 +389,6 @@ class GPT:
return logits
### END SOLUTION
def __call__(self, tokens):
"""Allows the GPT model to be called like a function."""
return self.forward(tokens)
def _create_causal_mask(self, seq_len):
"""Create causal mask to prevent attending to future positions."""
### BEGIN SOLUTION