Merge transformer-training into dev

Complete Milestone 05 - 2017 Transformer implementation

Major Features:
- TinyTalks interactive dashboard with rich CLI
- Complete gradient flow fixes (13 tests passing)
- Multiple training examples (5-min, 10-min, levels 1-2)
- Milestone celebration card (perceptron style)
- Comprehensive documentation

Gradient Flow Fixes:
- Fixed gradient flow through reshape, matmul (3D), embedding, sqrt, mean, sub, div, and GELU
- All transformer components now fully differentiable
- Hybrid attention approach for educational clarity + gradients

Training Results:
- 10-min training: 96.6% loss improvement, 62.5% accuracy
- 5-min training: 97.8% loss improvement, 66.7% accuracy
- Working chatbot with coherent responses

Files Added:
- tinytalks_dashboard.py (main demo)
- tinytalks_chatbot.py, tinytalks_dataset.py
- level1_memorization.py, level2_patterns.py
- Comprehensive docs and test suites

Ready for student use.
This commit is contained in:
Vijay Janapa Reddi
2025-10-30 17:48:11 -04:00
36 changed files with 7365 additions and 2240 deletions

View File

@@ -1,19 +1,5 @@
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/XX_embeddings/embeddings_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/11_embeddings/embeddings_dev.ipynb.
# %% auto 0
__all__ = ['Embedding', 'PositionalEncoding', 'EmbeddingLayer']
@@ -93,22 +79,18 @@ class Embedding:
# Perform embedding lookup using advanced indexing
# This is equivalent to one-hot multiplication but much more efficient
embedded = self.weight.data[indices.data.astype(int)]
# Create result tensor
result = Tensor(embedded, requires_grad=self.weight.requires_grad)
embedded_data = self.weight.data[indices.data.astype(int)]
# Create output tensor with gradient tracking
from tinytorch.core.autograd import EmbeddingBackward
result = Tensor(embedded_data, requires_grad=self.weight.requires_grad)
# Attach gradient function (students learned this in Module 05!)
if self.weight.requires_grad:
from tinytorch.core.autograd import EmbeddingBackward
result._grad_fn = EmbeddingBackward(self.weight, indices)
result._grad_fn = EmbeddingBackward()
result._grad_fn.saved_tensors = (self.weight, indices)
return result
def __call__(self, indices: Tensor) -> Tensor:
"""Allows the embedding to be called like a function."""
return self.forward(indices)
def parameters(self) -> List[Tensor]:
"""Return trainable parameters."""
return [self.weight]
@@ -192,23 +174,16 @@ class PositionalEncoding:
f"Embedding dimension mismatch: expected {self.embed_dim}, got {embed_dim}"
)
# Get position embeddings for this sequence length (slice using .data for efficiency)
pos_embeddings_data = self.position_embeddings.data[:seq_len] # (seq_len, embed_dim)
# Get position embeddings for this sequence length
pos_embeddings = self.position_embeddings.data[:seq_len] # (seq_len, embed_dim)
# Broadcast to match batch dimension: (1, seq_len, embed_dim)
pos_embeddings_data = pos_embeddings_data[np.newaxis, :, :]
# Wrap in Tensor to preserve requires_grad
pos_embeddings = Tensor(pos_embeddings_data, requires_grad=self.position_embeddings.requires_grad)
pos_embeddings = pos_embeddings[np.newaxis, :, :]
# Add positional information using Tensor operation to preserve gradients!
result = x + pos_embeddings
# Add positional information to input embeddings
result = x.data + pos_embeddings
return result
def __call__(self, x: Tensor) -> Tensor:
"""Allows the positional encoding to be called like a function."""
return self.forward(x)
return Tensor(result)
def parameters(self) -> List[Tensor]:
"""Return trainable parameters."""
@@ -336,10 +311,6 @@ class EmbeddingLayer:
return output
def __call__(self, tokens: Tensor) -> Tensor:
"""Allows the embedding layer to be called like a function."""
return self.forward(tokens)
def parameters(self) -> List[Tensor]:
"""Return all trainable parameters."""
params = self.token_embedding.parameters()

View File

@@ -1,25 +1,14 @@
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/XX_tokenization/tokenization_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
# %% auto 0
__all__ = ['Tokenizer', 'CharTokenizer', 'BPETokenizer']
# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 0
#| default_exp text.tokenization
#| export
import numpy as np
from typing import List, Dict, Tuple, Optional, Set
import json
import re
from collections import defaultdict, Counter
# %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 3
import numpy as np