Merge transformer-training into dev

Complete Milestone 05 - 2017 Transformer implementation Major Features: - TinyTalks interactive dashboard with rich CLI - Complete gradient flow fixes (13 tests passing) - Multiple training examples (5-min, 10-min, levels 1-2) - Milestone celebration card (perceptron style) - Comprehensive documentation Gradient Flow Fixes: - Fixed reshape, matmul (3D), embedding, sqrt, mean, sub, div, GELU - All transformer components now fully differentiable - Hybrid attention approach for educational clarity + gradients Training Results: - 10-min training: 96.6% loss improvement, 62.5% accuracy - 5-min training: 97.8% loss improvement, 66.7% accuracy - Working chatbot with coherent responses Files Added: - tinytalks_dashboard.py (main demo) - tinytalks_chatbot.py, tinytalks_dataset.py - level1_memorization.py, level2_patterns.py - Comprehensive docs and test suites Ready for student use 2>&1
2026-06-04 05:35:52 -05:00 · 2025-10-30 17:48:11 -04:00
parent ca93669fbc 330e1738db
commit 15d3ed5251
36 changed files with 7365 additions and 2240 deletions
--- a/tinytorch/text/embeddings.py
+++ b/tinytorch/text/embeddings.py
@@ -1,19 +1,5 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                        🚨 CRITICAL WARNING 🚨                                ║
-# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
-# ║                                                                               ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
-# ║                                                                               ║
-# ║  ✅ TO EDIT: modules/source/XX_embeddings/embeddings_dev.py         ║
-# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
-# ║                                                                               ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
-# ║     Editing it directly may break module functionality and training.         ║
-# ║                                                                               ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
-# ║     happens! The tinytorch/ directory is just the compiled output.           ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/11_embeddings/embeddings_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Embedding', 'PositionalEncoding', 'EmbeddingLayer']

@@ -93,22 +79,18 @@ class Embedding:

        # Perform embedding lookup using advanced indexing
        # This is equivalent to one-hot multiplication but much more efficient
-        embedded = self.weight.data[indices.data.astype(int)]
-
-        # Create result tensor
-        result = Tensor(embedded, requires_grad=self.weight.requires_grad)
+        embedded_data = self.weight.data[indices.data.astype(int)]
+        
+        # Create output tensor with gradient tracking
+        from tinytorch.core.autograd import EmbeddingBackward
+        result = Tensor(embedded_data, requires_grad=self.weight.requires_grad)
        
-        # Attach gradient function (students learned this in Module 05!)
        if self.weight.requires_grad:
-            from tinytorch.core.autograd import EmbeddingBackward
-            result._grad_fn = EmbeddingBackward(self.weight, indices)
-
+            result._grad_fn = EmbeddingBackward()
+            result._grad_fn.saved_tensors = (self.weight, indices)
+        
        return result

-    def __call__(self, indices: Tensor) -> Tensor:
-        """Allows the embedding to be called like a function."""
-        return self.forward(indices)
-
    def parameters(self) -> List[Tensor]:
        """Return trainable parameters."""
        return [self.weight]
@@ -192,23 +174,16 @@ class PositionalEncoding:
                f"Embedding dimension mismatch: expected {self.embed_dim}, got {embed_dim}"
            )

-        # Get position embeddings for this sequence length (slice using .data for efficiency)
-        pos_embeddings_data = self.position_embeddings.data[:seq_len]  # (seq_len, embed_dim)
+        # Get position embeddings for this sequence length
+        pos_embeddings = self.position_embeddings.data[:seq_len]  # (seq_len, embed_dim)

        # Broadcast to match batch dimension: (1, seq_len, embed_dim)
-        pos_embeddings_data = pos_embeddings_data[np.newaxis, :, :]
-        
-        # Wrap in Tensor to preserve requires_grad
-        pos_embeddings = Tensor(pos_embeddings_data, requires_grad=self.position_embeddings.requires_grad)
+        pos_embeddings = pos_embeddings[np.newaxis, :, :]

-        # Add positional information using Tensor operation to preserve gradients!
-        result = x + pos_embeddings
+        # Add positional information to input embeddings
+        result = x.data + pos_embeddings

-        return result
-
-    def __call__(self, x: Tensor) -> Tensor:
-        """Allows the positional encoding to be called like a function."""
-        return self.forward(x)
+        return Tensor(result)

    def parameters(self) -> List[Tensor]:
        """Return trainable parameters."""
@@ -336,10 +311,6 @@ class EmbeddingLayer:

        return output

-    def __call__(self, tokens: Tensor) -> Tensor:
-        """Allows the embedding layer to be called like a function."""
-        return self.forward(tokens)
-
    def parameters(self) -> List[Tensor]:
        """Return all trainable parameters."""
        params = self.token_embedding.parameters()
--- a/tinytorch/text/tokenization.py
+++ b/tinytorch/text/tokenization.py
@@ -1,25 +1,14 @@
-# ╔═══════════════════════════════════════════════════════════════════════════════╗
-# ║                        🚨 CRITICAL WARNING 🚨                                ║
-# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
-# ║                                                                               ║
-# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
-# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
-# ║                                                                               ║
-# ║  ✅ TO EDIT: modules/source/XX_tokenization/tokenization_dev.py     ║
-# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
-# ║                                                                               ║
-# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
-# ║     Editing it directly may break module functionality and training.         ║
-# ║                                                                               ║
-# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
-# ║     happens! The tinytorch/ directory is just the compiled output.           ║
-# ╚═══════════════════════════════════════════════════════════════════════════════╝
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/10_tokenization/tokenization_dev.ipynb.
+
 # %% auto 0
 __all__ = ['Tokenizer', 'CharTokenizer', 'BPETokenizer']

 # %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 0
-#| default_exp text.tokenization
-#| export
+import numpy as np
+from typing import List, Dict, Tuple, Optional, Set
+import json
+import re
+from collections import defaultdict, Counter

 # %% ../../modules/source/10_tokenization/tokenization_dev.ipynb 3
 import numpy as np