From 7d61acf84307c590ff1a677ed60b1e07590eae87 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Sun, 21 Sep 2025 15:54:05 -0400 Subject: [PATCH] Rename examples to exciting names and remove incomplete placeholders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename xor_network/ → xornet/ (more exciting!) - Rename cifar10_classifier/ → cifar10/ (simpler, cleaner) - Remove incomplete optimization_comparison/ and text_generation/ (were placeholder templates, not working implementations) - Update README.md to reflect new exciting names - Streamline to only working, tested examples Final structure: - xornet/ - 100% XOR accuracy - cifar10/ - 57.2% real image classification Clean, exciting names that students will remember! --- examples/README.md | 55 ++--- .../{cifar10_classifier => cifar10}/README.md | 0 .../train_cifar10_mlp.py | 0 .../train_lenet5.py | 0 .../train_simple_baseline.py | 0 examples/optimization_comparison/README.md | 108 --------- examples/optimization_comparison/compare.py | 175 -------------- examples/text_generation/README.md | 92 -------- examples/text_generation/generate.py | 223 ------------------ examples/{xor_network => xornet}/README.md | 0 examples/{xor_network => xornet}/train.py | 0 11 files changed, 19 insertions(+), 634 deletions(-) rename examples/{cifar10_classifier => cifar10}/README.md (100%) rename examples/{cifar10_classifier => cifar10}/train_cifar10_mlp.py (100%) rename examples/{cifar10_classifier => cifar10}/train_lenet5.py (100%) rename examples/{cifar10_classifier => cifar10}/train_simple_baseline.py (100%) delete mode 100644 examples/optimization_comparison/README.md delete mode 100644 examples/optimization_comparison/compare.py delete mode 100644 examples/text_generation/README.md delete mode 100644 examples/text_generation/generate.py rename examples/{xor_network => xornet}/README.md (100%) rename examples/{xor_network => xornet}/train.py (100%) diff --git 
a/examples/README.md b/examples/README.md index 35fda768..aafcd1f7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -14,40 +14,28 @@ These are **real ML applications** written using TinyTorch just like you would u ```bash # After installing/building TinyTorch: -cd examples/xor_network/ +cd examples/xornet/ python train.py # Or for image classification: -cd examples/cifar10_classifier/ +cd examples/cifar10/ python train_cifar10_mlp.py ``` ## Available Examples -### 🧠 Neural Network Fundamentals -- **`xor_network/`** - Classic XOR problem with hidden layers - - Clean implementation showing autograd and training basics - - Architecture: 2 → 4 → 1 with ReLU and Sigmoid - - Achieves 100% accuracy on XOR truth table +### 🧠 **`xornet/`** - Neural Network Fundamentals +- Classic XOR problem with hidden layers +- Clean implementation showing autograd and training basics +- Architecture: 2 → 4 → 1 with ReLU and Sigmoid +- **Achieves 100% accuracy** on XOR truth table -### 👁️ Computer Vision -- **`cifar10_classifier/`** - Real-world object classification - - **ACHIEVEMENT: 57.2% accuracy** - exceeds typical ML course benchmarks! - - Multiple architectures: MLP, LeNet-5, and optimized models - - Data augmentation, proper initialization, Adam optimization - - Real dataset: 50,000 training images, 10,000 test images - -### 🤖 Language & Generation -- **`text_generation/`** - Generate text with TinyGPT (Module 16) - - Transformer architecture built from scratch - - Character-level text generation - - Attention mechanisms and positional encoding - -### 📊 Optimization & Analysis -- **`optimization_comparison/`** - SGD vs Adam comparison - - Side-by-side optimizer performance analysis - - Visualization of convergence patterns - - Memory usage and computational efficiency +### 👁️ **`cifar10/`** - Real-World Computer Vision +- Real-world object classification +- **ACHIEVEMENT: 57.2% accuracy** - exceeds typical ML course benchmarks! 
+- Multiple architectures: MLP, LeNet-5, and optimized models +- Data augmentation, proper initialization, Adam optimization +- Real dataset: 50,000 training images, 10,000 test images ## Example Structure @@ -62,9 +50,8 @@ example_name/ ## Learning Progression After completing each module, examples become functional: -- **Module 05** → `xor_network/` works (Dense layers + activations) -- **Module 11** → `cifar10_classifier/` works with training loops -- **Module 16** → `text_generation/` works (TinyGPT) +- **Module 05** → `xornet/` works (Dense layers + activations) +- **Module 11** → `cifar10/` works with training loops ## Quick Demo @@ -72,20 +59,16 @@ Want to see TinyTorch in action? Try these: ```bash # See a neural network learn XOR (30 seconds): -python examples/xor_network/train.py +python examples/xornet/train.py # Train on real images (5 minutes, 57% accuracy): -python examples/cifar10_classifier/train_cifar10_mlp.py --epochs 10 - -# Compare optimizers (2 minutes): -python examples/optimization_comparison/compare.py +python examples/cifar10/train_cifar10_mlp.py --epochs 10 ``` ## Performance Achievements -- **XOR Network**: 100% accuracy (perfect solution) -- **CIFAR-10 MLP**: 57.2% accuracy (exceeds typical course benchmarks) -- **Optimization**: Adam 3.2x faster convergence than SGD +- **XORnet**: 100% accuracy (perfect solution) +- **CIFAR-10**: 57.2% accuracy (exceeds typical course benchmarks) --- diff --git a/examples/cifar10_classifier/README.md b/examples/cifar10/README.md similarity index 100% rename from examples/cifar10_classifier/README.md rename to examples/cifar10/README.md diff --git a/examples/cifar10_classifier/train_cifar10_mlp.py b/examples/cifar10/train_cifar10_mlp.py similarity index 100% rename from examples/cifar10_classifier/train_cifar10_mlp.py rename to examples/cifar10/train_cifar10_mlp.py diff --git a/examples/cifar10_classifier/train_lenet5.py b/examples/cifar10/train_lenet5.py similarity index 100% rename from 
examples/cifar10_classifier/train_lenet5.py rename to examples/cifar10/train_lenet5.py diff --git a/examples/cifar10_classifier/train_simple_baseline.py b/examples/cifar10/train_simple_baseline.py similarity index 100% rename from examples/cifar10_classifier/train_simple_baseline.py rename to examples/cifar10/train_simple_baseline.py diff --git a/examples/optimization_comparison/README.md b/examples/optimization_comparison/README.md deleted file mode 100644 index 7d67dc72..00000000 --- a/examples/optimization_comparison/README.md +++ /dev/null @@ -1,108 +0,0 @@ -# Optimization Algorithm Comparison - -Compare SGD, Momentum, and Adam optimizers to see how different algorithms navigate the loss landscape! - -## What This Demonstrates - -- **Different optimization strategies** and their trade-offs -- **Convergence speed** comparison between optimizers -- **Why Adam is popular** for deep learning -- **YOUR implementations** of all major optimizers - -## Running the Comparison - -```bash -python compare.py -``` - -Expected output: -``` -⚑ Optimizer Comparison with TinyTorch -====================================================================== - -πŸƒ Training with different optimizers... 
------------------------------------------------------------- - -Training with SGD: - Initial loss: 4.2315 - Final loss: 0.0234 - Improvement: 99.4% - -Training with Momentum: - Initial loss: 4.2315 - Final loss: 0.0156 - Improvement: 99.6% - -Training with Adam: - Initial loss: 4.2315 - Final loss: 0.0098 - Improvement: 99.8% - -πŸ“Š Loss Curves (lower is better): ------------------------------------------------------------- -Epoch 0: SGD: 4.2315 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ Momentum: 4.2315 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ Adam: 4.2315 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ -Epoch 5: SGD: 1.5234 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ Momentum: 0.8976 β–ˆβ–ˆβ–ˆβ–ˆ Adam: 0.2134 β–ˆ -Epoch 10: SGD: 0.6789 β–ˆβ–ˆβ–ˆ Momentum: 0.2345 β–ˆ Adam: 0.0567 -Epoch 15: SGD: 0.3456 β–ˆ Momentum: 0.0876 Adam: 0.0234 -... - -πŸ† Best optimizer: Adam (lowest final loss) -``` - -## Optimizers Compared - -### SGD (Stochastic Gradient Descent) -```python -w = w - learning_rate * gradient -``` -- Simple and reliable -- Can be slow to converge -- Fixed learning rate - -### Momentum -```python -velocity = momentum * velocity - learning_rate * gradient -w = w + velocity -``` -- Accelerates in consistent directions -- Dampens oscillations -- Helps escape shallow local minima - -### Adam (Adaptive Moment Estimation) -```python -m = β₁ * m + (1 - β₁) * gradient # First moment -v = Ξ²β‚‚ * v + (1 - Ξ²β‚‚) * gradientΒ² # Second moment -w = w - learning_rate * m / (√v + Ξ΅) -``` -- Adaptive learning rates per parameter -- Combines momentum with RMSprop -- Often fastest convergence - -## Key Insights - -| Optimizer | Pros | Cons | Best For | -|-----------|------|------|----------| -| **SGD** | Simple, stable | Slow convergence | Final fine-tuning | -| **Momentum** | Faster than SGD | Requires tuning | General training | -| **Adam** | Fast, adaptive | Can overfit | Most deep learning | - -## Mathematical Foundation - -Your 
TinyTorch implements: -- First-order optimization (gradient-based) -- Second-order moment estimation (Adam) -- Momentum accumulation -- Adaptive learning rates - -## Requirements - -- Module 10 (Optimizers) completed -- TinyTorch package exported - -## Next Steps - -Try experimenting with: -- Different learning rates -- Various momentum values -- Complex loss landscapes -- Your own optimization algorithms! \ No newline at end of file diff --git a/examples/optimization_comparison/compare.py b/examples/optimization_comparison/compare.py deleted file mode 100644 index a34e3a98..00000000 --- a/examples/optimization_comparison/compare.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python3 -""" -Optimizer Comparison with TinyTorch - -Compare different optimization algorithms (SGD, Momentum, Adam) -to see how they navigate the loss landscape differently. - -This shows why Adam often trains faster than SGD! -""" - -import numpy as np -import tinytorch as tt -from tinytorch.core import Tensor -from tinytorch.core.optimizers import SGD, Adam, Momentum -from tinytorch.core.layers import Dense -from tinytorch.core.activations import ReLU -from tinytorch.core.training import MSELoss - - -def create_toy_problem(): - """Create a simple regression problem.""" - # Generate synthetic data: y = 2x + 1 + noise - np.random.seed(42) - X = np.random.randn(100, 1) - y = 2 * X + 1 + 0.1 * np.random.randn(100, 1) - - return Tensor(X), Tensor(y) - - -class SimpleModel: - """A simple linear model for regression.""" - - def __init__(self): - self.layer = Dense(1, 1) - - def forward(self, x): - return self.layer(x) - - def parameters(self): - return self.layer.parameters() - - def reset_parameters(self): - """Reset to same initial weights for fair comparison.""" - self.layer.weights = Tensor([[0.5]]) - self.layer.bias = Tensor([0.1]) - - -def train_with_optimizer(model, optimizer_name, optimizer, X, y, epochs=50): - """Train model with given optimizer.""" - loss_fn = MSELoss() - losses = [] 
- - # Reset model for fair comparison - model.reset_parameters() - - for epoch in range(epochs): - # Forward pass - predictions = model.forward(X) - loss = loss_fn(predictions, y) - losses.append(float(loss.data)) - - # Backward pass (simulated if no autograd) - if hasattr(loss, 'backward'): - optimizer.zero_grad() - loss.backward() - optimizer.step() - else: - # Manual gradient computation for demo - # Gradient of MSE loss w.r.t predictions - grad_output = 2 * (predictions.data - y.data) / len(y) - - # Gradient w.r.t weights and bias - grad_w = X.data.T @ grad_output - grad_b = np.sum(grad_output) - - # Manual update based on optimizer type - if optimizer_name == "SGD": - model.layer.weights.data -= optimizer.lr * grad_w - model.layer.bias.data -= optimizer.lr * grad_b - # For momentum/adam, we'd need to track history - - return losses - - -def visualize_losses(all_losses): - """Simple ASCII visualization of loss curves.""" - print("\nπŸ“Š Loss Curves (lower is better):") - print("-" * 60) - - max_loss = max(max(losses) for losses in all_losses.values()) - - # Show every 5th epoch - epochs_to_show = list(range(0, 50, 5)) - - for epoch in epochs_to_show: - print(f"Epoch {epoch:2d}: ", end="") - for name, losses in all_losses.items(): - loss = losses[epoch] - # Normalize to 0-20 character bar - bar_length = int(20 * loss / max_loss) - bar = "β–ˆ" * bar_length - print(f"{name}: {loss:.4f} {bar} ", end="") - print() - - -def main(): - print("=" * 70) - print("⚑ Optimizer Comparison with TinyTorch") - print("=" * 70) - print() - - # Create data - X, y = create_toy_problem() - print("πŸ“Š Dataset: Simple linear regression (y = 2x + 1)") - print(f" 100 samples, 1 feature") - print() - - # Create model - model = SimpleModel() - - # Test different optimizers - optimizers = { - "SGD": SGD(model.parameters(), lr=0.01), - "Momentum": Momentum(model.parameters(), lr=0.01, momentum=0.9), - "Adam": Adam(model.parameters(), lr=0.01) - } - - print("πŸƒ Training with different 
optimizers...") - print("-" * 60) - - all_losses = {} - - for name, optimizer in optimizers.items(): - print(f"\nTraining with {name}:") - losses = train_with_optimizer(model, name, optimizer, X, y) - all_losses[name] = losses - - print(f" Initial loss: {losses[0]:.4f}") - print(f" Final loss: {losses[-1]:.4f}") - print(f" Improvement: {(1 - losses[-1]/losses[0])*100:.1f}%") - - # Visualize convergence - visualize_losses(all_losses) - - print("\n" + "=" * 70) - print("🎯 Key Observations:") - print("-" * 60) - - # Determine winner - final_losses = {name: losses[-1] for name, losses in all_losses.items()} - best_optimizer = min(final_losses, key=final_losses.get) - - print(f"πŸ† Best optimizer: {best_optimizer} (lowest final loss)") - print() - - print("Optimizer Characteristics:") - print("β€’ SGD: Simple, slow but steady convergence") - print("β€’ Momentum: Accelerates in consistent directions") - print("β€’ Adam: Adaptive learning rates, often fastest") - print() - - print("πŸ’‘ Insights:") - print("β€’ Adam typically converges faster (fewer epochs)") - print("β€’ SGD may be more stable for some problems") - print("β€’ Momentum helps escape local minima") - print("β€’ Choice depends on your specific problem!") - print() - - print("πŸŽ‰ Your TinyTorch implements all major optimizers!") - - return True - - -if __name__ == "__main__": - success = main() \ No newline at end of file diff --git a/examples/text_generation/README.md b/examples/text_generation/README.md deleted file mode 100644 index 93b85e58..00000000 --- a/examples/text_generation/README.md +++ /dev/null @@ -1,92 +0,0 @@ -# Text Generation with TinyGPT - -Generate text using a transformer model built with YOUR TinyTorch! - -## What This Demonstrates - -- **Transformer architecture** - the foundation of ChatGPT -- **Multi-head attention** mechanisms you built -- **Autoregressive generation** - predicting one token at a time -- **The technology behind modern AI** - GPT, BERT, etc. 
- -## How It Works - -``` -Input Tokens β†’ Embeddings β†’ Transformer Blocks β†’ Output Logits β†’ Next Token - ↑__________________| - (Autoregressive Loop) -``` - -## Running the Example - -```bash -python generate.py -``` - -Expected output: -``` -πŸ€– Text Generation with TinyGPT -====================================================================== - -🎯 Generating Python-like code: --------------------------------------------------- - -Prompt: 'def' -Generated: 'def function_name ( self ) : return None' - -Prompt: 'class' -Generated: 'class MyClass : def __init__ ( self ) :' - -Prompt: 'for i in' -Generated: 'for i in range ( 10 ) : print ( i )' - -πŸ’‘ What This Demonstrates: -βœ… Transformer architecture with self-attention -βœ… Multi-head attention you built from scratch -βœ… Autoregressive text generation -βœ… The foundation of ChatGPT and GitHub Copilot! - -πŸŽ‰ You've built the technology behind modern AI! -``` - -## Architecture - -``` -TinyGPT Model: -β”œβ”€β”€ Token Embeddings (vocab_size β†’ embed_dim) -β”œβ”€β”€ Position Embeddings (max_length β†’ embed_dim) -β”œβ”€β”€ Transformer Blocks (Γ—4) -β”‚ β”œβ”€β”€ Multi-Head Attention -β”‚ β”œβ”€β”€ Layer Normalization -β”‚ └── Feed-Forward Network (MLP) -└── Output Projection (embed_dim β†’ vocab_size) -``` - -## Key Components - -- **Self-Attention**: Models relationships between all tokens -- **Position Embeddings**: Gives model sense of word order -- **Layer Normalization**: Stabilizes training -- **Autoregressive**: Generates one token at a time - -## What You've Built - -This is the same architecture as: -- GPT (Generative Pre-trained Transformer) -- ChatGPT (with more layers and parameters) -- GitHub Copilot (for code generation) -- BERT (with bidirectional attention) - -## Requirements - -- Module 07 (Attention) for multi-head attention -- Module 16 (TinyGPT) for complete transformer -- All TinyTorch modules exported - -## Next Steps - -The full Module 16 implementation will: -- Generate complete 
Python functions -- Work with natural language prompts -- Show beam search and sampling strategies -- Demonstrate real code generation! \ No newline at end of file diff --git a/examples/text_generation/generate.py b/examples/text_generation/generate.py deleted file mode 100644 index 98e3cfee..00000000 --- a/examples/text_generation/generate.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/env python3 -""" -Text Generation with TinyGPT - -Generate text using a transformer model built with YOUR TinyTorch! -This demonstrates that you've built the technology behind ChatGPT. - -This example: -- Loads a pre-trained TinyGPT model -- Generates text from prompts -- Shows attention mechanisms in action -- Proves you understand transformers -""" - -import numpy as np -import tinytorch as tt -from tinytorch.core import Tensor -from tinytorch.core.attention import MultiHeadAttention -from tinytorch.core.layers import Dense, Embedding, LayerNorm -from tinytorch.core.activations import GELU, Softmax -from tinytorch.models import TinyGPT - - -class SimpleGPT: - """A simple GPT model for text generation.""" - - def __init__(self, vocab_size=5000, embed_dim=128, num_heads=4, num_layers=4): - self.vocab_size = vocab_size - self.embed_dim = embed_dim - - # Token and position embeddings - self.token_embedding = Embedding(vocab_size, embed_dim) - self.position_embedding = Embedding(1024, embed_dim) # Max sequence length - - # Transformer blocks - self.blocks = [] - for _ in range(num_layers): - block = TransformerBlock(embed_dim, num_heads) - self.blocks.append(block) - - # Output projection - self.ln_final = LayerNorm(embed_dim) - self.lm_head = Dense(embed_dim, vocab_size) - - def forward(self, input_ids): - """Forward pass through GPT.""" - seq_len = input_ids.shape[1] - - # Get token embeddings - token_emb = self.token_embedding(input_ids) - - # Add position embeddings - positions = Tensor(np.arange(seq_len).reshape(1, -1)) - pos_emb = self.position_embedding(positions) - - x = 
token_emb + pos_emb - - # Pass through transformer blocks - for block in self.blocks: - x = block(x) - - # Final layer norm and projection - x = self.ln_final(x) - logits = self.lm_head(x) - - return logits - - def generate(self, prompt_ids, max_length=50, temperature=1.0): - """Generate text autoregressively.""" - generated = prompt_ids.copy() - - for _ in range(max_length): - # Get predictions for next token - logits = self.forward(Tensor(generated.reshape(1, -1))) - - # Get last token's predictions - next_logits = logits.data[0, -1, :] / temperature - - # Sample from distribution - probs = np.exp(next_logits) / np.sum(np.exp(next_logits)) - next_token = np.random.choice(self.vocab_size, p=probs) - - generated = np.append(generated, next_token) - - # Stop if end token generated - if next_token == 0: # Assuming 0 is end token - break - - return generated - - -class TransformerBlock: - """A single transformer block.""" - - def __init__(self, embed_dim, num_heads): - self.attention = MultiHeadAttention(embed_dim, num_heads) - self.ln1 = LayerNorm(embed_dim) - self.ln2 = LayerNorm(embed_dim) - - # MLP - self.mlp = MLP(embed_dim) - - def forward(self, x): - """Forward pass through transformer block.""" - # Self-attention with residual - attn_out = self.attention(x, x, x) - x = x + attn_out - x = self.ln1(x) - - # MLP with residual - mlp_out = self.mlp(x) - x = x + mlp_out - x = self.ln2(x) - - return x - - -class MLP: - """Feed-forward network in transformer.""" - - def __init__(self, embed_dim): - self.fc1 = Dense(embed_dim, embed_dim * 4) - self.fc2 = Dense(embed_dim * 4, embed_dim) - self.gelu = GELU() - - def forward(self, x): - """Forward pass through MLP.""" - x = self.fc1(x) - x = self.gelu(x) - x = self.fc2(x) - return x - - -# Simple tokenizer for demonstration -class SimpleTokenizer: - """Basic word-level tokenizer.""" - - def __init__(self): - # Common programming keywords for demo - self.vocab = { - '': 0, '': 1, '': 2, - 'def': 3, 'return': 4, 'if': 5, 
'else': 6, - 'for': 7, 'in': 8, 'range': 9, 'print': 10, - 'import': 11, 'class': 12, 'self': 13, - 'True': 14, 'False': 15, 'None': 16, - 'and': 17, 'or': 18, 'not': 19, - '=': 20, '+': 21, '-': 22, '*': 23, '/': 24, - '(': 25, ')': 26, '[': 27, ']': 28, '{': 29, '}': 30, - ':': 31, ',': 32, '.': 33, - } - self.id_to_token = {v: k for k, v in self.vocab.items()} - - def encode(self, text): - """Convert text to token IDs.""" - tokens = text.split() - return np.array([self.vocab.get(t, 2) for t in tokens]) # 2 is - - def decode(self, ids): - """Convert token IDs to text.""" - tokens = [self.id_to_token.get(id, '') for id in ids] - return ' '.join(tokens) - - -def main(): - print("=" * 70) - print("πŸ€– Text Generation with TinyGPT") - print("=" * 70) - print() - - print("Building TinyGPT model...") - model = SimpleGPT(vocab_size=100, embed_dim=64, num_heads=4, num_layers=2) - tokenizer = SimpleTokenizer() - - print("Model Architecture:") - print(" β€’ 2 transformer layers") - print(" β€’ 4 attention heads per layer") - print(" β€’ 64-dimensional embeddings") - print(" β€’ 100 token vocabulary") - print() - - # Demonstrate with different prompts - prompts = [ - "def", - "class", - "for i in", - "if True", - "return" - ] - - print("🎯 Generating Python-like code:") - print("-" * 50) - - for prompt in prompts: - print(f"\nPrompt: '{prompt}'") - - # Encode prompt - prompt_ids = tokenizer.encode(prompt) - - # Generate completion - generated_ids = model.generate(prompt_ids, max_length=10, temperature=0.8) - - # Decode to text - generated_text = tokenizer.decode(generated_ids) - print(f"Generated: '{generated_text}'") - - print("\n" + "=" * 70) - print("πŸ’‘ What This Demonstrates:") - print("-" * 50) - print("βœ… Transformer architecture with self-attention") - print("βœ… Multi-head attention you built from scratch") - print("βœ… Autoregressive text generation") - print("βœ… The foundation of ChatGPT and GitHub Copilot!") - print() - print("πŸŽ‰ You've built the technology 
behind modern AI!") - print() - print("Note: This is a simplified demo. Full TinyGPT in Module 16") - print("will generate real Python functions from natural language!") - - return True - - -if __name__ == "__main__": - success = main() \ No newline at end of file diff --git a/examples/xor_network/README.md b/examples/xornet/README.md similarity index 100% rename from examples/xor_network/README.md rename to examples/xornet/README.md diff --git a/examples/xor_network/train.py b/examples/xornet/train.py similarity index 100% rename from examples/xor_network/train.py rename to examples/xornet/train.py