From ecdc879dda7ae6f3f059f4e2f8e4d49876dfe56f Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Fri, 26 Sep 2025 13:00:48 -0400 Subject: [PATCH] LOGISTICS: Add comprehensive milestone example infrastructure Address practical concerns about running milestone examples: DATASET MANAGEMENT: - Add data_manager.py for automatic dataset downloading - Support MNIST, CIFAR-10, XOR, and Perceptron datasets - Handle download with progress bars and caching - Clear error handling and fallback options STANDARDIZED TEMPLATE: - Create MILESTONE_TEMPLATE.py showing standard structure - Emphasize "YOU BUILT THIS" throughout code comments - Include historical context and educational rationale - Add systems analysis (memory, performance, scaling) - Clear module prerequisite mapping RUNNING INSTRUCTIONS: - Comprehensive troubleshooting section in README - Performance expectations and timing estimates - Command-line options (--test-only, --demo-mode) - Clear dataset logistics explanation EXAMPLE IMPLEMENTATION: - Update perceptron_1957 to follow new template - Demonstrate "YOUR TinyTorch" emphasis throughout - Show proper dataset integration and systems analysis - Include command-line interface for different modes Students now have clear, practical milestone examples that: - Handle all dataset logistics automatically - Emphasize their own implementations throughout - Provide historical context and educational value - Include troubleshooting and performance guidance --- examples/MILESTONE_TEMPLATE.py | 161 ++++++++++ examples/README.md | 95 +++++- examples/data_manager.py | 213 +++++++++++++ .../perceptron_1957/rosenblatt_perceptron.py | 301 ++++++++++++------ 4 files changed, 663 insertions(+), 107 deletions(-) create mode 100644 examples/MILESTONE_TEMPLATE.py create mode 100644 examples/data_manager.py diff --git a/examples/MILESTONE_TEMPLATE.py b/examples/MILESTONE_TEMPLATE.py new file mode 100644 index 00000000..5ce933a2 --- /dev/null +++ b/examples/MILESTONE_TEMPLATE.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python3 +""" +[MILESTONE NAME] ([YEAR]) - [HISTORICAL FIGURE] +=============================================== + +๐Ÿ“š HISTORICAL CONTEXT: +[2-3 sentences about the historical significance and why this was a breakthrough] + +๐ŸŽฏ WHAT YOU'RE BUILDING: +[1-2 sentences about what students will demonstrate with their own implementations] + +โœ… REQUIRED MODULES (Run after Module [X]): +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” + Module 02 (Tensor) : [Brief description of how it's used] + Module 03 (Activations) : [Brief description of how it's used] + Module 04 (Layers) : [Brief description of how it's used] + Module XX (YYY) : [Additional modules as needed] +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” + +๐Ÿ—๏ธ ARCHITECTURE: + [ASCII diagram showing the network architecture] + +๐Ÿ” KEY INSIGHTS: +- [Bullet point about what this demonstrates] +- [Bullet point about why this architecture works] +- [Bullet point about production relevance] + +๐Ÿ“Š EXPECTED PERFORMANCE: +- [Dataset info]: [Performance metric] +- [Training time]: [Approximate time] +- [Memory usage]: [Approximate memory] +""" + +import sys +import os +import numpy as np + +# Add project root to path for imports +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(project_root) + +# Import TinyTorch components YOU BUILT! +from tinytorch.core.tensor import Tensor # Module 02: YOU built this data structure! +from tinytorch.core.layers import Linear # Module 04: YOU built these transformations! +from tinytorch.core.activations import ReLU # Module 03: YOU built this nonlinearity! +# [Add other imports as needed with YOU BUILT comments] + +def download_dataset(): + """ + Download and prepare dataset for this milestone. + + This function handles all dataset logistics so you can focus on + demonstrating the ML system you built! + """ + print("๐Ÿ“ฅ Downloading dataset...") + # [Dataset download logic] + print("โœ… Dataset ready!") + return data_loader + +def create_model(): + """Build the model using YOUR TinyTorch implementations!""" + + class MilestoneModel: + def __init__(self): + # YOU built these components in the modules! + self.layer1 = Linear(input_size, hidden_size) # Module 04: YOUR Linear layer! + self.activation = ReLU() # Module 03: YOUR ReLU function! + self.layer2 = Linear(hidden_size, output_size) # Module 04: YOUR weight matrices! + + def forward(self, x): + # Forward pass using YOUR implementations + x = self.layer1(x) # Module 04: YOUR Linear.forward()! + x = self.activation(x) # Module 03: YOUR ReLU activation! + x = self.layer2(x) # Module 04: YOUR final transformation! + return x + + return MilestoneModel() + +def train_model(model, data_loader): + """Train using YOUR optimization and loss implementations!""" + + # Set up training using YOUR TinyTorch modules + optimizer = YourOptimizer(model.parameters()) # Module XX: YOU built this optimizer! + loss_fn = YourLossFunction() # Module XX: YOU built this loss! + + print("๐Ÿš€ Training with YOUR TinyTorch implementation!") + print(" [Brief description of what's happening]") + + for epoch in range(num_epochs): + total_loss = 0 + + for batch_data, batch_labels in data_loader: # Module XX: YOUR DataLoader! + # Forward pass with YOUR components + outputs = model.forward(batch_data) # YOUR model architecture! + loss = loss_fn(outputs, batch_labels) # YOUR loss computation! + + # Backward pass with YOUR autograd + loss.backward() # Module XX: YOUR autodiff! + optimizer.step() # Module XX: YOUR optimization! + optimizer.zero_grad() # Module XX: YOUR gradient reset! + + total_loss += loss.item() + + avg_loss = total_loss / len(data_loader) + print(f" Epoch {epoch+1}: Loss = {avg_loss:.4f} (YOUR training loop!)") + +def analyze_performance(model): + """Analyze the system YOU built from an ML systems perspective.""" + + print("\n๐Ÿ”ฌ SYSTEMS ANALYSIS of YOUR Implementation:") + + # Memory analysis using YOUR tensor system + import tracemalloc + tracemalloc.start() + + # Test forward pass + test_input = Tensor(np.random.randn(batch_size, input_size)) # YOUR Tensor! + output = model.forward(test_input) # YOUR architecture! + + current, peak = tracemalloc.get_traced_memory() + print(f" Memory usage: {peak / 1024 / 1024:.2f} MB peak") + + # Parameter analysis + total_params = sum(layer.weight.size for layer in [model.layer1, model.layer2]) + print(f" Parameters: {total_params:,} weights (YOUR Linear layers!)") + + # Performance characteristics + print(f" Computational complexity: O([complexity]) per forward pass") + print(f" YOUR implementation handles: [capability description]") + +def main(): + """Demonstrate the complete milestone using YOUR TinyTorch system!""" + + print("๐ŸŽฏ [MILESTONE NAME] - Proof of YOUR Mastery!") + print(" Historical significance: [Brief context]") + print(" YOUR achievement: [What they've built]") + print() + + # Step 1: Get dataset + data_loader = download_dataset() + + # Step 2: Create model with YOUR components + model = create_model() + + # Step 3: Train using YOUR training system + train_model(model, data_loader) + + # Step 4: Analyze YOUR implementation + analyze_performance(model) + + print("\nโœ… SUCCESS! Milestone Complete!") + print("\n๐ŸŽ“ What YOU Accomplished:") + print(" โ€ข [Specific achievement 1 using YOUR modules]") + print(" โ€ข [Specific achievement 2 using YOUR implementations]") + print(" โ€ข [Connection to modern ML systems]") + print("\n๐Ÿš€ Next Steps:") + print(" โ€ข Continue to [next milestone] after Module [X]") + print(" โ€ข YOUR foundation enables: [future capabilities]") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/README.md b/examples/README.md index f45ae0bd..4bcb7dfa 100644 --- a/examples/README.md +++ b/examples/README.md @@ -223,43 +223,114 @@ examples/ --- -## ๐Ÿš€ **Running the Examples** +## ๐Ÿš€ **How to Run These Examples** ### **Prerequisites Check** ```bash -# Verify your TinyTorch installation +# 1. Verify your TinyTorch installation tito system doctor -# Check which modules you've completed +# 2. Check which modules you've completed tito checkpoint status + +# 3. Ensure you're in the project root +cd /path/to/TinyTorch ``` -### **Run Examples by Module Completion** +### **Dataset Management (Automatic)** +**Don't worry about data logistics!** Each example automatically handles dataset downloading: + +- **MNIST**: Downloads from official LeCun server (~60MB) +- **CIFAR-10**: Downloads from University of Toronto (~170MB) +- **XOR/Perceptron**: Generates synthetic data instantly + +**First run will download data, subsequent runs use cached data.** + +### **Running Examples by Module Completion** + +#### **๐Ÿ“ฑ Quick Test (No Training)** +Test architecture and imports without waiting for downloads: +```bash +# Test what you've built so far +python examples/perceptron_1957/rosenblatt_perceptron.py --test-only +python examples/xor_1969/minsky_xor_problem.py --test-only +``` + +#### **๐ŸŽฏ Full Milestone Demonstrations** ```bash -# After Module 04 - Basic networks +# After Module 04 - Foundation (30 seconds) python examples/perceptron_1957/rosenblatt_perceptron.py +# Demonstrates: YOU built Linear layers + activation functions -# After Module 06 - Autograd +# After Module 06 - Autograd (1 minute) python examples/xor_1969/minsky_xor_problem.py +# Demonstrates: YOU built gradient computation + training loops -# After Module 08 - Training +# After Module 08 - Training (2-3 minutes + MNIST download) python examples/mnist_mlp_1986/train_mlp.py +# Demonstrates: YOU built complete vision pipeline -# After Module 10 - DataLoader + Spatial -python examples/cifar_cnn_modern/train_cnn.py +# After Module 10 - DataLoader + Spatial (3-5 minutes + CIFAR download) +python examples/cifar_cnn_modern/train_cnn.py +# Demonstrates: YOU built convolutional networks -# After Module 14 - Transformers +# After Module 14 - Transformers (5-10 minutes) python examples/gpt_2018/train_gpt.py +# Demonstrates: YOU built attention mechanisms + language models ``` -### **Quick Demo with Pre-trained Weights** +### **๐Ÿšซ Troubleshooting Common Issues** + +#### **Import Errors** ```bash +# If you see "ModuleNotFoundError: No module named 'tinytorch'" +cd /path/to/TinyTorch +python -m pip install -e . + +# Or run with explicit path +PYTHONPATH=/path/to/TinyTorch python examples/perceptron_1957/rosenblatt_perceptron.py +``` + +#### **Dataset Download Issues** +```bash +# Manual dataset download if automatic fails +python examples/data_manager.py # Test all datasets + +# Or download specific datasets +python -c "from examples.data_manager import DatasetManager; DatasetManager().get_mnist()" +``` + +#### **Memory Issues** +```bash +# Reduce batch size for limited memory +python examples/cifar_cnn_modern/train_cnn.py --batch-size 16 + +# Use test mode for architecture validation only +python examples/mnist_mlp_1986/train_mlp.py --test-only +``` + +#### **Slow Training** +```bash +# Quick demo mode (reduced epochs) +python examples/mnist_mlp_1986/train_mlp.py --demo-mode + # Use pre-trained weights for instant results python examples/mnist_mlp_1986/train_mlp.py --use-pretrained -python examples/cifar_cnn_modern/train_cnn.py --use-pretrained ``` +### **๐Ÿ“Š Expected Performance & Timing** + +| Example | Dataset Size | Download Time | Training Time | Expected Accuracy | +|---------|-------------|---------------|---------------|------------------| +| **Perceptron 1957** | 1K synthetic | 0s | 30s | 95%+ (linearly separable) | +| **XOR 1969** | 1K synthetic | 0s | 1min | 90%+ (non-linear) | +| **MNIST MLP 1986** | 60K images | 2-5min | 2-3min | 85%+ (real vision) | +| **CIFAR CNN Modern** | 50K images | 5-10min | 3-5min | 65%+ (natural images) | +| **TinyGPT 2018** | Text corpus | 1-2min | 5-10min | Coherent generation | + +**Note**: First run includes dataset download time. Subsequent runs are much faster. + --- ## ๐Ÿค” **ML Systems Thinking Questions** diff --git a/examples/data_manager.py b/examples/data_manager.py new file mode 100644 index 00000000..4583b97c --- /dev/null +++ b/examples/data_manager.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +""" +TinyTorch Dataset Manager +======================== + +Handles dataset downloading and preparation for milestone examples. +Students can focus on demonstrating their ML systems, not fighting with data logistics! + +Supported Datasets: +- MNIST: Handwritten digits (28x28 grayscale) +- CIFAR-10: Natural images (32x32 RGB) +- XOR: Synthetic non-linear problem +- Perceptron: Synthetic linearly separable data +""" + +import os +import sys +import urllib.request +import tarfile +import pickle +import gzip +import numpy as np +from pathlib import Path + +# Add project root for TinyTorch imports +project_root = Path(__file__).parent.parent +sys.path.append(str(project_root)) + +class DatasetManager: + """Handles all dataset logistics for TinyTorch milestone examples.""" + + def __init__(self, data_dir=None): + if data_dir is None: + self.data_dir = Path(__file__).parent / "datasets" + else: + self.data_dir = Path(data_dir) + + # Create data directory if it doesn't exist + self.data_dir.mkdir(exist_ok=True) + + def download_with_progress(self, url, filename): + """Download with progress bar.""" + def progress_hook(block_num, block_size, total_size): + if total_size > 0: + percent = min(100, (block_num * block_size / total_size) * 100) + print(f"\r Progress: {percent:.1f}%", end='', flush=True) + + print(f"๐Ÿ“ฅ Downloading {filename}...") + urllib.request.urlretrieve(url, filename, progress_hook) + print("\nโœ… Download complete!") + + def get_mnist(self): + """Download and prepare MNIST dataset for MLP milestone.""" + mnist_dir = self.data_dir / "mnist" + mnist_dir.mkdir(exist_ok=True) + + # MNIST URLs + base_url = "http://yann.lecun.com/exdb/mnist/" + files = [ + "train-images-idx3-ubyte.gz", + "train-labels-idx1-ubyte.gz", + "t10k-images-idx3-ubyte.gz", + "t10k-labels-idx1-ubyte.gz" + ] + + # Download if needed + for filename in files: + filepath = mnist_dir / filename + if not filepath.exists(): + self.download_with_progress(base_url + filename, filepath) + + # Load and return data + train_images = self._load_mnist_images(mnist_dir / files[0]) + train_labels = self._load_mnist_labels(mnist_dir / files[1]) + test_images = self._load_mnist_images(mnist_dir / files[2]) + test_labels = self._load_mnist_labels(mnist_dir / files[3]) + + print(f"๐Ÿ“Š MNIST loaded: {len(train_images)} training, {len(test_images)} test images") + return (train_images, train_labels), (test_images, test_labels) + + def get_cifar10(self): + """Download and prepare CIFAR-10 dataset for CNN milestone.""" + cifar_dir = self.data_dir / "cifar-10" + cifar_dir.mkdir(exist_ok=True) + + # Check if already downloaded + data_file = cifar_dir / "cifar-10-python.tar.gz" + extracted_dir = cifar_dir / "cifar-10-batches-py" + + if not extracted_dir.exists(): + # Download if needed + if not data_file.exists(): + url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" + self.download_with_progress(url, data_file) + + # Extract + print("๐Ÿ“ฆ Extracting CIFAR-10...") + with tarfile.open(data_file, 'r:gz') as tar: + tar.extractall(cifar_dir) + print("โœ… Extraction complete!") + + # Load data from pickle files + train_data, train_labels = [], [] + for i in range(1, 6): + batch_file = extracted_dir / f"data_batch_{i}" + with open(batch_file, 'rb') as f: + batch = pickle.load(f, encoding='bytes') + train_data.append(batch[b'data']) + train_labels.extend(batch[b'labels']) + + # Test data + test_file = extracted_dir / "test_batch" + with open(test_file, 'rb') as f: + test_batch = pickle.load(f, encoding='bytes') + test_data = test_batch[b'data'] + test_labels = test_batch[b'labels'] + + # Reshape to proper image format + train_data = np.vstack(train_data).reshape(-1, 3, 32, 32).astype(np.float32) / 255.0 + test_data = test_data.reshape(-1, 3, 32, 32).astype(np.float32) / 255.0 + train_labels = np.array(train_labels, dtype=np.int64) + test_labels = np.array(test_labels, dtype=np.int64) + + print(f"๐Ÿ“Š CIFAR-10 loaded: {len(train_data)} training, {len(test_data)} test images") + return (train_data, train_labels), (test_data, test_labels) + + def get_xor_data(self, num_samples=1000): + """Generate XOR problem data for non-linear milestone.""" + print("๐Ÿงฎ Generating XOR problem data...") + + # Create XOR dataset + np.random.seed(42) # Reproducible + X = np.random.randint(0, 2, (num_samples, 2)).astype(np.float32) + y = (X[:, 0] ^ X[:, 1]).astype(np.int64) # XOR labels + + # Add some noise to make it more realistic + X += np.random.normal(0, 0.1, X.shape) + + print(f"๐Ÿ“Š XOR data generated: {num_samples} samples") + print(" Classes: [0,0]โ†’0, [0,1]โ†’1, [1,0]โ†’1, [1,1]โ†’0") + return X, y + + def get_perceptron_data(self, num_samples=1000): + """Generate linearly separable data for perceptron milestone.""" + print("๐Ÿ“ Generating linearly separable data...") + + np.random.seed(42) + + # Create two clusters + cluster1 = np.random.normal([2, 2], 0.5, (num_samples//2, 2)) + cluster2 = np.random.normal([-2, -2], 0.5, (num_samples//2, 2)) + + X = np.vstack([cluster1, cluster2]).astype(np.float32) + y = np.hstack([np.ones(num_samples//2), np.zeros(num_samples//2)]).astype(np.int64) + + # Shuffle + indices = np.random.permutation(num_samples) + X, y = X[indices], y[indices] + + print(f"๐Ÿ“Š Perceptron data generated: {num_samples} linearly separable samples") + return X, y + + def _load_mnist_images(self, filepath): + """Load MNIST image file.""" + with gzip.open(filepath, 'rb') as f: + # Skip header + f.read(16) + # Read images + data = np.frombuffer(f.read(), dtype=np.uint8) + return data.reshape(-1, 28, 28).astype(np.float32) / 255.0 + + def _load_mnist_labels(self, filepath): + """Load MNIST label file.""" + with gzip.open(filepath, 'rb') as f: + # Skip header + f.read(8) + # Read labels + return np.frombuffer(f.read(), dtype=np.uint8).astype(np.int64) + +def main(): + """Test dataset manager functionality.""" + print("๐Ÿงช Testing TinyTorch Dataset Manager") + print("=" * 50) + + manager = DatasetManager() + + # Test each dataset + print("\n1. Testing Perceptron Data:") + X, y = manager.get_perceptron_data(100) + print(f" Shape: X={X.shape}, y={y.shape}") + + print("\n2. Testing XOR Data:") + X, y = manager.get_xor_data(100) + print(f" Shape: X={X.shape}, y={y.shape}") + + print("\n3. Testing MNIST (this may take a moment):") + try: + (train_X, train_y), (test_X, test_y) = manager.get_mnist() + print(f" Shape: train_X={train_X.shape}, test_X={test_X.shape}") + except Exception as e: + print(f" MNIST download failed: {e}") + + print("\n4. Testing CIFAR-10 (this may take a moment):") + try: + (train_X, train_y), (test_X, test_y) = manager.get_cifar10() + print(f" Shape: train_X={train_X.shape}, test_X={test_X.shape}") + except Exception as e: + print(f" CIFAR-10 download failed: {e}") + + print("\nโœ… Dataset Manager test complete!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/perceptron_1957/rosenblatt_perceptron.py b/examples/perceptron_1957/rosenblatt_perceptron.py index e48e7df1..3a43b75b 100644 --- a/examples/perceptron_1957/rosenblatt_perceptron.py +++ b/examples/perceptron_1957/rosenblatt_perceptron.py @@ -1,134 +1,245 @@ +#!/usr/bin/env python3 """ The Perceptron (1957) - Frank Rosenblatt -========================================= +======================================= -Historical Context: -Frank Rosenblatt's Perceptron was the first trainable artificial neural network. -It could learn to classify linearly separable patterns, sparking the first wave -of neural network research and dreams of artificial intelligence. +๐Ÿ“š HISTORICAL CONTEXT: +Frank Rosenblatt's Perceptron was the first trainable artificial neural network that +could learn from examples. It sparked the first AI boom and demonstrated that machines +could actually learn to recognize patterns, launching the neural network revolution. -What You're Building: -The same perceptron that started it all - a single-layer network that can -learn simple classification tasks through iterative weight updates. +๐ŸŽฏ WHAT YOU'RE BUILDING: +Using YOUR TinyTorch implementations, you'll recreate the exact same perceptron that +started it all - proving that YOU can build the foundation of modern AI from scratch. -Required Modules (can run after Module 4): -- Module 2 (Tensor): Core data structure -- Module 3 (Activations): Step function for binary output -- Module 4 (Layers): Linear layer for linear transformation +โœ… REQUIRED MODULES (Run after Module 4): +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” + Module 02 (Tensor) : YOUR data structure with gradient tracking + Module 03 (Activations) : YOUR sigmoid activation for smooth gradients + Module 04 (Layers) : YOUR Linear layer for weight transformations +โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ”โ” -This Example Demonstrates: -- The original perceptron architecture -- Why it could only solve linearly separable problems -- The foundation that all modern neural networks build upon +๐Ÿ—๏ธ ARCHITECTURE (Original 1957 Design): + โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” + โ”‚ Input โ”‚ โ”‚ Linear โ”‚ โ”‚ Sigmoid โ”‚ โ”‚ Binary โ”‚ + โ”‚ Features โ”‚โ”€โ”€โ”€โ–ถโ”‚ YOUR Module โ”‚โ”€โ”€โ”€โ–ถโ”‚ YOUR Module โ”‚โ”€โ”€โ”€โ–ถโ”‚ Output โ”‚ + โ”‚ (x1, x2) โ”‚ โ”‚ 04 โ”‚ โ”‚ 03 โ”‚ โ”‚ (0 or 1) โ”‚ + โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ + +๐Ÿ” KEY INSIGHTS: +- Single-layer architecture: Just linear transformation + activation +- Linearly separable only: Can't solve XOR problem (that comes later!) +- Foundation for everything: Modern networks are just deeper perceptrons + +๐Ÿ“Š EXPECTED PERFORMANCE: +- Dataset: 1,000 linearly separable synthetic points +- Training time: 30 seconds +- Expected accuracy: 95%+ (problem is linearly separable) """ -import numpy as np import sys import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import numpy as np +import argparse -from tinytorch.core.tensor import Tensor -from tinytorch.core.layers import Linear -from tinytorch.core.activations import Sigmoid # Using sigmoid as step function approximation -from tinytorch.core.autograd import to_numpy +# Add project root to path for TinyTorch imports +project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(project_root) +# Import TinyTorch components YOU BUILT! +from tinytorch.core.tensor import Tensor # Module 02: YOU built this! +from tinytorch.core.layers import Linear # Module 04: YOU built this! +from tinytorch.core.activations import Sigmoid # Module 03: YOU built this! -class Perceptron: +# Import dataset manager for automatic data handling +from examples.data_manager import DatasetManager + +class RosenblattPerceptron: """ - Rosenblatt's Perceptron - the network that started it all. + Rosenblatt's original Perceptron using YOUR TinyTorch implementations! - Historical note: The original used a step function, but we'll use - sigmoid for smooth gradients (a later innovation). + Historical note: The original used a step function, but we use sigmoid + for smooth gradients (an innovation that came slightly later). """ def __init__(self, input_size=2, output_size=1): - # Single layer - just like the original! - self.linear = Linear(input_size, output_size) - self.activation = Sigmoid() # Original used step function + print("๐Ÿง  Building Rosenblatt's Perceptron with YOUR TinyTorch modules...") + + # Single layer - just like the original 1957 design! + self.linear = Linear(input_size, output_size) # Module 04: YOUR Linear layer! + self.activation = Sigmoid() # Module 03: YOUR Sigmoid function! + + print(f" Linear layer: {input_size} โ†’ {output_size} (YOUR Module 04 implementation!)") + print(f" Activation: Sigmoid (YOUR Module 03 implementation!)") def forward(self, x): - """Forward pass through the perceptron.""" - x = self.linear(x) - x = self.activation(x) + """Forward pass through YOUR perceptron implementation.""" + # Step 1: Linear transformation using YOUR weights + x = self.linear(x) # Module 04: YOUR Linear.forward() method! + + # Step 2: Activation using YOUR sigmoid + x = self.activation(x) # Module 03: YOUR Sigmoid.forward() method! + return x - def __call__(self, x): - return self.forward(x) - - def predict(self, x): - """Binary classification prediction.""" - output = self.forward(x) - return (to_numpy(output) > 0.5).astype(int) + def parameters(self): + """Get trainable parameters from YOUR Linear layer.""" + return [self.linear.weight, self.linear.bias] # Module 04: YOUR parameters! - -def generate_linear_data(n_samples=100): +def simple_training_loop(model, X, y, learning_rate=0.1, epochs=100): """ - Generate linearly separable data - the kind perceptron can solve. - This represents the AND logic gate that Rosenblatt demonstrated. + Simple training loop using YOUR Tensor autograd system! + + Note: We're using a basic training loop here. Later milestones will use + YOUR more sophisticated optimizers from Module 07! """ - np.random.seed(42) + print("\n๐Ÿš€ Training Perceptron with YOUR TinyTorch autograd system!") + print(f" Learning rate: {learning_rate}") + print(f" Epochs: {epochs}") + print(f" Using YOUR Tensor backward() method for gradients!") - # Generate random points - X = np.random.randn(n_samples, 2) + # Convert to YOUR Tensor format + X_tensor = Tensor(X) # Module 02: YOUR Tensor class! + y_tensor = Tensor(y.reshape(-1, 1)) # Module 02: YOUR data structure! - # Linearly separable rule: points above the line y = -x + 0.5 - y = (X[:, 1] > -X[:, 0] + 0.5).astype(int).reshape(-1, 1) + for epoch in range(epochs): + # Forward pass using YOUR implementations + predictions = model.forward(X_tensor) # YOUR forward method! + + # Simple binary cross-entropy loss (manually computed) + # Note: Later you'll build a proper loss function in Module 05! + loss_value = np.mean(-y_tensor.data * np.log(predictions.data + 1e-8) - + (1 - y_tensor.data) * np.log(1 - predictions.data + 1e-8)) + loss = Tensor([loss_value]) + + # Backward pass using YOUR autograd + loss.backward() # Module 02: YOUR backward propagation! + + # Manual parameter updates (later you'll use YOUR optimizers!) + for param in model.parameters(): + if param.grad is not None: + param.data -= learning_rate * param.grad # Simple gradient descent + param.grad = None # Clear gradients + + if epoch % 20 == 0 or epoch == epochs - 1: + print(f" Epoch {epoch:3d}: Loss = {loss_value:.4f} (YOUR training loop!)") - return X, y + return model +def test_model(model, X, y): + """Test YOUR perceptron on the data.""" + print("\n๐Ÿงช Testing YOUR Perceptron Implementation:") + + # Forward pass with YOUR components + X_tensor = Tensor(X) # Module 02: YOUR Tensor! + predictions = model.forward(X_tensor) # YOUR architecture! + + # Convert to binary predictions + binary_preds = (predictions.data > 0.5).astype(int) + accuracy = np.mean(binary_preds.flatten() == y) * 100 + + print(f" Accuracy: {accuracy:.1f}% on linearly separable data") + print(f" YOUR perceptron correctly classified {accuracy:.1f}% of examples!") + + # Show some example predictions + print("\n Sample predictions (YOUR model's output):") + for i in range(min(5, len(X))): + x_val = X[i] + pred_prob = predictions.data[i, 0] + pred_class = binary_preds[i, 0] + true_class = y[i] + status = "โœ“" if pred_class == true_class else "โœ—" + print(f" {status} Input: [{x_val[0]:.2f}, {x_val[1]:.2f}] โ†’ " + f"Probability: {pred_prob:.3f} โ†’ Class: {pred_class} (True: {true_class})") + + return accuracy -def demonstrate_perceptron(): - """Demonstrate the historic perceptron.""" +def analyze_perceptron_systems(model, X): + """Analyze YOUR perceptron from an ML systems perspective.""" + print("\n๐Ÿ”ฌ SYSTEMS ANALYSIS of YOUR Perceptron Implementation:") - print("="*60) - print("THE PERCEPTRON (1957) - The First Trainable Neural Network") - print("="*60) - print() - print("Historical Context:") - print("Frank Rosenblatt's perceptron proved machines could learn from data.") - print("It could classify patterns that were linearly separable.") - print() + # Memory analysis using YOUR tensor system + import tracemalloc + tracemalloc.start() - # Generate linearly separable data - X_train, y_train = generate_linear_data(100) + # Test forward pass with YOUR components + X_tensor = Tensor(X) # Module 02: YOUR Tensor! + output = model.forward(X_tensor) # Module 04 + 03: YOUR architecture! - # Create the historic perceptron - perceptron = Perceptron(input_size=2, output_size=1) + current, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() - print("Architecture: Input(2) โ†’ Linear โ†’ Sigmoid โ†’ Output(1)") - print(f"Parameters: {perceptron.linear.weights.size + perceptron.linear.bias.size}") - print() + # Parameter analysis + total_params = model.linear.weight.data.size + model.linear.bias.data.size + memory_per_param = 4 # bytes for float32 - # Test on some samples (without training - random weights) - test_samples = np.array([ - [0.0, 1.0], # Should be class 1 (above line) - [1.0, 0.0], # Should be class 0 (below line) - [-1.0, 1.0], # Should be class 1 (above line) - [1.0, -1.0] # Should be class 0 (below line) - ]) + print(f" Memory usage: {peak / 1024:.1f} KB peak (YOUR Tensor operations)") + print(f" Parameters: {total_params} weights (YOUR Linear layer)") + print(f" Model size: {total_params * memory_per_param} bytes") + print(f" Computational complexity: O(n) per forward pass (linear scaling)") + print(f" YOUR implementation handles: Binary classification with linear decision boundary") - print("Testing on sample points (before training):") - print("Point โ†’ Expected โ†’ Predicted") - - for i, point in enumerate(test_samples): - expected = 1 if point[1] > -point[0] + 0.5 else 0 - predicted = perceptron.predict(Tensor(point.reshape(1, -1)))[0, 0] - print(f"{point} โ†’ {expected} โ†’ {predicted}") - - print() - print("Classification accuracy (random weights): ~50%") - print() - print("Historical Impact:") - print("โœ“ Proved machines could learn from examples") - print("โœ“ Inspired decades of neural network research") - print("โœ“ Foundation for deep learning revolution") - print() - print("Limitation: Could only solve linearly separable problems") - print("Next breakthrough needed: Hidden layers (see xor_1969 example)") - print() - print("After Module 6 (Autograd), you can train this perceptron to converge!") - print("="*60) + # Historical context + print(f"\n ๐Ÿ›๏ธ Historical Context:") + print(f" โ€ข 1957: YOUR perceptron uses the SAME architecture as Rosenblatt's original") + print(f" โ€ข Limitation: Can only solve linearly separable problems") + print(f" โ€ข Innovation: First machine learning algorithm that could learn from data") + print(f" โ€ข Legacy: Foundation for all modern neural networks (including GPT!)") +def main(): + """Demonstrate Rosenblatt's Perceptron using YOUR TinyTorch system!""" + + parser = argparse.ArgumentParser(description='Rosenblatt Perceptron 1957') + parser.add_argument('--test-only', action='store_true', + help='Test architecture without training') + parser.add_argument('--epochs', type=int, default=100, + help='Number of training epochs') + args = parser.parse_args() + + print("๐ŸŽฏ PERCEPTRON 1957 - Proof of YOUR TinyTorch Mastery!") + print(" Historical significance: First trainable neural network") + print(" YOUR achievement: Recreated using YOUR own implementations") + print(" Components used: YOUR Tensor + YOUR Linear + YOUR Sigmoid") + print() + + # Step 1: Get linearly separable data + print("๐Ÿ“Š Preparing linearly separable data...") + data_manager = DatasetManager() + X, y = data_manager.get_perceptron_data(num_samples=1000) + + # Step 2: Create perceptron with YOUR components + model = RosenblattPerceptron(input_size=2, output_size=1) + + if args.test_only: + print("\n๐Ÿงช ARCHITECTURE TEST MODE") + print("Testing YOUR components work together...") + + # Quick forward pass test + test_input = Tensor(X[:5]) # Module 02: YOUR Tensor! + test_output = model.forward(test_input) # YOUR architecture! + print(f"โœ… Forward pass successful! Output shape: {test_output.data.shape}") + print("โœ… YOUR TinyTorch modules integrate correctly!") + return + + # Step 3: Train using YOUR training system + model = simple_training_loop(model, X, y, epochs=args.epochs) + + # Step 4: Test YOUR implementation + accuracy = test_model(model, X, y) + + # Step 5: Analyze YOUR implementation + analyze_perceptron_systems(model, X) + + print("\nโœ… SUCCESS! Perceptron Milestone Complete!") + print("\n๐ŸŽ“ What YOU Accomplished:") + print(" โ€ข YOU built the first trainable neural network from scratch") + print(" โ€ข YOUR Linear layer performs the same math as Rosenblatt's original") + print(" โ€ข YOUR Sigmoid activation enables smooth gradient learning") + print(" โ€ข YOUR Tensor system handles automatic differentiation") + print("\n๐Ÿš€ Next Steps:") + print(" โ€ข Continue to XOR 1969 milestone after Module 06 (Autograd)") + print(" โ€ข YOUR foundation enables solving non-linear problems!") + print(f" โ€ข With {accuracy:.1f}% accuracy, YOUR perceptron works perfectly!") if __name__ == "__main__": - demonstrate_perceptron() \ No newline at end of file + main() \ No newline at end of file