diff --git a/modules/layers/layers_dev.py b/modules/layers/layers_dev.py index 04f18fa3..eaac932b 100644 --- a/modules/layers/layers_dev.py +++ b/modules/layers/layers_dev.py @@ -85,25 +85,80 @@ from tinytorch.core.activations import ReLU, Sigmoid, Tanh """ ## Step 1: What is a Layer? -A **layer** is a function that transforms tensors. Think of it as: -- **Input**: Tensor with some shape -- **Transformation**: Mathematical operation (linear, nonlinear, etc.) -- **Output**: Tensor with possibly different shape +### Definition +A **layer** is a function that transforms tensors. Think of it as a mathematical operation that takes input data and produces output data: -**The fundamental insight**: Neural networks are just function composition! +``` +Input Tensor → Layer → Output Tensor +``` + +### Why Layers Matter in Neural Networks +Layers are the fundamental building blocks of all neural networks because: +- **Modularity**: Each layer has a specific job (linear transformation, nonlinearity, etc.) +- **Composability**: Layers can be combined to create complex functions +- **Learnability**: Each layer has parameters that can be learned from data +- **Interpretability**: Different layers learn different features + +### The Fundamental Insight +**Neural networks are just function composition!** ``` x → Layer1 → Layer2 → Layer3 → y ``` -**Why layers matter**: -- They're the building blocks of all neural networks -- Each layer learns a different transformation -- Composing layers creates complex functions -- Understanding layers = understanding neural networks +Each layer transforms the data, and the final output is the composition of all these transformations. 
+ +### Real-World Examples +- **Dense Layer**: Learns linear relationships between features +- **Convolutional Layer**: Learns spatial patterns in images +- **Recurrent Layer**: Learns temporal patterns in sequences +- **Activation Layer**: Adds nonlinearity to make networks powerful + +### Visual Intuition +``` +Input: [1, 2, 3] (3 features) +Dense Layer: y = Wx + b +Weights W: [[0.1, 0.2, 0.3], + [0.4, 0.5, 0.6]] (2×3 matrix) +Bias b: [0.1, 0.2] (2 values) +Output: [0.1*1 + 0.2*2 + 0.3*3 + 0.1, + 0.4*1 + 0.5*2 + 0.6*3 + 0.2] = [1.5, 3.4] +``` Let's start with the most important layer: **Dense** (also called Linear or Fully Connected). """ +# %% [markdown] +""" +## Step 2: Understanding Matrix Multiplication + +Before we build layers, let's understand the core operation: **matrix multiplication**. This is what powers all neural network computations. + +### Why Matrix Multiplication Matters +- **Efficiency**: Process multiple inputs at once +- **Parallelization**: GPU acceleration works great with matrix operations +- **Batch processing**: Handle multiple samples simultaneously +- **Mathematical foundation**: Linear algebra is the language of neural networks + +### The Math Behind It +For matrices A (m×n) and B (n×p), the result C (m×p) is: +``` +C[i,j] = sum(A[i,k] * B[k,j] for k in range(n)) +``` + +### Visual Example +``` +A = [[1, 2], B = [[5, 6], + [3, 4]] [7, 8]] + +C = A @ B = [[1*5 + 2*7, 1*6 + 2*8], + [3*5 + 4*7, 3*6 + 4*8]] + = [[19, 22], + [43, 50]] +``` + +Let's implement this step by step! +""" + # %% #| export def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray: @@ -120,6 +175,30 @@ def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray: Matrix of shape (m, p) where C[i,j] = sum(A[i,k] * B[k,j] for k in range(n)) TODO: Implement matrix multiplication using three nested for-loops. + + APPROACH: + 1. Get the dimensions: m, n from A and n2, p from B + 2. Check that n == n2 (matrices must be compatible) + 3.
Create output matrix C of shape (m, p) filled with zeros + 4. Use three nested loops: + - i loop: rows of A (0 to m-1) + - j loop: columns of B (0 to p-1) + - k loop: shared dimension (0 to n-1) + 5. For each (i,j), compute: C[i,j] += A[i,k] * B[k,j] + + EXAMPLE: + A = [[1, 2], B = [[5, 6], + [3, 4]] [7, 8]] + + C[0,0] = A[0,0]*B[0,0] + A[0,1]*B[1,0] = 1*5 + 2*7 = 19 + C[0,1] = A[0,0]*B[0,1] + A[0,1]*B[1,1] = 1*6 + 2*8 = 22 + C[1,0] = A[1,0]*B[0,0] + A[1,1]*B[1,0] = 3*5 + 4*7 = 43 + C[1,1] = A[1,0]*B[0,1] + A[1,1]*B[1,1] = 3*6 + 4*8 = 50 + + HINTS: + - Start with C = np.zeros((m, p)) + - Use three nested for loops: for i in range(m): for j in range(p): for k in range(n): + - Accumulate the sum: C[i,j] += A[i,k] * B[k,j] """ raise NotImplementedError("Student implementation required") @@ -143,6 +222,81 @@ def matmul_naive(A: np.ndarray, B: np.ndarray) -> np.ndarray: C[i, j] += A[i, k] * B[k, j] return C +# %% [markdown] +""" +### 🧪 Test Your Matrix Multiplication +""" + +# %% +# Test matrix multiplication +print("Testing matrix multiplication...") + +try: + # Test case 1: Simple 2x2 matrices + A = np.array([[1, 2], [3, 4]], dtype=np.float32) + B = np.array([[5, 6], [7, 8]], dtype=np.float32) + + result = matmul_naive(A, B) + expected = np.array([[19, 22], [43, 50]], dtype=np.float32) + + print(f"✅ Matrix A:\n{A}") + print(f"✅ Matrix B:\n{B}") + print(f"✅ Your result:\n{result}") + print(f"✅ Expected:\n{expected}") + + assert np.allclose(result, expected), "❌ Result doesn't match expected!" + print("🎉 Matrix multiplication works!") + + # Test case 2: Compare with NumPy + numpy_result = A @ B + assert np.allclose(result, numpy_result), "❌ Doesn't match NumPy result!" + print("✅ Matches NumPy implementation!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement matmul_naive above!") + +# %% [markdown] +""" +## Step 3: Building the Dense Layer + +Now let's build the **Dense layer**, the most fundamental building block of neural networks. 
A Dense layer performs a linear transformation: `y = Wx + b` + +### What is a Dense Layer? +- **Linear transformation**: `y = Wx + b` +- **W**: Weight matrix (learnable parameters) +- **x**: Input tensor +- **b**: Bias vector (learnable parameters) +- **y**: Output tensor + +### Why Dense Layers Matter +- **Universal approximation**: Can approximate any function with enough neurons +- **Feature learning**: Each neuron learns a different feature +- **Nonlinearity**: When combined with activation functions, becomes very powerful +- **Foundation**: All other layers build on this concept + +### The Math +For input x of shape (batch_size, input_size): +- **W**: Weight matrix of shape (input_size, output_size) +- **b**: Bias vector of shape (output_size) +- **y**: Output of shape (batch_size, output_size) + +### Visual Example +``` +Input: x = [1, 2, 3] (3 features) +Weights: W = [[0.1, 0.2], Bias: b = [0.1, 0.2] + [0.3, 0.4], + [0.5, 0.6]] + +Step 1: Wx = [0.1*1 + 0.3*2 + 0.5*3, 0.2*1 + 0.4*2 + 0.6*3] + = [2.2, 2.8] + +Step 2: y = Wx + b = [2.2 + 0.1, 2.8 + 0.2] = [2.3, 3.0] +``` + +Let's implement this! +""" + # %% #| export class Dense: @@ -159,6 +313,23 @@ class Dense: use_naive_matmul: Whether to use naive matrix multiplication (for learning) TODO: Implement the Dense layer with weight initialization and forward pass. + + APPROACH: + 1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul) + 2. Initialize weights with small random values (Xavier/Glorot initialization) + 3. Initialize bias to zeros (if use_bias=True) + 4.
Implement forward pass using matrix multiplication and bias addition + + EXAMPLE: + layer = Dense(input_size=3, output_size=2) + x = Tensor([[1, 2, 3]]) # batch_size=1, input_size=3 + y = layer(x) # shape: (1, 2) + + HINTS: + - Use np.random.randn() for random initialization + - Scale weights by sqrt(2/(input_size + output_size)) for Xavier init + - Store weights and bias as numpy arrays + - Use matmul_naive or @ operator based on use_naive_matmul flag """ def __init__(self, input_size: int, output_size: int, use_bias: bool = True, @@ -176,6 +347,18 @@ class Dense: 1. Store layer parameters (input_size, output_size, use_bias, use_naive_matmul) 2. Initialize weights with small random values 3. Initialize bias to zeros (if use_bias=True) + + STEP-BY-STEP: + 1. Store the parameters as instance variables + 2. Calculate scale factor for Xavier initialization: sqrt(2/(input_size + output_size)) + 3. Initialize weights: np.random.randn(input_size, output_size) * scale + 4. If use_bias=True, initialize bias: np.zeros(output_size) + 5. If use_bias=False, set bias to None + + EXAMPLE: + Dense(3, 2) creates: + - weights: shape (3, 2) with small random values + - bias: shape (2,) with zeros """ raise NotImplementedError("Student implementation required") @@ -191,8 +374,27 @@ class Dense: TODO: Implement matrix multiplication and bias addition - Use self.use_naive_matmul to choose between NumPy and naive implementation - - If use_naive_matmul=True, use matmul_naive(x.data, self.weights.data) - - If use_naive_matmul=False, use x.data @ self.weights.data + - If use_naive_matmul=True, use matmul_naive(x.data, self.weights) + - If use_naive_matmul=False, use x.data @ self.weights + - Add bias if self.use_bias=True + + STEP-BY-STEP: + 1. Perform matrix multiplication: Wx + - If use_naive_matmul: result = matmul_naive(x.data, self.weights) + - Else: result = x.data @ self.weights + 2. Add bias if use_bias: result += self.bias + 3. 
Return Tensor(result) + + EXAMPLE: + Input x: Tensor([[1, 2, 3]]) # shape (1, 3) + Weights: shape (3, 2) + Output: Tensor([[val1, val2]]) # shape (1, 2) + + HINTS: + - x.data gives you the numpy array + - self.weights is your weight matrix + - Use broadcasting for bias addition: result + self.bias + - Return Tensor(result) to wrap the result """ raise NotImplementedError("Student implementation required") @@ -213,40 +415,52 @@ class Dense: def __init__(self, input_size: int, output_size: int, use_bias: bool = True, use_naive_matmul: bool = False): - """Initialize Dense layer with random weights.""" + """ + Initialize Dense layer with random weights. + + Args: + input_size: Number of input features + output_size: Number of output features + use_bias: Whether to include bias term + use_naive_matmul: Use naive matrix multiplication (for learning) + """ + # Store parameters self.input_size = input_size self.output_size = output_size self.use_bias = use_bias self.use_naive_matmul = use_naive_matmul - # Initialize weights with Xavier/Glorot initialization - # This helps with gradient flow during training - limit = math.sqrt(6.0 / (input_size + output_size)) - self.weights = Tensor( - np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32) - ) + # Xavier/Glorot initialization + scale = np.sqrt(2.0 / (input_size + output_size)) + self.weights = np.random.randn(input_size, output_size).astype(np.float32) * scale - # Initialize bias to zeros + # Initialize bias if use_bias: - self.bias = Tensor(np.zeros(output_size, dtype=np.float32)) + self.bias = np.zeros(output_size, dtype=np.float32) else: self.bias = None def forward(self, x: Tensor) -> Tensor: - """Forward pass: y = Wx + b""" - # Choose matrix multiplication implementation + """ + Forward pass: y = Wx + b + + Args: + x: Input tensor of shape (batch_size, input_size) + + Returns: + Output tensor of shape (batch_size, output_size) + """ + # Matrix multiplication if self.use_naive_matmul: - # Use 
naive implementation (for learning) - output = Tensor(matmul_naive(x.data, self.weights.data)) + result = matmul_naive(x.data, self.weights) else: - # Use NumPy's optimized implementation (for speed) - output = Tensor(x.data @ self.weights.data) + result = x.data @ self.weights - # Add bias if present - if self.bias is not None: - output = Tensor(output.data + self.bias.data) + # Add bias + if self.use_bias: + result += self.bias - return output + return Tensor(result) def __call__(self, x: Tensor) -> Tensor: """Make layer callable: layer(x) same as layer.forward(x)""" @@ -255,36 +469,38 @@ class Dense: # %% [markdown] """ ### 🧪 Test Your Dense Layer - -Once you implement the Dense layer above, run this cell to test it: """ # %% -# Test the Dense layer +# Test Dense layer +print("Testing Dense layer...") + try: - print("=== Testing Dense Layer ===") + # Test basic Dense layer + layer = Dense(input_size=3, output_size=2, use_bias=True) + x = Tensor([[1, 2, 3]]) # batch_size=1, input_size=3 - # Create a simple Dense layer: 3 inputs → 2 outputs - layer = Dense(input_size=3, output_size=2) - print(f"Created Dense layer: {layer.input_size} → {layer.output_size}") - print(f"Weights shape: {layer.weights.shape}") - print(f"Bias shape: {layer.bias.shape if layer.bias else 'No bias'}") + print(f"✅ Input shape: {x.shape}") + print(f"✅ Layer weights shape: {layer.weights.shape}") + print(f"✅ Layer bias shape: {layer.bias.shape}") - # Test with a single example - x = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3) y = layer(x) - print(f"Input shape: {x.shape}") - print(f"Output shape: {y.shape}") - print(f"Input: {x.data}") - print(f"Output: {y.data}") + print(f"✅ Output shape: {y.shape}") + print(f"✅ Output: {y}") - # Test with batch - x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3) - y_batch = layer(x_batch) - print(f"\nBatch input shape: {x_batch.shape}") - print(f"Batch output shape: {y_batch.shape}") + # Test without bias + layer_no_bias = 
Dense(input_size=2, output_size=1, use_bias=False) + x2 = Tensor([[1, 2]]) + y2 = layer_no_bias(x2) + print(f"✅ No bias output: {y2}") - print("✅ Dense layer working!") + # Test naive matrix multiplication + layer_naive = Dense(input_size=2, output_size=2, use_naive_matmul=True) + x3 = Tensor([[1, 2]]) + y3 = layer_naive(x3) + print(f"✅ Naive matmul output: {y3}") + + print("\n🎉 All Dense layer tests passed!") except Exception as e: print(f"❌ Error: {e}") @@ -292,369 +508,155 @@ except Exception as e: # %% [markdown] """ -## Step 1.5: Understanding Matrix Multiplication +## Step 4: Composing Layers with Activations -Let's compare the naive matrix multiplication with NumPy's optimized version! +Now let's see how layers work together! A neural network is just layers composed with activation functions. + +### Why Layer Composition Matters +- **Nonlinearity**: Activation functions make networks powerful +- **Feature learning**: Each layer learns different levels of features +- **Universal approximation**: Can approximate any function +- **Modularity**: Easy to experiment with different architectures + +### The Pattern +``` +Input → Dense → Activation → Dense → Activation → Output +``` + +### Real-World Example +``` +Input: [1, 2, 3] (3 features) +Dense(3→2): [1.4, 2.8] (linear transformation) +ReLU: [1.4, 2.8] (nonlinearity) +Dense(2→1): [3.2] (final prediction) +``` + +Let's build a simple network! 
""" # %% -# Test matrix multiplication implementations +# Test layer composition +print("Testing layer composition...") + try: - print("=== Testing Matrix Multiplication Implementations ===") + # Create a simple network: Dense → ReLU → Dense + dense1 = Dense(input_size=3, output_size=2) + relu = ReLU() + dense2 = Dense(input_size=2, output_size=1) - # Create small test matrices - A = np.array([[1, 2], [3, 4]], dtype=np.float32) # 2x2 - B = np.array([[5, 6], [7, 8]], dtype=np.float32) # 2x2 + # Test input + x = Tensor([[1, 2, 3]]) + print(f"✅ Input: {x}") - print(f"Matrix A (2x2):\n{A}") - print(f"Matrix B (2x2):\n{B}") + # Forward pass through the network + h1 = dense1(x) + print(f"✅ After Dense1: {h1}") - # Test NumPy's implementation - C_numpy = A @ B - print(f"\nNumPy result (A @ B):\n{C_numpy}") + h2 = relu(h1) + print(f"✅ After ReLU: {h2}") - # Test naive implementation - C_naive = matmul_naive(A, B) - print(f"Naive result:\n{C_naive}") + y = dense2(h2) + print(f"✅ Final output: {y}") - # Compare results - if np.allclose(C_numpy, C_naive): - print("✅ Both implementations give the same result!") - else: - print("❌ Results differ! Check your naive implementation.") - - # Show the computation step by step - print(f"\n📊 Step-by-step computation for C[0,0]:") - print(f"C[0,0] = A[0,0]*B[0,0] + A[0,1]*B[1,0]") - print(f"C[0,0] = {A[0,0]}*{B[0,0]} + {A[0,1]}*{B[1,0]}") - print(f"C[0,0] = {A[0,0]*B[0,0]} + {A[0,1]*B[1,0]}") - print(f"C[0,0] = {A[0,0]*B[0,0] + A[0,1]*B[1,0]}") - print(f"Expected: {C_numpy[0,0]}") + print("\n🎉 Layer composition works!") + print("This is how neural networks work: layers + activations!") except Exception as e: print(f"❌ Error: {e}") - print("Make sure to implement matmul_naive above!") + print("Make sure all your layers and activations are working!") + +# %% [markdown] +""" +## Step 5: Performance Comparison + +Let's compare our naive matrix multiplication with NumPy's optimized version to understand why optimization matters in ML. 
+ +### Why Performance Matters +- **Training time**: Neural networks train for hours/days +- **Inference speed**: Real-time applications need fast predictions +- **GPU utilization**: Optimized operations use hardware efficiently +- **Scalability**: Large models need efficient implementations +""" # %% # Performance comparison +print("Comparing naive vs NumPy matrix multiplication...") + try: - print("=== Performance Comparison ===") - - # Create larger matrices for timing - size = 50 - A = np.random.randn(size, size).astype(np.float32) - B = np.random.randn(size, size).astype(np.float32) - import time - # Time NumPy implementation - start_time = time.time() - C_numpy = A @ B - numpy_time = time.time() - start_time + # Create test matrices + A = np.random.randn(100, 100).astype(np.float32) + B = np.random.randn(100, 100).astype(np.float32) # Time naive implementation start_time = time.time() - C_naive = matmul_naive(A, B) + result_naive = matmul_naive(A, B) naive_time = time.time() - start_time - print(f"Matrix size: {size}x{size}") - print(f"NumPy time: {numpy_time:.6f} seconds") - print(f"Naive time: {naive_time:.6f} seconds") - print(f"Speedup: {naive_time/numpy_time:.1f}x slower") + # Time NumPy implementation + start_time = time.time() + result_numpy = A @ B + numpy_time = time.time() - start_time - # Verify results are the same - if np.allclose(C_numpy, C_naive): - print("✅ Results are identical!") - else: - print("❌ Results differ!") + print(f"✅ Naive time: {naive_time:.4f} seconds") + print(f"✅ NumPy time: {numpy_time:.4f} seconds") + print(f"✅ Speedup: {naive_time/numpy_time:.1f}x faster") - print(f"\n💡 Why is NumPy so much faster?") - print(f" • Vectorized operations (no Python loops)") - print(f" • Optimized C/Fortran backend") - print(f" • Cache-friendly memory access") - print(f" • Parallel processing") + # Verify correctness + assert np.allclose(result_naive, result_numpy), "Results don't match!" 
+ print("✅ Results are identical!") + + print("\n💡 This is why we use optimized libraries in production!") except Exception as e: print(f"❌ Error: {e}") - print("Make sure to implement matmul_naive above!") - -# %% -# Test Dense layer with both implementations -try: - print("=== Testing Dense Layer with Both Implementations ===") - - # Create test data - x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3) - - # Test with NumPy implementation - layer_numpy = Dense(input_size=3, output_size=2, use_naive_matmul=False) - y_numpy = layer_numpy(x) - - # Test with naive implementation - layer_naive = Dense(input_size=3, output_size=2, use_naive_matmul=True) - y_naive = layer_naive(x) - - print(f"Input shape: {x.shape}") - print(f"NumPy output: {y_numpy.data}") - print(f"Naive output: {y_naive.data}") - - # Compare results - if np.allclose(y_numpy.data, y_naive.data): - print("✅ Both Dense implementations give the same result!") - else: - print("❌ Results differ! Check your implementations.") - - print(f"\n🎯 Key Insight:") - print(f" • Both implementations compute the same mathematical operation") - print(f" • NumPy is much faster but hides the computation") - print(f" • Naive implementation shows you exactly what's happening") - print(f" • Understanding the naive version helps you understand neural networks!") - -except Exception as e: - print(f"❌ Error: {e}") - print("Make sure to implement both matmul_naive and Dense layer!") # %% [markdown] """ -## Step 2: Activation Functions - Adding Nonlinearity +## 🎯 Module Summary -Now we'll use the activation functions from the **activations** module! +Congratulations! 
You've built the foundation of neural network layers: -**Clean Architecture**: We import the activation functions rather than redefining them: -```python -from tinytorch.core.activations import ReLU, Sigmoid, Tanh -``` +### What You've Accomplished +✅ **Matrix Multiplication**: Understanding the core operation +✅ **Dense Layer**: Linear transformation with weights and bias +✅ **Layer Composition**: Combining layers with activations +✅ **Performance Awareness**: Understanding optimization importance +✅ **Testing**: Immediate feedback on your implementations -**Why this matters**: -- **Separation of concerns**: Math functions vs. layer building blocks -- **Reusability**: Activations can be used anywhere in the system -- **Maintainability**: One place to update activation implementations -- **Composability**: Clean imports make neural networks easier to build +### Key Concepts You've Learned +- **Layers** are functions that transform tensors +- **Matrix multiplication** powers all neural network computations +- **Dense layers** perform linear transformations: `y = Wx + b` +- **Layer composition** creates complex functions from simple building blocks +- **Performance** matters for real-world ML applications -**Why nonlinearity matters**: Without it, stacking layers is pointless! 
-``` -Linear → Linear → Linear = Just one big Linear transformation -Linear → NonLinear → Linear = Can learn complex patterns -``` -""" +### What's Next +In the next modules, you'll build on this foundation: +- **Networks**: Compose layers into complete models +- **Training**: Learn parameters with gradients and optimization +- **Convolutional layers**: Process spatial data like images +- **Recurrent layers**: Process sequential data like text -# %% [markdown] -""" -### 🧪 Test Activation Functions from Activations Module +### Real-World Connection +Your Dense layer is now ready to: +- Learn patterns in data through weight updates +- Transform features for classification and regression +- Serve as building blocks for complex architectures +- Integrate with the rest of the TinyTorch ecosystem -Let's test that we can use the activation functions from the activations module: +**Ready for the next challenge?** Let's move on to building complete neural networks! """ # %% -# Test activation functions from activations module -try: - print("=== Testing Activation Functions from Activations Module ===") - - # Test data: mix of positive, negative, and zero - x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) - print(f"Input: {x.data}") - - # Test ReLU from activations module - relu = ReLU() - y_relu = relu(x) - print(f"ReLU output: {y_relu.data}") - - # Test Sigmoid from activations module - sigmoid = Sigmoid() - y_sigmoid = sigmoid(x) - print(f"Sigmoid output: {y_sigmoid.data}") - - # Test Tanh from activations module - tanh = Tanh() - y_tanh = tanh(x) - print(f"Tanh output: {y_tanh.data}") - - print("✅ Activation functions from activations module working!") - print("🎉 Clean architecture: layers module uses activations module!") - -except Exception as e: - print(f"❌ Error: {e}") - print("Make sure the activations module is properly exported!") - -# %% [markdown] -""" -## Step 3: Layer Composition - Building Neural Networks - -Now comes the magic! 
We can **compose** layers to build neural networks: - -``` -Input → Dense → ReLU → Dense → Sigmoid → Output -``` - -This is a 2-layer neural network that can learn complex nonlinear patterns! - -**Notice the clean architecture**: -- Dense layers handle linear transformations -- Activation functions (from activations module) handle nonlinearity -- Composition creates complex behaviors from simple building blocks -""" - -# %% -# Build a simple 2-layer neural network -try: - print("=== Building a 2-Layer Neural Network ===") - - # Network architecture: 3 → 4 → 2 - # Input: 3 features - # Hidden: 4 neurons with ReLU - # Output: 2 neurons with Sigmoid - - layer1 = Dense(input_size=3, output_size=4) - activation1 = ReLU() # From activations module - layer2 = Dense(input_size=4, output_size=2) - activation2 = Sigmoid() # From activations module - - print("Network architecture:") - print(f" Input: 3 features") - print(f" Hidden: {layer1.input_size} → {layer1.output_size} (Dense + ReLU)") - print(f" Output: {layer2.input_size} → {layer2.output_size} (Dense + Sigmoid)") - - # Test with sample data - x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # 2 examples, 3 features each - print(f"\nInput shape: {x.shape}") - print(f"Input data: {x.data}") - - # Forward pass through the network - h1 = layer1(x) # Dense layer 1 - h1_activated = activation1(h1) # ReLU activation - h2 = layer2(h1_activated) # Dense layer 2 - output = activation2(h2) # Sigmoid activation - - print(f"\nAfter layer 1: {h1.shape}") - print(f"After ReLU: {h1_activated.shape}") - print(f"After layer 2: {h2.shape}") - print(f"Final output: {output.shape}") - print(f"Output values: {output.data}") - - print("\n🎉 Neural network working! 
You just built your first neural network!") - print("🏗️ Clean architecture: Dense layers + Activations module = Neural Network") - print("Notice how the network transforms 3D input into 2D output through learned transformations.") - -except Exception as e: - print(f"❌ Error: {e}") - print("Make sure to implement the layers and check activations module!") - -# %% [markdown] -""" -## Step 4: Understanding What We Built - -Congratulations! You just implemented a clean, modular neural network architecture: - -### 🧱 **What You Built** -1. **Dense Layer**: Linear transformation `y = Wx + b` -2. **Activation Functions**: Imported from activations module (ReLU, Sigmoid, Tanh) -3. **Layer Composition**: Chaining layers to build networks - -### 🏗️ **Clean Architecture Benefits** -- **Separation of concerns**: Math functions vs. layer building blocks -- **Reusability**: Activations can be used across different modules -- **Maintainability**: One place to update activation implementations -- **Composability**: Clean imports make complex networks easier to build - -### 🎯 **Key Insights** -- **Layers are functions**: They transform tensors from one space to another -- **Composition creates complexity**: Simple layers → complex networks -- **Nonlinearity is crucial**: Without it, deep networks are just linear transformations -- **Neural networks are function approximators**: They learn to map inputs to outputs -- **Modular design**: Building blocks can be combined in many ways - -### 🚀 **What's Next** -In the next modules, you'll learn: -- **Training**: How networks learn from data (backpropagation, optimizers) -- **Architectures**: Specialized layers for different problems (CNNs, RNNs) -- **Applications**: Using networks for real problems - -### 🔧 **Export to Package** -Run this to export your layers to the TinyTorch package: -```bash -python bin/tito.py sync -``` - -Then test your implementation: -```bash -python bin/tito.py test --module layers -``` - -**Great job! 
You've built a clean, modular foundation for neural networks!** 🎉 -""" - -# %% -# Final demonstration: A more complex example -try: - print("=== Final Demo: Image Classification Network ===") - - # Simulate a small image: 28x28 pixels flattened to 784 features - # This is like a tiny MNIST digit - image_size = 28 * 28 # 784 pixels - num_classes = 10 # 10 digits (0-9) - - # Build a 3-layer network for digit classification - # 784 → 128 → 64 → 10 - layer1 = Dense(input_size=image_size, output_size=128) - relu1 = ReLU() # From activations module - layer2 = Dense(input_size=128, output_size=64) - relu2 = ReLU() # From activations module - layer3 = Dense(input_size=64, output_size=num_classes) - softmax = Sigmoid() # Using Sigmoid as a simple "probability-like" output - - print(f"Image classification network:") - print(f" Input: {image_size} pixels (28x28 image)") - print(f" Hidden 1: {layer1.input_size} → {layer1.output_size} (Dense + ReLU)") - print(f" Hidden 2: {layer2.input_size} → {layer2.output_size} (Dense + ReLU)") - print(f" Output: {layer3.input_size} → {layer3.output_size} (Dense + Sigmoid)") - - # Simulate a batch of 5 images - batch_size = 5 - fake_images = Tensor(np.random.randn(batch_size, image_size).astype(np.float32)) - - # Forward pass - h1 = relu1(layer1(fake_images)) - h2 = relu2(layer2(h1)) - predictions = softmax(layer3(h2)) - - print(f"\nBatch processing:") - print(f" Input batch shape: {fake_images.shape}") - print(f" Predictions shape: {predictions.shape}") - print(f" Sample predictions: {predictions.data[0]}") # First image predictions - - print("\n🎉 You built a neural network that could classify images!") - print("🏗️ Clean architecture: Dense layers + Activations module = Image Classifier") - print("With training, this network could learn to recognize handwritten digits!") - -except Exception as e: - print(f"❌ Error: {e}") - print("Check your layer implementations and activations module!") - -# %% [markdown] -""" -## 🎓 Module Summary - -### 
What You Learned -1. **Layer Architecture**: Dense layers as linear transformations -2. **Clean Dependencies**: Layers module uses activations module -3. **Function Composition**: Simple building blocks → complex networks -4. **Modular Design**: Separation of concerns for maintainable code - -### Key Architectural Insight -``` -activations (math functions) → layers (building blocks) → networks (applications) -``` - -This clean dependency graph makes the system: -- **Understandable**: Each module has a clear purpose -- **Testable**: Each module can be tested independently -- **Reusable**: Components can be used across different contexts -- **Maintainable**: Changes are localized to appropriate modules - -### Next Steps -- **Training**: Learn how networks learn from data -- **Advanced Architectures**: CNNs, RNNs, Transformers -- **Applications**: Real-world machine learning problems - -**Congratulations on building a clean, modular neural network foundation!** 🚀 -""" \ No newline at end of file +# Final verification +print("\n" + "="*50) +print("🎉 LAYERS MODULE COMPLETE!") +print("="*50) +print("✅ Matrix multiplication understanding") +print("✅ Dense layer implementation") +print("✅ Layer composition with activations") +print("✅ Performance awareness") +print("✅ Comprehensive testing") +print("\n🚀 Ready to build networks in the next module!") \ No newline at end of file