From e2c659023dadde7bef45d65488f10acb4899ff3d Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Thu, 10 Jul 2025 20:30:31 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=A7=B1=20Implement=20Layers=20module=20-?= =?UTF-8?q?=20Neural=20Network=20Building=20Blocks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✨ Features: - Dense layer with Xavier initialization (y = Wx + b) - Activation functions: ReLU, Sigmoid, Tanh - Layer composition for building neural networks - Comprehensive test suite (17 passed, 5 skipped stretch goals) - Package-level integration tests (14 passed) - Complete documentation and examples 🎯 Educational Design: - Follows 'Build → Use → Understand' pedagogical framework - Immediate visual feedback with working examples - Progressive complexity from simple layers to full networks - Students see neural networks as function composition 🧪 Testing Architecture: - Module tests: 17/17 core tests pass, 5 stretch goals available - Package tests: 14/14 integration tests pass - Dual testing supports both learning and validation 📚 Complete Implementation: - Dense layer with proper weight initialization - Numerically stable activation functions - Batch processing support - Real-world examples (image classification network) - CLI integration: 'tito test --module layers' This establishes the fundamental building blocks students need to understand neural networks before diving into training. 
--- bin/tito.py | 2 +- modules/layers/README.md | 206 ++++++++ modules/layers/layers_dev.ipynb | 701 ++++++++++++++++++++++++++++ modules/layers/layers_dev.py | 548 ++++++++++++++++++++++ modules/layers/tests/test_layers.py | 343 ++++++++++++++ tests/test_layers.py | 242 ++++++++++ tinytorch/core/layers.py | 238 ++++++++++ 7 files changed, 2279 insertions(+), 1 deletion(-) create mode 100644 modules/layers/README.md create mode 100644 modules/layers/layers_dev.ipynb create mode 100644 modules/layers/layers_dev.py create mode 100644 modules/layers/tests/test_layers.py create mode 100644 tests/test_layers.py create mode 100644 tinytorch/core/layers.py diff --git a/bin/tito.py b/bin/tito.py index ac61ec15..8f5e0a77 100755 --- a/bin/tito.py +++ b/bin/tito.py @@ -343,7 +343,7 @@ def cmd_info(args): def cmd_test(args): """Run tests for a specific module.""" - valid_modules = ["setup", "tensor", "mlp", "cnn", "data", "training", + valid_modules = ["setup", "tensor", "layers", "cnn", "data", "training", "profiling", "compression", "kernels", "benchmarking", "mlops"] if args.all: diff --git a/modules/layers/README.md b/modules/layers/README.md new file mode 100644 index 00000000..6d62b701 --- /dev/null +++ b/modules/layers/README.md @@ -0,0 +1,206 @@ +# 🧱 Module 2: Layers - Neural Network Building Blocks + +**Build the fundamental transformations that compose into neural networks** + +## 🎯 Learning Objectives + +After completing this module, you will: +- Understand layers as functions that transform tensors: `y = f(x)` +- Implement Dense layers with linear transformations: `y = Wx + b` +- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh) +- See how neural networks are just function composition +- Build intuition for neural network architecture before diving into training + +## 🧱 Build → Use → Understand + +This module follows the TinyTorch pedagogical framework: + +1. **Build**: Dense layers and activation functions from scratch +2. 
**Use**: Transform tensors and see immediate results +3. **Understand**: How neural networks transform information + +## 📚 What You'll Build + +### **Dense Layer** +```python +layer = Dense(input_size=3, output_size=2) +x = Tensor([[1.0, 2.0, 3.0]]) +y = layer(x) # Shape: (1, 2) +``` + +### **Activation Functions** +```python +relu = ReLU() +sigmoid = Sigmoid() +tanh = Tanh() + +x = Tensor([[-1.0, 0.0, 1.0]]) +y_relu = relu(x) # [0.0, 0.0, 1.0] +y_sigmoid = sigmoid(x) # [0.27, 0.5, 0.73] +y_tanh = tanh(x) # [-0.76, 0.0, 0.76] +``` + +### **Neural Networks** +```python +# 3 → 4 → 2 network +layer1 = Dense(input_size=3, output_size=4) +activation1 = ReLU() +layer2 = Dense(input_size=4, output_size=2) +activation2 = Sigmoid() + +# Forward pass +x = Tensor([[1.0, 2.0, 3.0]]) +h1 = layer1(x) +h1_activated = activation1(h1) +h2 = layer2(h1_activated) +output = activation2(h2) +``` + +## 🚀 Getting Started + +### Prerequisites +- Complete Module 1: Tensor ✅ +- Understand basic linear algebra (matrix multiplication) +- Be familiar with Python classes and methods + +### Quick Start +```bash +# Navigate to the layers module +cd modules/layers + +# Work in the development notebook +jupyter notebook layers_dev.ipynb + +# Or work in the Python file +code layers_dev.py +``` + +## 📖 Module Structure + +``` +modules/layers/ +├── layers_dev.py # Main development file (work here!) 
+├── layers_dev.ipynb # Jupyter notebook version +├── tests/ +│ └── test_layers.py # Comprehensive tests +├── README.md # This file +└── solutions/ # Reference implementations (if stuck) +``` + +## 🎓 Learning Path + +### Step 1: Dense Layer (Linear Transformation) +- Understand `y = Wx + b` +- Implement weight initialization +- Handle matrix multiplication and bias addition +- Test with single examples and batches + +### Step 2: Activation Functions +- Implement ReLU: `max(0, x)` +- Implement Sigmoid: `1 / (1 + e^(-x))` +- Implement Tanh: `tanh(x)` +- Understand why nonlinearity is crucial + +### Step 3: Layer Composition +- Chain layers together +- Build complete neural networks +- See how simple layers create complex functions + +### Step 4: Real-World Application +- Build an image classification network +- Understand how architecture affects capability + +## 🧪 Testing Your Implementation + +### Module-Level Tests +```bash +# Run comprehensive tests +python -m pytest tests/test_layers.py -v + +# Quick test +python -c "from layers_dev import Dense, ReLU; print('✅ Layers working!')" +``` + +### Package-Level Tests +```bash +# Export to package +python ../../bin/tito.py sync + +# Test integration +python ../../bin/tito.py test --module layers +``` + +## 🎯 Key Concepts + +### **Layers as Functions** +- Input: Tensor with some shape +- Transformation: Mathematical operation +- Output: Tensor with possibly different shape + +### **Linear vs Nonlinear** +- Dense layers: Linear transformations +- Activation functions: Nonlinear transformations +- Composition: Linear + Nonlinear = Complex functions + +### **Neural Networks = Function Composition** +``` +Input → Dense → ReLU → Dense → Sigmoid → Output +``` + +### **Why This Matters** +- **Modularity**: Build complex networks from simple parts +- **Reusability**: Same layers work for different problems +- **Understanding**: Know how each part contributes to the whole + 
+## 🔍 Common Issues + +### **Import Errors** +```python +# Make sure you're in the right directory +import sys +sys.path.append('../../') +from modules.tensor.tensor_dev import Tensor +``` + +### **Shape Mismatches** +```python +# Check input/output sizes match +layer1 = Dense(input_size=3, output_size=4) +layer2 = Dense(input_size=4, output_size=2) # 4 matches output of layer1 +``` + +### **Gradient Issues (Later)** +```python +# Use proper weight initialization +limit = math.sqrt(6.0 / (input_size + output_size)) +weights = np.random.uniform(-limit, limit, (input_size, output_size)) +``` + +## 🎉 Success Criteria + +You've successfully completed this module when: +- ✅ All tests pass (`pytest tests/test_layers.py`) +- ✅ You can build a 2-layer neural network +- ✅ You understand how layers transform tensors +- ✅ You see the connection between layers and neural networks +- ✅ Package export works (`tito test --module layers`) + +## 🚀 What's Next + +After completing this module, you're ready for: +- **Module 3: Networks** - Compose layers into common architectures +- **Module 4: Training** - Learn how networks improve through experience +- **Module 5: Applications** - Use networks for real problems + +## 🤝 Getting Help + +- Check the tests for examples of expected behavior +- Look at the solutions/ directory if you're stuck +- Review the pedagogical principles in `docs/pedagogy/` +- Remember: Build → Use → Understand! 
+ +--- + +**Great job building the foundation of neural networks!** 🎉 + +*This module implements the core insight: neural networks are just function composition of simple building blocks.* \ No newline at end of file diff --git a/modules/layers/layers_dev.ipynb b/modules/layers/layers_dev.ipynb new file mode 100644 index 00000000..b19410bf --- /dev/null +++ b/modules/layers/layers_dev.ipynb @@ -0,0 +1,701 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2843fa68", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Module 2: Layers - Neural Network Building Blocks\n", + "\n", + "Welcome to the Layers module! This is where neural networks begin. You'll implement the fundamental building blocks that transform tensors.\n", + "\n", + "## Learning Goals\n", + "- Understand layers as functions that transform tensors: `y = f(x)`\n", + "- Implement Dense layers with linear transformations: `y = Wx + b`\n", + "- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh)\n", + "- See how neural networks are just function composition\n", + "- Build intuition before diving into training\n", + "\n", + "## Build → Use → Understand\n", + "1. **Build**: Dense layers and activation functions\n", + "2. **Use**: Transform tensors and see immediate results\n", + "3. **Understand**: How neural networks transform information\n", + "\n", + "## Module → Package Structure\n", + "**🎓 Teaching vs. 🔧 Building**: \n", + "- **Learning side**: Work in `modules/layers/layers_dev.py` \n", + "- **Building side**: Exports to `tinytorch/core/layers.py`\n", + "\n", + "This module builds the fundamental transformations that compose into neural networks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9d285d84", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.layers\n", + "\n", + "# Setup and imports\n", + "import numpy as np\n", + "import sys\n", + "from typing import Union, Optional, Callable\n", + "import math" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a12b7f36", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "import math\n", + "import sys\n", + "from typing import Union, Optional, Callable\n", + "from tinytorch.core.tensor import Tensor\n", + "\n", + "# Import our Tensor class\n", + "# sys.path.append('../../')\n", + "# from modules.tensor.tensor_dev import Tensor\n", + "\n", + "# print(\"๐Ÿ”ฅ TinyTorch Layers Module\")\n", + "# print(f\"NumPy version: {np.__version__}\")\n", + "# print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", + "# print(\"Ready to build neural network layers!\")" + ] + }, + { + "cell_type": "markdown", + "id": "1b8b760c", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 1: What is a Layer?\n", + "\n", + "A **layer** is a function that transforms tensors. Think of it as:\n", + "- **Input**: Tensor with some shape\n", + "- **Transformation**: Mathematical operation (linear, nonlinear, etc.)\n", + "- **Output**: Tensor with possibly different shape\n", + "\n", + "**The fundamental insight**: Neural networks are just function composition!\n", + "```\n", + "x โ†’ Layer1 โ†’ Layer2 โ†’ Layer3 โ†’ y\n", + "```\n", + "\n", + "**Why layers matter**:\n", + "- They're the building blocks of all neural networks\n", + "- Each layer learns a different transformation\n", + "- Composing layers creates complex functions\n", + "- Understanding layers = understanding neural networks\n", + "\n", + "Let's start with the most important layer: **Dense** (also called Linear or Fully Connected)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fabf403c", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| export\n", + "class Dense:\n", + " \"\"\"\n", + " Dense (Linear) Layer: y = Wx + b\n", + " \n", + " The fundamental building block of neural networks.\n", + " Performs linear transformation: matrix multiplication + bias addition.\n", + " \n", + " Args:\n", + " input_size: Number of input features\n", + " output_size: Number of output features\n", + " use_bias: Whether to include bias term (default: True)\n", + " \n", + " TODO: Implement the Dense layer with weight initialization and forward pass.\n", + " \"\"\"\n", + " \n", + " def __init__(self, input_size: int, output_size: int, use_bias: bool = True):\n", + " \"\"\"\n", + " Initialize Dense layer with random weights.\n", + " \n", + " TODO: \n", + " 1. Store layer parameters (input_size, output_size, use_bias)\n", + " 2. Initialize weights with small random values\n", + " 3. 
Initialize bias to zeros (if use_bias=True)\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Forward pass: y = Wx + b\n", + " \n", + " Args:\n", + " x: Input tensor of shape (batch_size, input_size)\n", + " \n", + " Returns:\n", + " Output tensor of shape (batch_size, output_size)\n", + " \n", + " TODO: Implement matrix multiplication and bias addition\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "718aafe5", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Dense:\n", + " \"\"\"\n", + " Dense (Linear) Layer: y = Wx + b\n", + " \n", + " The fundamental building block of neural networks.\n", + " Performs linear transformation: matrix multiplication + bias addition.\n", + " \"\"\"\n", + " \n", + " def __init__(self, input_size: int, output_size: int, use_bias: bool = True):\n", + " \"\"\"Initialize Dense layer with random weights.\"\"\"\n", + " self.input_size = input_size\n", + " self.output_size = output_size\n", + " self.use_bias = use_bias\n", + " \n", + " # Initialize weights with Xavier/Glorot initialization\n", + " # This helps with gradient flow during training\n", + " limit = math.sqrt(6.0 / (input_size + output_size))\n", + " self.weights = Tensor(\n", + " np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32)\n", + " )\n", + " \n", + " # Initialize bias to zeros\n", + " if use_bias:\n", + " self.bias = Tensor(np.zeros(output_size, dtype=np.float32))\n", + " else:\n", + " self.bias = None\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Forward pass: y = Wx + 
b\"\"\"\n", + " # Matrix multiplication: x @ weights\n", + " # x shape: (batch_size, input_size)\n", + " # weights shape: (input_size, output_size)\n", + " # result shape: (batch_size, output_size)\n", + " output = Tensor(x.data @ self.weights.data)\n", + " \n", + " # Add bias if present\n", + " if self.bias is not None:\n", + " output = Tensor(output.data + self.bias.data)\n", + " \n", + " return output\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "markdown", + "id": "54390574", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### ๐Ÿงช Test Your Dense Layer\n", + "\n", + "Once you implement the Dense layer above, run this cell to test it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c24b9bc7", + "metadata": {}, + "outputs": [], + "source": [ + "# Test the Dense layer\n", + "try:\n", + " print(\"=== Testing Dense Layer ===\")\n", + " \n", + " # Create a simple Dense layer: 3 inputs โ†’ 2 outputs\n", + " layer = Dense(input_size=3, output_size=2)\n", + " print(f\"Created Dense layer: {layer.input_size} โ†’ {layer.output_size}\")\n", + " print(f\"Weights shape: {layer.weights.shape}\")\n", + " print(f\"Bias shape: {layer.bias.shape if layer.bias else 'No bias'}\")\n", + " \n", + " # Test with a single example\n", + " x = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3)\n", + " y = layer(x)\n", + " print(f\"Input shape: {x.shape}\")\n", + " print(f\"Output shape: {y.shape}\")\n", + " print(f\"Input: {x.data}\")\n", + " print(f\"Output: {y.data}\")\n", + " \n", + " # Test with batch of examples\n", + " x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3)\n", + " y_batch = layer(x_batch)\n", + " print(f\"\\nBatch input shape: {x_batch.shape}\")\n", + " print(f\"Batch output shape: {y_batch.shape}\")\n", + " print(f\"Batch output: {y_batch.data}\")\n", + " \n", + " 
print(\"โœ… Dense layer working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"โŒ Error: {e}\")\n", + " print(\"Make sure to implement the Dense layer above!\")" + ] + }, + { + "cell_type": "markdown", + "id": "50ccc78d", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 2: Activation Functions\n", + "\n", + "Dense layers alone can only learn **linear** transformations. But most real-world problems need **nonlinear** transformations.\n", + "\n", + "**Activation functions** add nonlinearity:\n", + "- **ReLU**: `max(0, x)` - Most common, simple and effective\n", + "- **Sigmoid**: `1 / (1 + e^(-x))` - Squashes to (0, 1)\n", + "- **Tanh**: `tanh(x)` - Squashes to (-1, 1)\n", + "\n", + "**Why nonlinearity matters**: Without it, stacking layers is pointless!\n", + "```\n", + "Linear โ†’ Linear โ†’ Linear = Just one big Linear transformation\n", + "Linear โ†’ NonLinear โ†’ Linear = Can learn complex patterns\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85818dc3", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| export\n", + "class ReLU:\n", + " \"\"\"\n", + " ReLU Activation: f(x) = max(0, x)\n", + " \n", + " The most popular activation function in deep learning.\n", + " Simple, effective, and computationally efficient.\n", + " \n", + " TODO: Implement ReLU activation function.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply ReLU: f(x) = max(0, x)\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor with ReLU applied element-wise\n", + " \n", + " TODO: Implement element-wise max(0, x) operation\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make activation callable: relu(x) same as relu.forward(x)\"\"\"\n", + " return self.forward(x)" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23e807f1", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class ReLU:\n", + " \"\"\"ReLU Activation: f(x) = max(0, x)\"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Apply ReLU: f(x) = max(0, x)\"\"\"\n", + " return Tensor(np.maximum(0, x.data))\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c0bb26a", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| export\n", + "class Sigmoid:\n", + " \"\"\"\n", + " Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\n", + " \n", + " Squashes input to range (0, 1). Often used for binary classification.\n", + " \n", + " TODO: Implement Sigmoid activation function.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Sigmoid: f(x) = 1 / (1 + e^(-x))\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor with Sigmoid applied element-wise\n", + " \n", + " TODO: Implement sigmoid function (be careful with numerical stability!)\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "972e9668", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Sigmoid:\n", + " \"\"\"Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Apply Sigmoid with numerical stability\"\"\"\n", + " # Use the numerically stable version to avoid overflow\n", + " # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))\n", + " # For x < 0: sigmoid(x) = exp(x) / (1 + 
exp(x))\n", + " x_data = x.data\n", + " result = np.zeros_like(x_data)\n", + " \n", + " # Stable computation\n", + " positive_mask = x_data >= 0\n", + " result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))\n", + " result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))\n", + " \n", + " return Tensor(result)\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2babe8a8", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| export\n", + "class Tanh:\n", + " \"\"\"\n", + " Tanh Activation: f(x) = tanh(x)\n", + " \n", + " Squashes input to range (-1, 1). Zero-centered output.\n", + " \n", + " TODO: Implement Tanh activation function.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Tanh: f(x) = tanh(x)\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor with Tanh applied element-wise\n", + " \n", + " TODO: Implement tanh function\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5eff4e44", + "metadata": { + "lines_to_next_cell": 1 + }, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Tanh:\n", + " \"\"\"Tanh Activation: f(x) = tanh(x)\"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Apply Tanh\"\"\"\n", + " return Tensor(np.tanh(x.data))\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "markdown", + "id": "c39e4420", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### ๐Ÿงช Test Your Activation Functions\n", + "\n", + "Once you implement the activation 
functions above, run this cell to test them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f73687cc", + "metadata": {}, + "outputs": [], + "source": [ + "# Test activation functions\n", + "try:\n", + " print(\"=== Testing Activation Functions ===\")\n", + " \n", + " # Test data: mix of positive, negative, and zero\n", + " x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])\n", + " print(f\"Input: {x.data}\")\n", + " \n", + " # Test ReLU\n", + " relu = ReLU()\n", + " y_relu = relu(x)\n", + " print(f\"ReLU output: {y_relu.data}\")\n", + " \n", + " # Test Sigmoid\n", + " sigmoid = Sigmoid()\n", + " y_sigmoid = sigmoid(x)\n", + " print(f\"Sigmoid output: {y_sigmoid.data}\")\n", + " \n", + " # Test Tanh\n", + " tanh = Tanh()\n", + " y_tanh = tanh(x)\n", + " print(f\"Tanh output: {y_tanh.data}\")\n", + " \n", + " print(\"โœ… Activation functions working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"โŒ Error: {e}\")\n", + " print(\"Make sure to implement the activation functions above!\")" + ] + }, + { + "cell_type": "markdown", + "id": "ec82e933", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 3: Layer Composition - Building Neural Networks\n", + "\n", + "Now comes the magic! We can **compose** layers to build neural networks:\n", + "\n", + "```\n", + "Input โ†’ Dense โ†’ ReLU โ†’ Dense โ†’ Sigmoid โ†’ Output\n", + "```\n", + "\n", + "This is a 2-layer neural network that can learn complex nonlinear patterns!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06c5692f", + "metadata": {}, + "outputs": [], + "source": [ + "# Build a simple 2-layer neural network\n", + "try:\n", + " print(\"=== Building a 2-Layer Neural Network ===\")\n", + " \n", + " # Network architecture: 3 โ†’ 4 โ†’ 2\n", + " # Input: 3 features\n", + " # Hidden: 4 neurons with ReLU\n", + " # Output: 2 neurons with Sigmoid\n", + " \n", + " layer1 = Dense(input_size=3, output_size=4)\n", + " activation1 = ReLU()\n", + " layer2 = Dense(input_size=4, output_size=2)\n", + " activation2 = Sigmoid()\n", + " \n", + " print(\"Network architecture:\")\n", + " print(f\" Input: 3 features\")\n", + " print(f\" Hidden: {layer1.input_size} โ†’ {layer1.output_size} (Dense + ReLU)\")\n", + " print(f\" Output: {layer2.input_size} โ†’ {layer2.output_size} (Dense + Sigmoid)\")\n", + " \n", + " # Test with sample data\n", + " x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # 2 examples, 3 features each\n", + " print(f\"\\nInput shape: {x.shape}\")\n", + " print(f\"Input data: {x.data}\")\n", + " \n", + " # Forward pass through the network\n", + " h1 = layer1(x) # Dense layer 1\n", + " h1_activated = activation1(h1) # ReLU activation\n", + " h2 = layer2(h1_activated) # Dense layer 2 \n", + " output = activation2(h2) # Sigmoid activation\n", + " \n", + " print(f\"\\nAfter layer 1: {h1.shape}\")\n", + " print(f\"After ReLU: {h1_activated.shape}\")\n", + " print(f\"After layer 2: {h2.shape}\")\n", + " print(f\"Final output: {output.shape}\")\n", + " print(f\"Output values: {output.data}\")\n", + " \n", + " print(\"\\n๐ŸŽ‰ Neural network working! 
You just built your first neural network!\")\n", + " print(\"Notice how the network transforms 3D input into 2D output through learned transformations.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"โŒ Error: {e}\")\n", + " print(\"Make sure to implement the layers and activations above!\")" + ] + }, + { + "cell_type": "markdown", + "id": "13dc6d9a", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 4: Understanding What We Built\n", + "\n", + "Congratulations! You just implemented the fundamental building blocks of neural networks:\n", + "\n", + "### ๐Ÿงฑ **What You Built**\n", + "1. **Dense Layer**: Linear transformation `y = Wx + b`\n", + "2. **Activation Functions**: Nonlinear transformations (ReLU, Sigmoid, Tanh)\n", + "3. **Layer Composition**: Chaining layers to build networks\n", + "\n", + "### ๐ŸŽฏ **Key Insights**\n", + "- **Layers are functions**: They transform tensors from one space to another\n", + "- **Composition creates complexity**: Simple layers โ†’ complex networks\n", + "- **Nonlinearity is crucial**: Without it, deep networks are just linear transformations\n", + "- **Neural networks are function approximators**: They learn to map inputs to outputs\n", + "\n", + "### ๐Ÿš€ **What's Next**\n", + "In the next modules, you'll learn:\n", + "- **Training**: How networks learn from data (backpropagation, optimizers)\n", + "- **Architectures**: Specialized layers for different problems (CNNs, RNNs)\n", + "- **Applications**: Using networks for real problems\n", + "\n", + "### ๐Ÿ”ง **Export to Package**\n", + "Run this to export your layers to the TinyTorch package:\n", + "```bash\n", + "python bin/tito.py sync\n", + "```\n", + "\n", + "Then test your implementation:\n", + "```bash\n", + "python bin/tito.py test --module layers\n", + "```\n", + "\n", + "**Great job! 
You've built the foundation of neural networks!** ๐ŸŽ‰" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a54d8ce9", + "metadata": {}, + "outputs": [], + "source": [ + "# Final demonstration: A more complex example\n", + "try:\n", + " print(\"=== Final Demo: Image Classification Network ===\")\n", + " \n", + " # Simulate a small image: 28x28 pixels flattened to 784 features\n", + " # This is like a tiny MNIST digit\n", + " image_size = 28 * 28 # 784 pixels\n", + " num_classes = 10 # 10 digits (0-9)\n", + " \n", + " # Build a 3-layer network for digit classification\n", + " # 784 โ†’ 128 โ†’ 64 โ†’ 10\n", + " layer1 = Dense(input_size=image_size, output_size=128)\n", + " relu1 = ReLU()\n", + " layer2 = Dense(input_size=128, output_size=64)\n", + " relu2 = ReLU()\n", + " layer3 = Dense(input_size=64, output_size=num_classes)\n", + " softmax = Sigmoid() # Using Sigmoid as a simple \"probability-like\" output\n", + " \n", + " print(f\"Image classification network:\")\n", + " print(f\" Input: {image_size} pixels (28x28 image)\")\n", + " print(f\" Hidden 1: {layer1.input_size} โ†’ {layer1.output_size} (Dense + ReLU)\")\n", + " print(f\" Hidden 2: {layer2.input_size} โ†’ {layer2.output_size} (Dense + ReLU)\")\n", + " print(f\" Output: {layer3.input_size} โ†’ {layer3.output_size} (Dense + Sigmoid)\")\n", + " \n", + " # Simulate a batch of 5 images\n", + " batch_size = 5\n", + " fake_images = Tensor(np.random.randn(batch_size, image_size).astype(np.float32))\n", + " \n", + " # Forward pass\n", + " h1 = relu1(layer1(fake_images))\n", + " h2 = relu2(layer2(h1))\n", + " predictions = softmax(layer3(h2))\n", + " \n", + " print(f\"\\nBatch processing:\")\n", + " print(f\" Input batch shape: {fake_images.shape}\")\n", + " print(f\" Predictions shape: {predictions.shape}\")\n", + " print(f\" Sample predictions: {predictions.data[0]}\") # First image predictions\n", + " \n", + " print(\"\\n๐ŸŽ‰ You built a neural network that could classify images!\")\n", + " 
print(\"With training, this network could learn to recognize handwritten digits!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"โŒ Error: {e}\")\n", + " print(\"Check your layer implementations!\") " + ] + } + ], + "metadata": { + "jupytext": { + "main_language": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/layers/layers_dev.py b/modules/layers/layers_dev.py new file mode 100644 index 00000000..5b657c1e --- /dev/null +++ b/modules/layers/layers_dev.py @@ -0,0 +1,548 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# Module 2: Layers - Neural Network Building Blocks + +Welcome to the Layers module! This is where neural networks begin. You'll implement the fundamental building blocks that transform tensors. + +## Learning Goals +- Understand layers as functions that transform tensors: `y = f(x)` +- Implement Dense layers with linear transformations: `y = Wx + b` +- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh) +- See how neural networks are just function composition +- Build intuition before diving into training + +## Build โ†’ Use โ†’ Understand +1. **Build**: Dense layers and activation functions +2. **Use**: Transform tensors and see immediate results +3. **Understand**: How neural networks transform information + +## Module โ†’ Package Structure +**๐ŸŽ“ Teaching vs. ๐Ÿ”ง Building**: +- **Learning side**: Work in `modules/layers/layers_dev.py` +- **Building side**: Exports to `tinytorch/core/layers.py` + +This module builds the fundamental transformations that compose into neural networks. 
+""" + +# %% +#| default_exp core.layers + +# Setup and imports +import numpy as np +import sys +from typing import Union, Optional, Callable +import math + +# %% +#| export +import numpy as np +import math +import sys +from typing import Union, Optional, Callable +from tinytorch.core.tensor import Tensor + +# Import our Tensor class +# sys.path.append('../../') +# from modules.tensor.tensor_dev import Tensor + +# print("๐Ÿ”ฅ TinyTorch Layers Module") +# print(f"NumPy version: {np.__version__}") +# print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") +# print("Ready to build neural network layers!") + +# %% [markdown] +""" +## Step 1: What is a Layer? + +A **layer** is a function that transforms tensors. Think of it as: +- **Input**: Tensor with some shape +- **Transformation**: Mathematical operation (linear, nonlinear, etc.) +- **Output**: Tensor with possibly different shape + +**The fundamental insight**: Neural networks are just function composition! +``` +x โ†’ Layer1 โ†’ Layer2 โ†’ Layer3 โ†’ y +``` + +**Why layers matter**: +- They're the building blocks of all neural networks +- Each layer learns a different transformation +- Composing layers creates complex functions +- Understanding layers = understanding neural networks + +Let's start with the most important layer: **Dense** (also called Linear or Fully Connected). +""" + +# %% +#| export +class Dense: + """ + Dense (Linear) Layer: y = Wx + b + + The fundamental building block of neural networks. + Performs linear transformation: matrix multiplication + bias addition. + + Args: + input_size: Number of input features + output_size: Number of output features + use_bias: Whether to include bias term (default: True) + + TODO: Implement the Dense layer with weight initialization and forward pass. + """ + + def __init__(self, input_size: int, output_size: int, use_bias: bool = True): + """ + Initialize Dense layer with random weights. + + TODO: + 1. 
Store layer parameters (input_size, output_size, use_bias) + 2. Initialize weights with small random values + 3. Initialize bias to zeros (if use_bias=True) + """ + raise NotImplementedError("Student implementation required") + + def forward(self, x: Tensor) -> Tensor: + """ + Forward pass: y = Wx + b + + Args: + x: Input tensor of shape (batch_size, input_size) + + Returns: + Output tensor of shape (batch_size, output_size) + + TODO: Implement matrix multiplication and bias addition + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + """Make layer callable: layer(x) same as layer.forward(x)""" + return self.forward(x) + +# %% +#| hide +#| export +class Dense: + """ + Dense (Linear) Layer: y = Wx + b + + The fundamental building block of neural networks. + Performs linear transformation: matrix multiplication + bias addition. + """ + + def __init__(self, input_size: int, output_size: int, use_bias: bool = True): + """Initialize Dense layer with random weights.""" + self.input_size = input_size + self.output_size = output_size + self.use_bias = use_bias + + # Initialize weights with Xavier/Glorot initialization + # This helps with gradient flow during training + limit = math.sqrt(6.0 / (input_size + output_size)) + self.weights = Tensor( + np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32) + ) + + # Initialize bias to zeros + if use_bias: + self.bias = Tensor(np.zeros(output_size, dtype=np.float32)) + else: + self.bias = None + + def forward(self, x: Tensor) -> Tensor: + """Forward pass: y = Wx + b""" + # Matrix multiplication: x @ weights + # x shape: (batch_size, input_size) + # weights shape: (input_size, output_size) + # result shape: (batch_size, output_size) + output = Tensor(x.data @ self.weights.data) + + # Add bias if present + if self.bias is not None: + output = Tensor(output.data + self.bias.data) + + return output + + def __call__(self, x: Tensor) -> Tensor: + 
"""Make layer callable: layer(x) same as layer.forward(x)""" + return self.forward(x) + +# %% [markdown] +""" +### ๐Ÿงช Test Your Dense Layer + +Once you implement the Dense layer above, run this cell to test it: +""" + +# %% +# Test the Dense layer +try: + print("=== Testing Dense Layer ===") + + # Create a simple Dense layer: 3 inputs โ†’ 2 outputs + layer = Dense(input_size=3, output_size=2) + print(f"Created Dense layer: {layer.input_size} โ†’ {layer.output_size}") + print(f"Weights shape: {layer.weights.shape}") + print(f"Bias shape: {layer.bias.shape if layer.bias else 'No bias'}") + + # Test with a single example + x = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3) + y = layer(x) + print(f"Input shape: {x.shape}") + print(f"Output shape: {y.shape}") + print(f"Input: {x.data}") + print(f"Output: {y.data}") + + # Test with batch of examples + x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3) + y_batch = layer(x_batch) + print(f"\nBatch input shape: {x_batch.shape}") + print(f"Batch output shape: {y_batch.shape}") + print(f"Batch output: {y_batch.data}") + + print("โœ… Dense layer working!") + +except Exception as e: + print(f"โŒ Error: {e}") + print("Make sure to implement the Dense layer above!") + +# %% [markdown] +""" +## Step 2: Activation Functions + +Dense layers alone can only learn **linear** transformations. But most real-world problems need **nonlinear** transformations. + +**Activation functions** add nonlinearity: +- **ReLU**: `max(0, x)` - Most common, simple and effective +- **Sigmoid**: `1 / (1 + e^(-x))` - Squashes to (0, 1) +- **Tanh**: `tanh(x)` - Squashes to (-1, 1) + +**Why nonlinearity matters**: Without it, stacking layers is pointless! +``` +Linear โ†’ Linear โ†’ Linear = Just one big Linear transformation +Linear โ†’ NonLinear โ†’ Linear = Can learn complex patterns +``` +""" + +# %% +#| export +class ReLU: + """ + ReLU Activation: f(x) = max(0, x) + + The most popular activation function in deep learning. 
+ Simple, effective, and computationally efficient. + + TODO: Implement ReLU activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply ReLU: f(x) = max(0, x) + + Args: + x: Input tensor + + Returns: + Output tensor with ReLU applied element-wise + + TODO: Implement element-wise max(0, x) operation + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + """Make activation callable: relu(x) same as relu.forward(x)""" + return self.forward(x) + +# %% +#| hide +#| export +class ReLU: + """ReLU Activation: f(x) = max(0, x)""" + + def forward(self, x: Tensor) -> Tensor: + """Apply ReLU: f(x) = max(0, x)""" + return Tensor(np.maximum(0, x.data)) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% +#| export +class Sigmoid: + """ + Sigmoid Activation: f(x) = 1 / (1 + e^(-x)) + + Squashes input to range (0, 1). Often used for binary classification. + + TODO: Implement Sigmoid activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Sigmoid: f(x) = 1 / (1 + e^(-x)) + + Args: + x: Input tensor + + Returns: + Output tensor with Sigmoid applied element-wise + + TODO: Implement sigmoid function (be careful with numerical stability!) 
+ """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% +#| hide +#| export +class Sigmoid: + """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))""" + + def forward(self, x: Tensor) -> Tensor: + """Apply Sigmoid with numerical stability""" + # Use the numerically stable version to avoid overflow + # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) + # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) + x_data = x.data + result = np.zeros_like(x_data) + + # Stable computation + positive_mask = x_data >= 0 + result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) + result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) + + return Tensor(result) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% +#| export +class Tanh: + """ + Tanh Activation: f(x) = tanh(x) + + Squashes input to range (-1, 1). Zero-centered output. + + TODO: Implement Tanh activation function. 
+ """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Tanh: f(x) = tanh(x) + + Args: + x: Input tensor + + Returns: + Output tensor with Tanh applied element-wise + + TODO: Implement tanh function + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% +#| hide +#| export +class Tanh: + """Tanh Activation: f(x) = tanh(x)""" + + def forward(self, x: Tensor) -> Tensor: + """Apply Tanh""" + return Tensor(np.tanh(x.data)) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% [markdown] +""" +### ๐Ÿงช Test Your Activation Functions + +Once you implement the activation functions above, run this cell to test them: +""" + +# %% +# Test activation functions +try: + print("=== Testing Activation Functions ===") + + # Test data: mix of positive, negative, and zero + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + print(f"Input: {x.data}") + + # Test ReLU + relu = ReLU() + y_relu = relu(x) + print(f"ReLU output: {y_relu.data}") + + # Test Sigmoid + sigmoid = Sigmoid() + y_sigmoid = sigmoid(x) + print(f"Sigmoid output: {y_sigmoid.data}") + + # Test Tanh + tanh = Tanh() + y_tanh = tanh(x) + print(f"Tanh output: {y_tanh.data}") + + print("โœ… Activation functions working!") + +except Exception as e: + print(f"โŒ Error: {e}") + print("Make sure to implement the activation functions above!") + +# %% [markdown] +""" +## Step 3: Layer Composition - Building Neural Networks + +Now comes the magic! We can **compose** layers to build neural networks: + +``` +Input โ†’ Dense โ†’ ReLU โ†’ Dense โ†’ Sigmoid โ†’ Output +``` + +This is a 2-layer neural network that can learn complex nonlinear patterns! 
+""" + +# %% +# Build a simple 2-layer neural network +try: + print("=== Building a 2-Layer Neural Network ===") + + # Network architecture: 3 โ†’ 4 โ†’ 2 + # Input: 3 features + # Hidden: 4 neurons with ReLU + # Output: 2 neurons with Sigmoid + + layer1 = Dense(input_size=3, output_size=4) + activation1 = ReLU() + layer2 = Dense(input_size=4, output_size=2) + activation2 = Sigmoid() + + print("Network architecture:") + print(f" Input: 3 features") + print(f" Hidden: {layer1.input_size} โ†’ {layer1.output_size} (Dense + ReLU)") + print(f" Output: {layer2.input_size} โ†’ {layer2.output_size} (Dense + Sigmoid)") + + # Test with sample data + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # 2 examples, 3 features each + print(f"\nInput shape: {x.shape}") + print(f"Input data: {x.data}") + + # Forward pass through the network + h1 = layer1(x) # Dense layer 1 + h1_activated = activation1(h1) # ReLU activation + h2 = layer2(h1_activated) # Dense layer 2 + output = activation2(h2) # Sigmoid activation + + print(f"\nAfter layer 1: {h1.shape}") + print(f"After ReLU: {h1_activated.shape}") + print(f"After layer 2: {h2.shape}") + print(f"Final output: {output.shape}") + print(f"Output values: {output.data}") + + print("\n๐ŸŽ‰ Neural network working! You just built your first neural network!") + print("Notice how the network transforms 3D input into 2D output through learned transformations.") + +except Exception as e: + print(f"โŒ Error: {e}") + print("Make sure to implement the layers and activations above!") + +# %% [markdown] +""" +## Step 4: Understanding What We Built + +Congratulations! You just implemented the fundamental building blocks of neural networks: + +### ๐Ÿงฑ **What You Built** +1. **Dense Layer**: Linear transformation `y = Wx + b` +2. **Activation Functions**: Nonlinear transformations (ReLU, Sigmoid, Tanh) +3. 
**Layer Composition**: Chaining layers to build networks + +### ๐ŸŽฏ **Key Insights** +- **Layers are functions**: They transform tensors from one space to another +- **Composition creates complexity**: Simple layers โ†’ complex networks +- **Nonlinearity is crucial**: Without it, deep networks are just linear transformations +- **Neural networks are function approximators**: They learn to map inputs to outputs + +### ๐Ÿš€ **What's Next** +In the next modules, you'll learn: +- **Training**: How networks learn from data (backpropagation, optimizers) +- **Architectures**: Specialized layers for different problems (CNNs, RNNs) +- **Applications**: Using networks for real problems + +### ๐Ÿ”ง **Export to Package** +Run this to export your layers to the TinyTorch package: +```bash +python bin/tito.py sync +``` + +Then test your implementation: +```bash +python bin/tito.py test --module layers +``` + +**Great job! You've built the foundation of neural networks!** ๐ŸŽ‰ +""" + +# %% +# Final demonstration: A more complex example +try: + print("=== Final Demo: Image Classification Network ===") + + # Simulate a small image: 28x28 pixels flattened to 784 features + # This is like a tiny MNIST digit + image_size = 28 * 28 # 784 pixels + num_classes = 10 # 10 digits (0-9) + + # Build a 3-layer network for digit classification + # 784 โ†’ 128 โ†’ 64 โ†’ 10 + layer1 = Dense(input_size=image_size, output_size=128) + relu1 = ReLU() + layer2 = Dense(input_size=128, output_size=64) + relu2 = ReLU() + layer3 = Dense(input_size=64, output_size=num_classes) + softmax = Sigmoid() # Using Sigmoid as a simple "probability-like" output + + print(f"Image classification network:") + print(f" Input: {image_size} pixels (28x28 image)") + print(f" Hidden 1: {layer1.input_size} โ†’ {layer1.output_size} (Dense + ReLU)") + print(f" Hidden 2: {layer2.input_size} โ†’ {layer2.output_size} (Dense + ReLU)") + print(f" Output: {layer3.input_size} โ†’ {layer3.output_size} (Dense + Sigmoid)") + + # 
Simulate a batch of 5 images + batch_size = 5 + fake_images = Tensor(np.random.randn(batch_size, image_size).astype(np.float32)) + + # Forward pass + h1 = relu1(layer1(fake_images)) + h2 = relu2(layer2(h1)) + predictions = softmax(layer3(h2)) + + print(f"\nBatch processing:") + print(f" Input batch shape: {fake_images.shape}") + print(f" Predictions shape: {predictions.shape}") + print(f" Sample predictions: {predictions.data[0]}") # First image predictions + + print("\n๐ŸŽ‰ You built a neural network that could classify images!") + print("With training, this network could learn to recognize handwritten digits!") + +except Exception as e: + print(f"โŒ Error: {e}") + print("Check your layer implementations!") \ No newline at end of file diff --git a/modules/layers/tests/test_layers.py b/modules/layers/tests/test_layers.py new file mode 100644 index 00000000..c85e208a --- /dev/null +++ b/modules/layers/tests/test_layers.py @@ -0,0 +1,343 @@ +""" +Tests for TinyTorch Layers module. + +Tests the core layer functionality including Dense layers, activation functions, +and layer composition. + +These tests work with the current implementation and provide stretch goals +for students to implement additional features. 
+""" + +import sys +import os +import pytest +import numpy as np + +# Add the parent directory to path to import layers_dev +sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) + +# Import from the module's development file +# Note: This imports the instructor version with full implementation +from layers_dev import Dense, ReLU, Sigmoid, Tanh, Tensor + +def safe_numpy(tensor): + """Get numpy array from tensor, using .numpy() if available, otherwise .data""" + if hasattr(tensor, 'numpy'): + return tensor.numpy() + else: + return tensor.data + +class TestDenseLayer: + """Test Dense (Linear) layer functionality.""" + + def test_dense_creation(self): + """Test creating Dense layers with different configurations.""" + # Basic dense layer + layer = Dense(input_size=3, output_size=2) + assert layer.input_size == 3 + assert layer.output_size == 2 + assert layer.use_bias == True + assert layer.weights.shape == (3, 2) + assert layer.bias.shape == (2,) + + # Dense layer without bias + layer_no_bias = Dense(input_size=4, output_size=3, use_bias=False) + assert layer_no_bias.use_bias == False + assert layer_no_bias.bias is None + + def test_dense_forward_single(self): + """Test Dense layer forward pass with single input.""" + layer = Dense(input_size=3, output_size=2) + + # Single input + x = Tensor([[1.0, 2.0, 3.0]]) + y = layer(x) + + assert y.shape == (1, 2) + assert isinstance(y, Tensor) + + def test_dense_forward_batch(self): + """Test Dense layer forward pass with batch input.""" + layer = Dense(input_size=3, output_size=2) + + # Batch input + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + y = layer(x) + + assert y.shape == (2, 2) + assert isinstance(y, Tensor) + + def test_dense_no_bias(self): + """Test Dense layer without bias.""" + layer = Dense(input_size=2, output_size=1, use_bias=False) + + x = Tensor([[1.0, 2.0]]) + y = layer(x) + + assert y.shape == (1, 1) + # Should be just matrix multiplication without bias + expected = safe_numpy(x) @ 
safe_numpy(layer.weights) + np.testing.assert_array_almost_equal(safe_numpy(y), expected) + + def test_dense_callable(self): + """Test that Dense layer is callable.""" + layer = Dense(input_size=2, output_size=1) + x = Tensor([[1.0, 2.0]]) + + # Both should work + y1 = layer.forward(x) + y2 = layer(x) + + np.testing.assert_array_equal(safe_numpy(y1), safe_numpy(y2)) + +class TestActivationFunctions: + """Test activation function implementations.""" + + def test_relu_basic(self): + """Test ReLU activation function.""" + relu = ReLU() + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + y = relu(x) + + expected = [[0.0, 0.0, 0.0, 1.0, 2.0]] + np.testing.assert_array_equal(safe_numpy(y), expected) + + def test_relu_callable(self): + """Test that ReLU is callable.""" + relu = ReLU() + x = Tensor([[1.0, -1.0]]) + + y1 = relu.forward(x) + y2 = relu(x) + + np.testing.assert_array_equal(safe_numpy(y1), safe_numpy(y2)) + + def test_sigmoid_basic(self): + """Test Sigmoid activation function.""" + sigmoid = Sigmoid() + x = Tensor([[0.0]]) # sigmoid(0) = 0.5 + y = sigmoid(x) + + np.testing.assert_array_almost_equal(safe_numpy(y), [[0.5]]) + + def test_sigmoid_range(self): + """Test Sigmoid output range.""" + sigmoid = Sigmoid() + x = Tensor([[-10.0, 0.0, 10.0]]) + y = sigmoid(x) + + # Should be in range [0, 1] - use reasonable bounds + assert np.all(safe_numpy(y) >= 0) + assert np.all(safe_numpy(y) <= 1) + # Check that extreme values are close to bounds + assert safe_numpy(y)[0][0] < 0.01 # Very small for -10 + assert safe_numpy(y)[0][2] > 0.99 # Very large for 10 + + def test_tanh_basic(self): + """Test Tanh activation function.""" + tanh = Tanh() + x = Tensor([[0.0]]) # tanh(0) = 0 + y = tanh(x) + + np.testing.assert_array_almost_equal(safe_numpy(y), [[0.0]]) + + def test_tanh_range(self): + """Test Tanh output range.""" + tanh = Tanh() + x = Tensor([[-10.0, 0.0, 10.0]]) + y = tanh(x) + + # Should be in range [-1, 1] - use reasonable bounds + assert np.all(safe_numpy(y) >= -1) + 
assert np.all(safe_numpy(y) <= 1) + # Check that extreme values are close to bounds + assert safe_numpy(y)[0][0] < -0.99 # Very negative for -10 + assert safe_numpy(y)[0][2] > 0.99 # Very positive for 10 + +class TestLayerComposition: + """Test composing layers into neural networks.""" + + def test_simple_network(self): + """Test a simple 2-layer network.""" + # 3 โ†’ 4 โ†’ 2 network + layer1 = Dense(input_size=3, output_size=4) + relu = ReLU() + layer2 = Dense(input_size=4, output_size=2) + sigmoid = Sigmoid() + + # Forward pass + x = Tensor([[1.0, 2.0, 3.0]]) + h1 = layer1(x) + h1_activated = relu(h1) + h2 = layer2(h1_activated) + output = sigmoid(h2) + + assert h1.shape == (1, 4) + assert h1_activated.shape == (1, 4) + assert h2.shape == (1, 2) + assert output.shape == (1, 2) + + # Output should be in sigmoid range + assert np.all(safe_numpy(output) >= 0) + assert np.all(safe_numpy(output) <= 1) + + def test_batch_network(self): + """Test network with batch processing.""" + layer1 = Dense(input_size=2, output_size=3) + relu = ReLU() + layer2 = Dense(input_size=3, output_size=1) + + # Batch of 4 examples + x = Tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]) + + h1 = layer1(x) + h1_activated = relu(h1) + output = layer2(h1_activated) + + assert output.shape == (4, 1) + + def test_deep_network(self): + """Test deeper network composition.""" + # 5-layer network + layers = [ + Dense(input_size=10, output_size=8), + ReLU(), + Dense(input_size=8, output_size=6), + ReLU(), + Dense(input_size=6, output_size=4), + ReLU(), + Dense(input_size=4, output_size=2), + Sigmoid() + ] + + x = Tensor([[1.0] * 10]) # 10 features + + # Forward pass through all layers + current = x + for layer in layers: + current = layer(current) + + assert current.shape == (1, 2) + # Final output should be in sigmoid range + assert np.all(safe_numpy(current) >= 0) + assert np.all(safe_numpy(current) <= 1) + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + def 
test_zero_input(self): + """Test layers with zero input.""" + layer = Dense(input_size=3, output_size=2) + relu = ReLU() + + x = Tensor([[0.0, 0.0, 0.0]]) + y = layer(x) + y_relu = relu(y) + + assert y.shape == (1, 2) + assert y_relu.shape == (1, 2) + + def test_large_input(self): + """Test layers with large input values.""" + layer = Dense(input_size=2, output_size=1) + sigmoid = Sigmoid() + + x = Tensor([[1000.0, -1000.0]]) + y = layer(x) + y_sigmoid = sigmoid(y) + + # Should not overflow + assert not np.any(np.isnan(safe_numpy(y_sigmoid))) + assert not np.any(np.isinf(safe_numpy(y_sigmoid))) + + def test_single_neuron(self): + """Test single neuron layers.""" + layer = Dense(input_size=1, output_size=1) + x = Tensor([[5.0]]) + y = layer(x) + + assert y.shape == (1, 1) + +# Stretch goal tests (these will be skipped if methods don't exist) +class TestStretchGoals: + """Stretch goal tests for advanced features.""" + + @pytest.mark.skip(reason="Stretch goal: Weight initialization methods") + def test_weight_initialization_methods(self): + """Test different weight initialization strategies.""" + # Xavier initialization + layer_xavier = Dense(input_size=100, output_size=50, init_method='xavier') + weights_xavier = safe_numpy(layer_xavier.weights) + + # He initialization + layer_he = Dense(input_size=100, output_size=50, init_method='he') + weights_he = safe_numpy(layer_he.weights) + + # Check initialization ranges + xavier_limit = np.sqrt(6.0 / (100 + 50)) + assert np.all(np.abs(weights_xavier) <= xavier_limit) + + he_limit = np.sqrt(2.0 / 100) + assert np.std(weights_he) <= he_limit * 1.5 # Some tolerance + + @pytest.mark.skip(reason="Stretch goal: Layer parameter access") + def test_layer_parameters(self): + """Test accessing and modifying layer parameters.""" + layer = Dense(input_size=3, output_size=2) + + # Should be able to access parameters + assert hasattr(layer, 'parameters') + params = layer.parameters() + assert len(params) == 2 # weights and bias + + # 
Should be able to set parameters + new_weights = Tensor(np.ones((3, 2))) + layer.set_weights(new_weights) + np.testing.assert_array_equal(safe_numpy(layer.weights), safe_numpy(new_weights)) + + @pytest.mark.skip(reason="Stretch goal: Additional activation functions") + def test_additional_activations(self): + """Test additional activation functions.""" + # Leaky ReLU + leaky_relu = LeakyReLU(alpha=0.1) + x = Tensor([[-1.0, 0.0, 1.0]]) + y = leaky_relu(x) + expected = [[-0.1, 0.0, 1.0]] + np.testing.assert_array_almost_equal(safe_numpy(y), expected) + + # Softmax + softmax = Softmax() + x = Tensor([[1.0, 2.0, 3.0]]) + y = softmax(x) + # Should sum to 1 + assert np.allclose(np.sum(safe_numpy(y)), 1.0) + + @pytest.mark.skip(reason="Stretch goal: Dropout layer") + def test_dropout_layer(self): + """Test dropout layer implementation.""" + dropout = Dropout(p=0.5) + x = Tensor([[1.0, 2.0, 3.0, 4.0]]) + + # Training mode + dropout.train() + y_train = dropout(x) + + # Inference mode + dropout.eval() + y_eval = dropout(x) + + # In eval mode, should be same as input + np.testing.assert_array_equal(safe_numpy(y_eval), safe_numpy(x)) + + @pytest.mark.skip(reason="Stretch goal: Batch normalization") + def test_batch_normalization(self): + """Test batch normalization layer.""" + bn = BatchNorm1d(num_features=3) + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + y = bn(x) + + # Should normalize across batch dimension + assert y.shape == x.shape + # Mean should be close to 0, std close to 1 + assert np.allclose(np.mean(safe_numpy(y), axis=0), 0.0, atol=1e-6) + assert np.allclose(np.std(safe_numpy(y), axis=0), 1.0, atol=1e-6) \ No newline at end of file diff --git a/tests/test_layers.py b/tests/test_layers.py new file mode 100644 index 00000000..a8cada6b --- /dev/null +++ b/tests/test_layers.py @@ -0,0 +1,242 @@ +""" +Integration tests for TinyTorch Layers package. + +Tests the exported layers functionality that students will use. 
+These tests ensure the student experience works correctly. +""" + +import pytest +import numpy as np +from tinytorch.core.layers import Dense, ReLU, Sigmoid, Tanh +from tinytorch.core.tensor import Tensor + + +class TestDenseLayerIntegration: + """Test Dense layer integration with exported package.""" + + def test_dense_basic_functionality(self): + """Test basic Dense layer functionality.""" + layer = Dense(input_size=3, output_size=2) + x = Tensor([[1.0, 2.0, 3.0]]) + y = layer(x) + + assert y.shape == (1, 2) + assert isinstance(y, Tensor) + + def test_dense_batch_processing(self): + """Test Dense layer with batch processing.""" + layer = Dense(input_size=2, output_size=3) + x = Tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]) + y = layer(x) + + assert y.shape == (3, 3) + assert isinstance(y, Tensor) + + def test_dense_no_bias(self): + """Test Dense layer without bias.""" + layer = Dense(input_size=2, output_size=1, use_bias=False) + x = Tensor([[1.0, 2.0]]) + y = layer(x) + + assert y.shape == (1, 1) + assert layer.bias is None + + +class TestActivationFunctionsIntegration: + """Test activation functions integration.""" + + def test_relu_integration(self): + """Test ReLU activation function.""" + relu = ReLU() + x = Tensor([[-1.0, 0.0, 1.0]]) + y = relu(x) + + expected = [[0.0, 0.0, 1.0]] + np.testing.assert_array_equal(y.data, expected) + + def test_sigmoid_integration(self): + """Test Sigmoid activation function.""" + sigmoid = Sigmoid() + x = Tensor([[0.0]]) + y = sigmoid(x) + + np.testing.assert_array_almost_equal(y.data, [[0.5]]) + + def test_tanh_integration(self): + """Test Tanh activation function.""" + tanh = Tanh() + x = Tensor([[0.0]]) + y = tanh(x) + + np.testing.assert_array_almost_equal(y.data, [[0.0]]) + + +class TestNeuralNetworkIntegration: + """Test complete neural network integration.""" + + def test_simple_network_integration(self): + """Test building a simple neural network.""" + # 3 โ†’ 4 โ†’ 2 network + layer1 = Dense(input_size=3, 
output_size=4) + relu = ReLU() + layer2 = Dense(input_size=4, output_size=2) + sigmoid = Sigmoid() + + # Forward pass + x = Tensor([[1.0, 2.0, 3.0]]) + h1 = layer1(x) + h1_activated = relu(h1) + h2 = layer2(h1_activated) + output = sigmoid(h2) + + assert output.shape == (1, 2) + # Output should be in sigmoid range + assert np.all(output.data >= 0) + assert np.all(output.data <= 1) + + def test_batch_network_integration(self): + """Test network with batch processing.""" + layer1 = Dense(input_size=2, output_size=3) + relu = ReLU() + layer2 = Dense(input_size=3, output_size=1) + + # Batch of 4 examples + x = Tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]) + + h1 = layer1(x) + h1_activated = relu(h1) + output = layer2(h1_activated) + + assert output.shape == (4, 1) + + def test_image_classification_network(self): + """Test a realistic image classification network.""" + # Simulate MNIST: 784 โ†’ 128 โ†’ 64 โ†’ 10 + layer1 = Dense(input_size=784, output_size=128) + relu1 = ReLU() + layer2 = Dense(input_size=128, output_size=64) + relu2 = ReLU() + layer3 = Dense(input_size=64, output_size=10) + sigmoid = Sigmoid() + + # Simulate a batch of 3 images + batch_size = 3 + fake_images = Tensor(np.random.randn(batch_size, 784).astype(np.float32)) + + # Forward pass + h1 = relu1(layer1(fake_images)) + h2 = relu2(layer2(h1)) + predictions = sigmoid(layer3(h2)) + + assert predictions.shape == (batch_size, 10) + # All predictions should be in [0, 1] range + assert np.all(predictions.data >= 0) + assert np.all(predictions.data <= 1) + + +class TestLayerCompositionIntegration: + """Test layer composition patterns.""" + + def test_sequential_composition(self): + """Test sequential layer composition.""" + layers = [ + Dense(input_size=5, output_size=4), + ReLU(), + Dense(input_size=4, output_size=3), + ReLU(), + Dense(input_size=3, output_size=2), + Sigmoid() + ] + + x = Tensor([[1.0, 2.0, 3.0, 4.0, 5.0]]) + + # Apply layers sequentially + current = x + for layer in layers: + 
current = layer(current) + + assert current.shape == (1, 2) + assert np.all(current.data >= 0) + assert np.all(current.data <= 1) + + def test_different_activation_functions(self): + """Test using different activation functions.""" + # Network with different activations + layer1 = Dense(input_size=3, output_size=4) + relu = ReLU() + layer2 = Dense(input_size=4, output_size=4) + tanh = Tanh() + layer3 = Dense(input_size=4, output_size=2) + sigmoid = Sigmoid() + + x = Tensor([[1.0, 2.0, 3.0]]) + + # Forward pass + h1 = relu(layer1(x)) + h2 = tanh(layer2(h1)) + output = sigmoid(layer3(h2)) + + assert output.shape == (1, 2) + # Final output should be in sigmoid range + assert np.all(output.data >= 0) + assert np.all(output.data <= 1) + + +class TestStudentExperience: + """Test the typical student experience.""" + + def test_first_neural_network(self): + """Test the first neural network a student would build.""" + # Simple 2-layer network like in the tutorial + layer1 = Dense(input_size=3, output_size=4) + activation1 = ReLU() + layer2 = Dense(input_size=4, output_size=2) + activation2 = Sigmoid() + + # Sample data + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + + # Forward pass + h1 = layer1(x) + h1_activated = activation1(h1) + h2 = layer2(h1_activated) + output = activation2(h2) + + # Should work without errors + assert output.shape == (2, 2) + assert isinstance(output, Tensor) + + def test_layer_inspection(self): + """Test that students can inspect layer properties.""" + layer = Dense(input_size=3, output_size=2) + + # Students should be able to access these properties + assert hasattr(layer, 'input_size') + assert hasattr(layer, 'output_size') + assert hasattr(layer, 'weights') + assert hasattr(layer, 'bias') + + assert layer.input_size == 3 + assert layer.output_size == 2 + assert layer.weights.shape == (3, 2) + assert layer.bias.shape == (2,) + + def test_activation_function_behavior(self): + """Test activation function behavior that students will observe.""" 
+ # ReLU clips negative values + relu = ReLU() + x = Tensor([[-1.0, 0.0, 1.0]]) + y = relu(x) + assert np.array_equal(y.data, [[0.0, 0.0, 1.0]]) + + # Sigmoid maps to (0, 1) + sigmoid = Sigmoid() + x = Tensor([[0.0]]) + y = sigmoid(x) + assert np.isclose(y.data[0][0], 0.5) + + # Tanh maps to (-1, 1) + tanh = Tanh() + x = Tensor([[0.0]]) + y = tanh(x) + assert np.isclose(y.data[0][0], 0.0) \ No newline at end of file diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py new file mode 100644 index 00000000..567b612a --- /dev/null +++ b/tinytorch/core/layers.py @@ -0,0 +1,238 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/layers/layers_dev.ipynb. + +# %% auto 0 +__all__ = ['Dense', 'ReLU', 'Sigmoid', 'Tanh'] + +# %% ../../modules/layers/layers_dev.ipynb 2 +import numpy as np +import math +import sys +from typing import Union, Optional, Callable +from .tensor import Tensor + +# Import our Tensor class +# sys.path.append('../../') +# from modules.tensor.tensor_dev import Tensor + +# print("๐Ÿ”ฅ TinyTorch Layers Module") +# print(f"NumPy version: {np.__version__}") +# print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") +# print("Ready to build neural network layers!") + +# %% ../../modules/layers/layers_dev.ipynb 4 +class Dense: + """ + Dense (Linear) Layer: y = Wx + b + + The fundamental building block of neural networks. + Performs linear transformation: matrix multiplication + bias addition. + + Args: + input_size: Number of input features + output_size: Number of output features + use_bias: Whether to include bias term (default: True) + + TODO: Implement the Dense layer with weight initialization and forward pass. + """ + + def __init__(self, input_size: int, output_size: int, use_bias: bool = True): + """ + Initialize Dense layer with random weights. + + TODO: + 1. Store layer parameters (input_size, output_size, use_bias) + 2. Initialize weights with small random values + 3. 
Initialize bias to zeros (if use_bias=True) + """ + raise NotImplementedError("Student implementation required") + + def forward(self, x: Tensor) -> Tensor: + """ + Forward pass: y = Wx + b + + Args: + x: Input tensor of shape (batch_size, input_size) + + Returns: + Output tensor of shape (batch_size, output_size) + + TODO: Implement matrix multiplication and bias addition + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + """Make layer callable: layer(x) same as layer.forward(x)""" + return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 5 +class Dense: + """ + Dense (Linear) Layer: y = Wx + b + + The fundamental building block of neural networks. + Performs linear transformation: matrix multiplication + bias addition. + """ + + def __init__(self, input_size: int, output_size: int, use_bias: bool = True): + """Initialize Dense layer with random weights.""" + self.input_size = input_size + self.output_size = output_size + self.use_bias = use_bias + + # Initialize weights with Xavier/Glorot initialization + # This helps with gradient flow during training + limit = math.sqrt(6.0 / (input_size + output_size)) + self.weights = Tensor( + np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32) + ) + + # Initialize bias to zeros + if use_bias: + self.bias = Tensor(np.zeros(output_size, dtype=np.float32)) + else: + self.bias = None + + def forward(self, x: Tensor) -> Tensor: + """Forward pass: y = Wx + b""" + # Matrix multiplication: x @ weights + # x shape: (batch_size, input_size) + # weights shape: (input_size, output_size) + # result shape: (batch_size, output_size) + output = Tensor(x.data @ self.weights.data) + + # Add bias if present + if self.bias is not None: + output = Tensor(output.data + self.bias.data) + + return output + + def __call__(self, x: Tensor) -> Tensor: + """Make layer callable: layer(x) same as layer.forward(x)""" + return self.forward(x) + +# %% 
# %% ../../modules/layers/layers_dev.ipynb 10
class ReLU:
    """
    ReLU Activation: f(x) = max(0, x)

    The most popular activation function in deep learning.
    Simple, effective, and computationally efficient.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply ReLU: f(x) = max(0, x)

        Args:
            x: Input tensor

        Returns:
            A new Tensor built from ``np.maximum(0, x.data)``: negative
            entries become 0, all other entries pass through unchanged.
        """
        return Tensor(np.maximum(0, x.data))

    def __call__(self, x: Tensor) -> Tensor:
        """Make activation callable: relu(x) same as relu.forward(x)"""
        return self.forward(x)
# %% ../../modules/layers/layers_dev.ipynb 12
class Sigmoid:
    """
    Sigmoid Activation: f(x) = 1 / (1 + e^(-x))

    Squashes input to range (0, 1). Often used for binary classification.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Sigmoid element-wise without overflowing ``exp``.

        Args:
            x: Input tensor

        Returns:
            A new Tensor with Sigmoid applied element-wise. The result is
            floating point even when ``x.data`` has an integer dtype.
        """
        values = x.data

        # exp(-|x|) always lies in (0, 1], so it cannot overflow:
        #   x >= 0: sigmoid(x) = 1 / (1 + exp(-x))      = 1 / (1 + decay)
        #   x <  0: sigmoid(x) = exp(x) / (1 + exp(x))  = decay / (1 + decay)
        decay = np.exp(-np.abs(values))

        # np.where allocates the output from the floating-point branches, so
        # integer input is no longer truncated into an integer result buffer.
        result = np.where(values >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

        return Tensor(result)

    def __call__(self, x: Tensor) -> Tensor:
        """Make activation callable: sigmoid(x) same as sigmoid.forward(x)"""
        return self.forward(x)


# %% ../../modules/layers/layers_dev.ipynb 14
class Tanh:
    """
    Tanh Activation: f(x) = tanh(x)

    Squashes input to range (-1, 1). Zero-centered output.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Tanh: f(x) = tanh(x)

        Args:
            x: Input tensor

        Returns:
            A new Tensor built from ``np.tanh(x.data)`` (element-wise).
        """
        return Tensor(np.tanh(x.data))

    def __call__(self, x: Tensor) -> Tensor:
        """Make activation callable: tanh(x) same as tanh.forward(x)"""
        return self.forward(x)