From 9247784cb758cf703cdcc34785a18b5840446613 Mon Sep 17 00:00:00 2001 From: Vijay Janapa Reddi Date: Sat, 12 Jul 2025 17:51:00 -0400 Subject: [PATCH] feat: Enhanced tensor and activations modules with comprehensive educational content MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added package structure documentation explaining modules/source/ vs tinytorch.core. - Enhanced mathematical foundations with linear algebra refresher and Universal Approximation Theorem - Added real-world applications for each activation function (ReLU, Sigmoid, Tanh, Softmax) - Included mathematical properties, derivatives, ranges, and computational costs - Added performance considerations and numerical stability explanations - Connected to production ML systems (PyTorch, TensorFlow, JAX equivalents) - Implemented streamlined 'tito export' command with automatic .py โ†’ .ipynb conversion - All functionality preserved: scripts run correctly, tests pass, package integration works - Ready to continue with remaining modules (layers, networks, cnn, dataloader) --- modules/source/01_tensor/tensor_dev.ipynb | 789 ++++++++++++ modules/source/01_tensor/tensor_dev.py | 1072 +++++++---------- .../source/01_tensor/tensor_dev_enhanced.py | 408 ------- .../02_activations/activations_dev.ipynb | 894 ++++++++++++++ .../source/02_activations/activations_dev.py | 993 ++++++--------- tinytorch/_modidx.py | 59 +- tinytorch/core/activations.py | 246 ++++ tinytorch/core/tensor.py | 297 +++++ tito/commands/export.py | 67 +- tito/main.py | 8 +- 10 files changed, 3148 insertions(+), 1685 deletions(-) create mode 100644 modules/source/01_tensor/tensor_dev.ipynb delete mode 100644 modules/source/01_tensor/tensor_dev_enhanced.py create mode 100644 modules/source/02_activations/activations_dev.ipynb create mode 100644 tinytorch/core/activations.py create mode 100644 tinytorch/core/tensor.py diff --git a/modules/source/01_tensor/tensor_dev.ipynb 
b/modules/source/01_tensor/tensor_dev.ipynb new file mode 100644 index 00000000..1e1bc023 --- /dev/null +++ b/modules/source/01_tensor/tensor_dev.ipynb @@ -0,0 +1,789 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e37ae542", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Module 1: Tensor - Core Data Structure\n", + "\n", + "Welcome to the Tensor module! This is where TinyTorch really begins. You'll implement the fundamental data structure that powers all ML systems.\n", + "\n", + "## Learning Goals\n", + "- Understand tensors as N-dimensional arrays with ML-specific operations\n", + "- Implement a complete Tensor class with arithmetic operations\n", + "- Handle shape management, data types, and memory layout\n", + "- Build the foundation for neural networks and automatic differentiation\n", + "- Master the NBGrader workflow with comprehensive testing\n", + "\n", + "## Build โ†’ Use โ†’ Understand\n", + "1. **Build**: Create the Tensor class with core operations\n", + "2. **Use**: Perform tensor arithmetic and transformations\n", + "3. 
**Understand**: How tensors form the foundation of ML systems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af571489", + "metadata": { + "nbgrader": { + "grade": false, + "grade_id": "tensor-imports", + "locked": false, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "#| default_exp core.tensor\n", + "\n", + "#| export\n", + "import numpy as np\n", + "import sys\n", + "from typing import Union, List, Tuple, Optional, Any" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16eb7a23", + "metadata": { + "nbgrader": { + "grade": false, + "grade_id": "tensor-setup", + "locked": false, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "print(\"๐Ÿ”ฅ TinyTorch Tensor Module\")\n", + "print(f\"NumPy version: {np.__version__}\")\n", + "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", + "print(\"Ready to build tensors!\")" + ] + }, + { + "cell_type": "markdown", + "id": "79347f07", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## ๐Ÿ“ฆ Where This Code Lives in the Final Package\n", + "\n", + "**Learning Side:** You work in `modules/source/01_tensor/tensor_dev.py` \n", + "**Building Side:** Code exports to `tinytorch.core.tensor`\n", + "\n", + "```python\n", + "# Final package structure:\n", + "from tinytorch.core.tensor import Tensor # The foundation of everything!\n", + "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", + "from tinytorch.core.layers import Dense, Conv2D\n", + "```\n", + "\n", + "**Why this matters:**\n", + "- **Learning:** Focused modules for deep understanding\n", + "- **Production:** Proper organization like PyTorch's `torch.Tensor`\n", + "- **Consistency:** All tensor operations live together in `core.tensor`\n", + "- **Foundation:** Every other module depends on Tensor" + ] + }, + { + "cell_type": "markdown", + "id": "0fb9e8f5", + 
"metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 1: What is a Tensor?\n", + "\n", + "### Definition\n", + "A **tensor** is an N-dimensional array with ML-specific operations. Think of it as a container that can hold data in multiple dimensions:\n", + "\n", + "- **Scalar** (0D): A single number - `5.0`\n", + "- **Vector** (1D): A list of numbers - `[1, 2, 3]` \n", + "- **Matrix** (2D): A 2D array - `[[1, 2], [3, 4]]`\n", + "- **Higher dimensions**: 3D, 4D, etc. for images, video, batches\n", + "\n", + "### Why Tensors Matter in ML\n", + "Tensors are the foundation of all machine learning because:\n", + "- **Neural networks** process tensors (images, text, audio)\n", + "- **Batch processing** requires multiple samples at once\n", + "- **GPU acceleration** works efficiently with tensors\n", + "- **Automatic differentiation** needs structured data\n", + "\n", + "### Real-World Examples\n", + "- **Image**: 3D tensor `(height, width, channels)` - `(224, 224, 3)` for RGB images\n", + "- **Batch of images**: 4D tensor `(batch_size, height, width, channels)` - `(32, 224, 224, 3)`\n", + "- **Text**: 2D tensor `(sequence_length, embedding_dim)` - `(100, 768)` for BERT embeddings\n", + "- **Audio**: 2D tensor `(time_steps, features)` - `(16000, 1)` for 1 second of audio\n", + "\n", + "### Why Not Just Use NumPy?\n", + "We will use NumPy internally, but our Tensor class adds:\n", + "- **ML-specific operations** (later: gradients, GPU support)\n", + "- **Consistent API** for neural networks\n", + "- **Type safety** and error checking\n", + "- **Integration** with the rest of TinyTorch\n", + "\n", + "Let's start building!" 
+ ] + }, + { + "cell_type": "markdown", + "id": "211f7216", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## ๐Ÿง  The Mathematical Foundation\n", + "\n", + "### Linear Algebra Refresher\n", + "Tensors are generalizations of scalars, vectors, and matrices:\n", + "\n", + "```\n", + "Scalar (0D): 5\n", + "Vector (1D): [1, 2, 3]\n", + "Matrix (2D): [[1, 2], [3, 4]]\n", + "Tensor (3D): [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]\n", + "```\n", + "\n", + "### Why This Matters for Neural Networks\n", + "- **Forward Pass**: Matrix multiplication between layers\n", + "- **Batch Processing**: Multiple samples processed simultaneously\n", + "- **Convolutions**: 3D operations on image data\n", + "- **Gradients**: Derivatives computed across all dimensions\n", + "\n", + "### Connection to Real ML Systems\n", + "Every major ML framework uses tensors:\n", + "- **PyTorch**: `torch.Tensor`\n", + "- **TensorFlow**: `tf.Tensor`\n", + "- **JAX**: `jax.numpy.ndarray`\n", + "- **TinyTorch**: `tinytorch.core.tensor.Tensor` (what we're building!)\n", + "\n", + "### Performance Considerations\n", + "- **Memory Layout**: Contiguous arrays for cache efficiency\n", + "- **Vectorization**: SIMD operations for speed\n", + "- **Broadcasting**: Efficient operations on different shapes\n", + "- **Type Consistency**: Avoiding unnecessary conversions" + ] + }, + { + "cell_type": "markdown", + "id": "3b5dc139", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 2: The Tensor Class Foundation\n", + "\n", + "### Core Concept\n", + "Our Tensor class wraps NumPy arrays with ML-specific functionality. 
It needs to:\n", + "- Handle different input types (scalars, lists, numpy arrays)\n", + "- Provide consistent shape and type information\n", + "- Support arithmetic operations\n", + "- Maintain compatibility with the rest of TinyTorch\n", + "\n", + "### Design Principles\n", + "- **Simplicity**: Easy to create and use\n", + "- **Consistency**: Predictable behavior across operations\n", + "- **Performance**: Efficient NumPy backend\n", + "- **Extensibility**: Ready for future features (gradients, GPU)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5368e89", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "tensor-class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class Tensor:\n", + " \"\"\"\n", + " TinyTorch Tensor: N-dimensional array with ML operations.\n", + " \n", + " The fundamental data structure for all TinyTorch operations.\n", + " Wraps NumPy arrays with ML-specific functionality.\n", + " \"\"\"\n", + " \n", + " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", + " \"\"\"\n", + " Create a new tensor from data.\n", + " \n", + " Args:\n", + " data: Input data (scalar, list, or numpy array)\n", + " dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.\n", + " \n", + " TODO: Implement tensor creation with proper type handling.\n", + " \n", + " STEP-BY-STEP:\n", + " 1. Check if data is a scalar (int/float) - convert to numpy array\n", + " 2. Check if data is a list - convert to numpy array \n", + " 3. Check if data is already a numpy array - use as-is\n", + " 4. Apply dtype conversion if specified\n", + " 5. 
Store the result in self._data\n", + " \n", + " EXAMPLE:\n", + " Tensor(5) โ†’ stores np.array(5)\n", + " Tensor([1, 2, 3]) โ†’ stores np.array([1, 2, 3])\n", + " Tensor(np.array([1, 2, 3])) โ†’ stores the array directly\n", + " \n", + " HINTS:\n", + " - Use isinstance() to check data types\n", + " - Use np.array() for conversion\n", + " - Handle dtype parameter for type conversion\n", + " - Store the array in self._data\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # Convert input to numpy array\n", + " if isinstance(data, (int, float, np.number)):\n", + " # Handle Python and NumPy scalars\n", + " if dtype is None:\n", + " # Auto-detect type: int for integers, float32 for floats\n", + " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", + " dtype = 'int32'\n", + " else:\n", + " dtype = 'float32'\n", + " self._data = np.array(data, dtype=dtype)\n", + " elif isinstance(data, list):\n", + " # Let NumPy auto-detect type, then convert if needed\n", + " temp_array = np.array(data)\n", + " if dtype is None:\n", + " # Use NumPy's auto-detected type, but prefer float32 for floats\n", + " if temp_array.dtype == np.float64:\n", + " dtype = 'float32'\n", + " else:\n", + " dtype = str(temp_array.dtype)\n", + " self._data = np.array(data, dtype=dtype)\n", + " elif isinstance(data, np.ndarray):\n", + " # Already a numpy array\n", + " if dtype is None:\n", + " # Keep existing dtype, but prefer float32 for float64\n", + " if data.dtype == np.float64:\n", + " dtype = 'float32'\n", + " else:\n", + " dtype = str(data.dtype)\n", + " self._data = data.astype(dtype) if dtype != data.dtype else data.copy()\n", + " else:\n", + " # Try to convert unknown types\n", + " self._data = np.array(data, dtype=dtype)\n", + " ### END SOLUTION\n", + " \n", + " @property\n", + " def data(self) -> np.ndarray:\n", + " \"\"\"\n", + " Access underlying numpy array.\n", + " \n", + " TODO: Return the stored numpy array.\n", + " \n", + " HINT: Return 
self._data (the array you stored in __init__)\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " return self._data\n", + " ### END SOLUTION\n", + " \n", + " @property\n", + " def shape(self) -> Tuple[int, ...]:\n", + " \"\"\"\n", + " Get tensor shape.\n", + " \n", + " TODO: Return the shape of the stored numpy array.\n", + " \n", + " HINT: Use .shape attribute of the numpy array\n", + " EXAMPLE: Tensor([1, 2, 3]).shape should return (3,)\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " return self._data.shape\n", + " ### END SOLUTION\n", + " \n", + " @property\n", + " def size(self) -> int:\n", + " \"\"\"\n", + " Get total number of elements.\n", + " \n", + " TODO: Return the total number of elements in the tensor.\n", + " \n", + " HINT: Use .size attribute of the numpy array\n", + " EXAMPLE: Tensor([1, 2, 3]).size should return 3\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " return self._data.size\n", + " ### END SOLUTION\n", + " \n", + " @property\n", + " def dtype(self) -> np.dtype:\n", + " \"\"\"\n", + " Get data type as numpy dtype.\n", + " \n", + " TODO: Return the data type of the stored numpy array.\n", + " \n", + " HINT: Use .dtype attribute of the numpy array\n", + " EXAMPLE: Tensor([1, 2, 3]).dtype should return dtype('int32')\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " return self._data.dtype\n", + " ### END SOLUTION\n", + " \n", + " def __repr__(self) -> str:\n", + " \"\"\"\n", + " String representation.\n", + " \n", + " TODO: Create a clear string representation of the tensor.\n", + " \n", + " APPROACH:\n", + " 1. Convert the numpy array to a list for readable output\n", + " 2. Include the shape and dtype information\n", + " 3. 
Format: \"Tensor([data], shape=shape, dtype=dtype)\"\n", + " \n", + " EXAMPLE:\n", + " Tensor([1, 2, 3]) โ†’ \"Tensor([1, 2, 3], shape=(3,), dtype=int32)\"\n", + " \n", + " HINTS:\n", + " - Use .tolist() to convert numpy array to list\n", + " - Include shape and dtype information\n", + " - Keep format consistent and readable\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"\n", + " ### END SOLUTION\n", + " \n", + " def add(self, other: 'Tensor') -> 'Tensor':\n", + " \"\"\"\n", + " Add two tensors element-wise.\n", + " \n", + " TODO: Implement tensor addition.\n", + " \n", + " APPROACH:\n", + " 1. Add the numpy arrays using +\n", + " 2. Return a new Tensor with the result\n", + " 3. Handle broadcasting automatically\n", + " \n", + " EXAMPLE:\n", + " Tensor([1, 2]) + Tensor([3, 4]) โ†’ Tensor([4, 6])\n", + " \n", + " HINTS:\n", + " - Use self._data + other._data\n", + " - Return Tensor(result)\n", + " - NumPy handles broadcasting automatically\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " result = self._data + other._data\n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + "\n", + " def multiply(self, other: 'Tensor') -> 'Tensor':\n", + " \"\"\"\n", + " Multiply two tensors element-wise.\n", + " \n", + " TODO: Implement tensor multiplication.\n", + " \n", + " APPROACH:\n", + " 1. Multiply the numpy arrays using *\n", + " 2. Return a new Tensor with the result\n", + " 3. 
Handle broadcasting automatically\n", + " \n", + " EXAMPLE:\n", + " Tensor([1, 2]) * Tensor([3, 4]) โ†’ Tensor([3, 8])\n", + " \n", + " HINTS:\n", + " - Use self._data * other._data\n", + " - Return Tensor(result)\n", + " - This is element-wise, not matrix multiplication\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " result = self._data * other._data\n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + "\n", + " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"\n", + " Addition operator: tensor + other\n", + " \n", + " TODO: Implement + operator for tensors.\n", + " \n", + " APPROACH:\n", + " 1. If other is a Tensor, use tensor addition\n", + " 2. If other is a scalar, convert to Tensor first\n", + " 3. Return the result\n", + " \n", + " EXAMPLE:\n", + " Tensor([1, 2]) + Tensor([3, 4]) โ†’ Tensor([4, 6])\n", + " Tensor([1, 2]) + 5 โ†’ Tensor([6, 7])\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " if isinstance(other, Tensor):\n", + " return self.add(other)\n", + " else:\n", + " return self.add(Tensor(other))\n", + " ### END SOLUTION\n", + "\n", + " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"\n", + " Multiplication operator: tensor * other\n", + " \n", + " TODO: Implement * operator for tensors.\n", + " \n", + " APPROACH:\n", + " 1. If other is a Tensor, use tensor multiplication\n", + " 2. If other is a scalar, convert to Tensor first\n", + " 3. 
Return the result\n", + " \n", + " EXAMPLE:\n", + " Tensor([1, 2]) * Tensor([3, 4]) โ†’ Tensor([3, 8])\n", + " Tensor([1, 2]) * 3 โ†’ Tensor([3, 6])\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " if isinstance(other, Tensor):\n", + " return self.multiply(other)\n", + " else:\n", + " return self.multiply(Tensor(other))\n", + " ### END SOLUTION\n", + "\n", + " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"\n", + " Subtraction operator: tensor - other\n", + " \n", + " TODO: Implement - operator for tensors.\n", + " \n", + " APPROACH:\n", + " 1. Convert other to Tensor if needed\n", + " 2. Subtract using numpy arrays\n", + " 3. Return new Tensor with result\n", + " \n", + " EXAMPLE:\n", + " Tensor([5, 6]) - Tensor([1, 2]) โ†’ Tensor([4, 4])\n", + " Tensor([5, 6]) - 1 โ†’ Tensor([4, 5])\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " if isinstance(other, Tensor):\n", + " result = self._data - other._data\n", + " else:\n", + " result = self._data - other\n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + "\n", + " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"\n", + " Division operator: tensor / other\n", + " \n", + " TODO: Implement / operator for tensors.\n", + " \n", + " APPROACH:\n", + " 1. Convert other to Tensor if needed\n", + " 2. Divide using numpy arrays\n", + " 3. 
Return new Tensor with result\n", + " \n", + " EXAMPLE:\n", + " Tensor([6, 8]) / Tensor([2, 4]) โ†’ Tensor([3, 2])\n", + " Tensor([6, 8]) / 2 โ†’ Tensor([3, 4])\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " if isinstance(other, Tensor):\n", + " result = self._data / other._data\n", + " else:\n", + " result = self._data / other\n", + " return Tensor(result)\n", + " ### END SOLUTION" + ] + }, + { + "cell_type": "markdown", + "id": "cebcc1d6", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 3: Tensor Arithmetic Operations\n", + "\n", + "### Why Arithmetic Matters\n", + "Tensor arithmetic is the foundation of all neural network operations:\n", + "- **Forward pass**: Matrix multiplications and additions\n", + "- **Activation functions**: Element-wise operations\n", + "- **Loss computation**: Differences and squares\n", + "- **Gradient computation**: Chain rule applications\n", + "\n", + "### Operations We'll Implement\n", + "- **Addition**: Element-wise addition of tensors\n", + "- **Multiplication**: Element-wise multiplication\n", + "- **Python operators**: `+`, `-`, `*`, `/` for natural syntax\n", + "- **Broadcasting**: Handle different shapes automatically" + ] + }, + { + "cell_type": "markdown", + "id": "5afc47f3", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 3: Tensor Arithmetic Methods\n", + "\n", + "The arithmetic methods are now part of the Tensor class above. Let's test them!" + ] + }, + { + "cell_type": "markdown", + "id": "04dc4fac", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 4: Python Operator Overloading\n", + "\n", + "### Why Operator Overloading?\n", + "Python's magic methods allow us to use natural syntax:\n", + "- `a + b` instead of `a.add(b)`\n", + "- `a * b` instead of `a.multiply(b)`\n", + "- `a - b` for subtraction\n", + "- `a / b` for division\n", + "\n", + "This makes tensor operations feel natural and readable." 
+ ] + }, + { + "cell_type": "markdown", + "id": "35ae8a76", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 4: Operator Overloading\n", + "\n", + "The operator methods (__add__, __mul__, __sub__, __truediv__) are now part of the Tensor class above. This enables natural syntax like `a + b` and `a * b`." + ] + }, + { + "cell_type": "markdown", + "id": "1a00809c", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### ๐Ÿงช Test Your Tensor Implementation\n", + "\n", + "Once you implement the Tensor class above, run these cells to test your implementation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ac88fbc", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-tensor-creation", + "locked": true, + "points": 25, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test tensor creation and properties\n", + "print(\"Testing tensor creation...\")\n", + "\n", + "# Test scalar creation\n", + "scalar = Tensor(5.0)\n", + "assert scalar.shape == (), f\"Scalar shape should be (), got {scalar.shape}\"\n", + "assert scalar.size == 1, f\"Scalar size should be 1, got {scalar.size}\"\n", + "assert scalar.data.item() == 5.0, f\"Scalar value should be 5.0, got {scalar.data.item()}\"\n", + "\n", + "# Test vector creation\n", + "vector = Tensor([1, 2, 3])\n", + "assert vector.shape == (3,), f\"Vector shape should be (3,), got {vector.shape}\"\n", + "assert vector.size == 3, f\"Vector size should be 3, got {vector.size}\"\n", + "assert np.array_equal(vector.data, np.array([1, 2, 3])), \"Vector data mismatch\"\n", + "\n", + "# Test matrix creation\n", + "matrix = Tensor([[1, 2], [3, 4]])\n", + "assert matrix.shape == (2, 2), f\"Matrix shape should be (2, 2), got {matrix.shape}\"\n", + "assert matrix.size == 4, f\"Matrix size should be 4, got {matrix.size}\"\n", + "assert np.array_equal(matrix.data, np.array([[1, 2], [3, 4]])), \"Matrix data mismatch\"\n", 
+ "\n", + "# Test dtype handling\n", + "float_tensor = Tensor([1.0, 2.0, 3.0])\n", + "assert float_tensor.dtype == np.float32, f\"Float tensor dtype should be float32, got {float_tensor.dtype}\"\n", + "\n", + "int_tensor = Tensor([1, 2, 3])\n", + "# Note: NumPy may default to int64 on some systems, so we check for integer types\n", + "assert int_tensor.dtype in [np.int32, np.int64], f\"Int tensor dtype should be int32 or int64, got {int_tensor.dtype}\"\n", + "\n", + "print(\"โœ… Tensor creation tests passed!\")\n", + "print(f\"โœ… Scalar: {scalar}\")\n", + "print(f\"โœ… Vector: {vector}\")\n", + "print(f\"โœ… Matrix: {matrix}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edc7519d", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-tensor-arithmetic", + "locked": true, + "points": 25, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test tensor arithmetic operations\n", + "print(\"Testing tensor arithmetic...\")\n", + "\n", + "# Test addition\n", + "a = Tensor([1, 2, 3])\n", + "b = Tensor([4, 5, 6])\n", + "c = a + b\n", + "expected = np.array([5, 7, 9])\n", + "assert np.array_equal(c.data, expected), f\"Addition failed: expected {expected}, got {c.data}\"\n", + "\n", + "# Test multiplication\n", + "d = a * b\n", + "expected = np.array([4, 10, 18])\n", + "assert np.array_equal(d.data, expected), f\"Multiplication failed: expected {expected}, got {d.data}\"\n", + "\n", + "# Test subtraction\n", + "e = b - a\n", + "expected = np.array([3, 3, 3])\n", + "assert np.array_equal(e.data, expected), f\"Subtraction failed: expected {expected}, got {e.data}\"\n", + "\n", + "# Test division\n", + "f = b / a\n", + "expected = np.array([4.0, 2.5, 2.0])\n", + "assert np.allclose(f.data, expected), f\"Division failed: expected {expected}, got {f.data}\"\n", + "\n", + "# Test scalar operations\n", + "g = a + 10\n", + "expected = np.array([11, 12, 13])\n", + "assert 
np.array_equal(g.data, expected), f\"Scalar addition failed: expected {expected}, got {g.data}\"\n", + "\n", + "h = a * 2\n", + "expected = np.array([2, 4, 6])\n", + "assert np.array_equal(h.data, expected), f\"Scalar multiplication failed: expected {expected}, got {h.data}\"\n", + "\n", + "print(\"โœ… Tensor arithmetic tests passed!\")\n", + "print(f\"โœ… Addition: {a} + {b} = {c}\")\n", + "print(f\"โœ… Multiplication: {a} * {b} = {d}\")\n", + "print(f\"โœ… Subtraction: {b} - {a} = {e}\")\n", + "print(f\"โœ… Division: {b} / {a} = {f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba87775f", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-tensor-broadcasting", + "locked": true, + "points": 25, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test tensor broadcasting\n", + "print(\"Testing tensor broadcasting...\")\n", + "\n", + "# Test scalar broadcasting\n", + "matrix = Tensor([[1, 2], [3, 4]])\n", + "scalar = Tensor(10)\n", + "result = matrix + scalar\n", + "expected = np.array([[11, 12], [13, 14]])\n", + "assert np.array_equal(result.data, expected), f\"Scalar broadcasting failed: expected {expected}, got {result.data}\"\n", + "\n", + "# Test vector broadcasting\n", + "vector = Tensor([1, 2])\n", + "result = matrix + vector\n", + "expected = np.array([[2, 4], [4, 6]])\n", + "assert np.array_equal(result.data, expected), f\"Vector broadcasting failed: expected {expected}, got {result.data}\"\n", + "\n", + "# Test different shapes\n", + "a = Tensor([[1], [2], [3]]) # (3, 1)\n", + "b = Tensor([10, 20]) # (2,)\n", + "result = a + b\n", + "expected = np.array([[11, 21], [12, 22], [13, 23]])\n", + "assert np.array_equal(result.data, expected), f\"Shape broadcasting failed: expected {expected}, got {result.data}\"\n", + "\n", + "print(\"โœ… Tensor broadcasting tests passed!\")\n", + "print(f\"โœ… Matrix + Scalar: {matrix} + {scalar} = {result}\")\n", + 
"print(f\"โœ… Broadcasting works correctly!\")" + ] + }, + { + "cell_type": "markdown", + "id": "8ac93d30", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## ๐ŸŽฏ Module Summary\n", + "\n", + "Congratulations! You've successfully implemented the core Tensor class for TinyTorch:\n", + "\n", + "### What You've Accomplished\n", + "โœ… **Tensor Creation**: Handle scalars, vectors, matrices, and higher-dimensional arrays \n", + "โœ… **Data Types**: Proper dtype handling with auto-detection and conversion \n", + "โœ… **Properties**: Shape, size, dtype, and data access \n", + "โœ… **Arithmetic**: Addition, multiplication, subtraction, division \n", + "โœ… **Operators**: Natural Python syntax with `+`, `-`, `*`, `/` \n", + "โœ… **Broadcasting**: Automatic shape compatibility like NumPy \n", + "\n", + "### Key Concepts You've Learned\n", + "- **Tensors** are the fundamental data structure for ML systems\n", + "- **NumPy backend** provides efficient computation with ML-friendly API\n", + "- **Operator overloading** makes tensor operations feel natural\n", + "- **Broadcasting** enables flexible operations between different shapes\n", + "- **Type safety** ensures consistent behavior across operations\n", + "\n", + "### Next Steps\n", + "1. **Export your code**: `tito package nbdev --export 01_tensor`\n", + "2. **Test your implementation**: `tito module test 01_tensor`\n", + "3. **Use your tensors**: \n", + " ```python\n", + " from tinytorch.core.tensor import Tensor\n", + " t = Tensor([1, 2, 3])\n", + " print(t + 5) # Your tensor in action!\n", + " ```\n", + "4. **Move to Module 2**: Start building activation functions!\n", + "\n", + "**Ready for the next challenge?** Let's add the mathematical functions that make neural networks powerful!" 
+ ] + } + ], + "metadata": { + "jupytext": { + "main_language": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/source/01_tensor/tensor_dev.py b/modules/source/01_tensor/tensor_dev.py index bc5efd48..c235972e 100644 --- a/modules/source/01_tensor/tensor_dev.py +++ b/modules/source/01_tensor/tensor_dev.py @@ -19,42 +19,49 @@ Welcome to the Tensor module! This is where TinyTorch really begins. You'll impl - Implement a complete Tensor class with arithmetic operations - Handle shape management, data types, and memory layout - Build the foundation for neural networks and automatic differentiation +- Master the NBGrader workflow with comprehensive testing +## Build โ†’ Use โ†’ Understand +1. **Build**: Create the Tensor class with core operations +2. **Use**: Perform tensor arithmetic and transformations +3. **Understand**: How tensors form the foundation of ML systems """ +# %% nbgrader={"grade": false, "grade_id": "tensor-imports", "locked": false, "schema_version": 3, "solution": false, "task": false} +#| default_exp core.tensor + +#| export +import numpy as np +import sys +from typing import Union, List, Tuple, Optional, Any + +# %% nbgrader={"grade": false, "grade_id": "tensor-setup", "locked": false, "schema_version": 3, "solution": false, "task": false} +print("๐Ÿ”ฅ TinyTorch Tensor Module") +print(f"NumPy version: {np.__version__}") +print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") +print("Ready to build tensors!") + # %% [markdown] """ ## ๐Ÿ“ฆ Where This Code Lives in the Final Package -**Learning Side:** You work in `assignments/source/01_tensor/tensor_dev.py` +**Learning Side:** You work in `modules/source/01_tensor/tensor_dev.py` **Building Side:** Code exports to `tinytorch.core.tensor` ```python # Final package structure: -from tinytorch.core.tensor import Tensor -from tinytorch.core.layers import Dense, Conv2D +from tinytorch.core.tensor import Tensor # The foundation of everything! 
from tinytorch.core.activations import ReLU, Sigmoid, Tanh +from tinytorch.core.layers import Dense, Conv2D ``` **Why this matters:** - **Learning:** Focused modules for deep understanding -- **Production:** Proper organization like PyTorch's `torch.tensor` -- **Consistency:** Core data structure lives in `core.tensor` +- **Production:** Proper organization like PyTorch's `torch.Tensor` +- **Consistency:** All tensor operations live together in `core.tensor` +- **Foundation:** Every other module depends on Tensor """ -# %% -#| default_exp core.tensor - -# Setup and imports -import numpy as np -import sys -from typing import Union, List, Tuple, Optional, Any - -print("๐Ÿ”ฅ TinyTorch Tensor Module") -print(f"NumPy version: {np.__version__}") -print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") -print("Ready to build tensors!") - # %% [markdown] """ ## Step 1: What is a Tensor? @@ -87,20 +94,62 @@ We will use NumPy internally, but our Tensor class adds: - **Type safety** and error checking - **Integration** with the rest of TinyTorch -### Visual Intuition -``` -Scalar (0D): 5.0 -Vector (1D): [1, 2, 3, 4] -Matrix (2D): [[1, 2, 3], - [4, 5, 6]] -3D Tensor: [[[1, 2], [3, 4]], - [[5, 6], [7, 8]]] -``` - Let's start building! 
""" -# %% +# %% [markdown] +""" +## ๐Ÿง  The Mathematical Foundation + +### Linear Algebra Refresher +Tensors are generalizations of scalars, vectors, and matrices: + +``` +Scalar (0D): 5 +Vector (1D): [1, 2, 3] +Matrix (2D): [[1, 2], [3, 4]] +Tensor (3D): [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] +``` + +### Why This Matters for Neural Networks +- **Forward Pass**: Matrix multiplication between layers +- **Batch Processing**: Multiple samples processed simultaneously +- **Convolutions**: 3D operations on image data +- **Gradients**: Derivatives computed across all dimensions + +### Connection to Real ML Systems +Every major ML framework uses tensors: +- **PyTorch**: `torch.Tensor` +- **TensorFlow**: `tf.Tensor` +- **JAX**: `jax.numpy.ndarray` +- **TinyTorch**: `tinytorch.core.tensor.Tensor` (what we're building!) + +### Performance Considerations +- **Memory Layout**: Contiguous arrays for cache efficiency +- **Vectorization**: SIMD operations for speed +- **Broadcasting**: Efficient operations on different shapes +- **Type Consistency**: Avoiding unnecessary conversions +""" + +# %% [markdown] +""" +## Step 2: The Tensor Class Foundation + +### Core Concept +Our Tensor class wraps NumPy arrays with ML-specific functionality. It needs to: +- Handle different input types (scalars, lists, numpy arrays) +- Provide consistent shape and type information +- Support arithmetic operations +- Maintain compatibility with the rest of TinyTorch + +### Design Principles +- **Simplicity**: Easy to create and use +- **Consistency**: Predictable behavior across operations +- **Performance**: Efficient NumPy backend +- **Extensibility**: Ready for future features (gradients, GPU) +""" + +# %% nbgrader={"grade": false, "grade_id": "tensor-class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class Tensor: """ @@ -108,24 +157,6 @@ class Tensor: The fundamental data structure for all TinyTorch operations. 
Wraps NumPy arrays with ML-specific functionality. - - TODO: Implement the core Tensor class with data handling and properties. - - APPROACH: - 1. Store the input data as a NumPy array internally - 2. Handle different input types (scalars, lists, numpy arrays) - 3. Implement properties to access shape, size, and data type - 4. Create a clear string representation - - EXAMPLE: - Input: Tensor([1, 2, 3]) - Expected: Tensor with shape (3,), size 3, dtype int32 - - HINTS: - - Use NumPy's np.array() to convert inputs - - Handle dtype parameter for type conversion - - Store the array in a private attribute like self._data - - Properties should return information about the stored array """ def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): @@ -149,8 +180,47 @@ class Tensor: Tensor(5) โ†’ stores np.array(5) Tensor([1, 2, 3]) โ†’ stores np.array([1, 2, 3]) Tensor(np.array([1, 2, 3])) โ†’ stores the array directly + + HINTS: + - Use isinstance() to check data types + - Use np.array() for conversion + - Handle dtype parameter for type conversion + - Store the array in self._data """ - raise NotImplementedError("Student implementation required") + ### BEGIN SOLUTION + # Convert input to numpy array + if isinstance(data, (int, float, np.number)): + # Handle Python and NumPy scalars + if dtype is None: + # Auto-detect type: int for integers, float32 for floats + if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)): + dtype = 'int32' + else: + dtype = 'float32' + self._data = np.array(data, dtype=dtype) + elif isinstance(data, list): + # Let NumPy auto-detect type, then convert if needed + temp_array = np.array(data) + if dtype is None: + # Use NumPy's auto-detected type, but prefer float32 for floats + if temp_array.dtype == np.float64: + dtype = 'float32' + else: + dtype = str(temp_array.dtype) + self._data = np.array(data, dtype=dtype) + elif isinstance(data, np.ndarray): + # Already a 
numpy array + if dtype is None: + # Keep existing dtype, but prefer float32 for float64 + if data.dtype == np.float64: + dtype = 'float32' + else: + dtype = str(data.dtype) + self._data = data.astype(dtype) if dtype != data.dtype else data.copy() + else: + # Try to convert unknown types + self._data = np.array(data, dtype=dtype) + ### END SOLUTION @property def data(self) -> np.ndarray: @@ -161,7 +231,9 @@ class Tensor: HINT: Return self._data (the array you stored in __init__) """ - raise NotImplementedError("Student implementation required") + ### BEGIN SOLUTION + return self._data + ### END SOLUTION @property def shape(self) -> Tuple[int, ...]: @@ -173,7 +245,9 @@ class Tensor: HINT: Use .shape attribute of the numpy array EXAMPLE: Tensor([1, 2, 3]).shape should return (3,) """ - raise NotImplementedError("Student implementation required") + ### BEGIN SOLUTION + return self._data.shape + ### END SOLUTION @property def size(self) -> int: @@ -185,7 +259,9 @@ class Tensor: HINT: Use .size attribute of the numpy array EXAMPLE: Tensor([1, 2, 3]).size should return 3 """ - raise NotImplementedError("Student implementation required") + ### BEGIN SOLUTION + return self._data.size + ### END SOLUTION @property def dtype(self) -> np.dtype: @@ -197,7 +273,9 @@ class Tensor: HINT: Use .dtype attribute of the numpy array EXAMPLE: Tensor([1, 2, 3]).dtype should return dtype('int32') """ - raise NotImplementedError("Student implementation required") + ### BEGIN SOLUTION + return self._data.dtype + ### END SOLUTION def __repr__(self) -> str: """ @@ -212,680 +290,342 @@ class Tensor: EXAMPLE: Tensor([1, 2, 3]) โ†’ "Tensor([1, 2, 3], shape=(3,), dtype=int32)" - """ - raise NotImplementedError("Student implementation required") - -# %% -#| hide -#| export -class Tensor: - """ - TinyTorch Tensor: N-dimensional array with ML operations. - - The fundamental data structure for all TinyTorch operations. - Wraps NumPy arrays with ML-specific functionality. 
- """ - - def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): - """ - Create a new tensor from data. - Args: - data: Input data (scalar, list, or numpy array) - dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect. + HINTS: + - Use .tolist() to convert numpy array to list + - Include shape and dtype information + - Keep format consistent and readable """ - # Convert input to numpy array - if isinstance(data, (int, float, np.number)): - # Handle Python and NumPy scalars - if dtype is None: - # Auto-detect type: int for integers, float32 for floats - if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)): - dtype = 'int32' - else: - dtype = 'float32' - self._data = np.array(data, dtype=dtype) - elif isinstance(data, list): - # Let NumPy auto-detect type, then convert if needed - temp_array = np.array(data) - if dtype is None: - # Keep NumPy's auto-detected type, but prefer common ML types - if np.issubdtype(temp_array.dtype, np.integer): - dtype = 'int32' - elif np.issubdtype(temp_array.dtype, np.floating): - dtype = 'float32' - else: - dtype = temp_array.dtype - self._data = temp_array.astype(dtype) - elif isinstance(data, np.ndarray): - self._data = data.astype(dtype or data.dtype) - else: - raise TypeError(f"Cannot create tensor from {type(data)}") - - @property - def data(self) -> np.ndarray: - """Access underlying numpy array.""" - return self._data - - @property - def shape(self) -> Tuple[int, ...]: - """Get tensor shape.""" - return self._data.shape - - @property - def size(self) -> int: - """Get total number of elements.""" - return self._data.size - - @property - def dtype(self) -> np.dtype: - """Get data type as numpy dtype.""" - return self._data.dtype - - def __repr__(self) -> str: - """String representation.""" + ### BEGIN SOLUTION return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})" + ### END SOLUTION def add(self, other: 
'Tensor') -> 'Tensor': """ - Add another tensor to this tensor. + Add two tensors element-wise. - TODO: Implement tensor addition as a method. + TODO: Implement tensor addition. APPROACH: - 1. Use the add_tensors function you already implemented - 2. Or implement the addition directly using self._data + other._data - 3. Return a new Tensor with the result + 1. Add the numpy arrays using + + 2. Return a new Tensor with the result + 3. Handle broadcasting automatically EXAMPLE: - Tensor([1, 2, 3]).add(Tensor([4, 5, 6])) โ†’ Tensor([5, 7, 9]) + Tensor([1, 2]) + Tensor([3, 4]) โ†’ Tensor([4, 6]) HINTS: - - You can reuse add_tensors(self, other) - - Or implement directly: Tensor(self._data + other._data) + - Use self._data + other._data + - Return Tensor(result) + - NumPy handles broadcasting automatically """ - raise NotImplementedError("Student implementation required") - + ### BEGIN SOLUTION + result = self._data + other._data + return Tensor(result) + ### END SOLUTION + def multiply(self, other: 'Tensor') -> 'Tensor': """ - Multiply this tensor by another tensor. + Multiply two tensors element-wise. - TODO: Implement tensor multiplication as a method. + TODO: Implement tensor multiplication. APPROACH: - 1. Use the multiply_tensors function you already implemented - 2. Or implement the multiplication directly using self._data * other._data - 3. Return a new Tensor with the result + 1. Multiply the numpy arrays using * + 2. Return a new Tensor with the result + 3. 
Handle broadcasting automatically EXAMPLE: - Tensor([1, 2, 3]).multiply(Tensor([4, 5, 6])) โ†’ Tensor([4, 10, 18]) + Tensor([1, 2]) * Tensor([3, 4]) โ†’ Tensor([3, 8]) HINTS: - - You can reuse multiply_tensors(self, other) - - Or implement directly: Tensor(self._data * other._data) + - Use self._data * other._data + - Return Tensor(result) + - This is element-wise, not matrix multiplication """ - raise NotImplementedError("Student implementation required") - - # Arithmetic operators for natural syntax (a + b, a * b, etc.) + ### BEGIN SOLUTION + result = self._data * other._data + return Tensor(result) + ### END SOLUTION + def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Addition: tensor + other""" + """ + Addition operator: tensor + other + + TODO: Implement + operator for tensors. + + APPROACH: + 1. If other is a Tensor, use tensor addition + 2. If other is a scalar, convert to Tensor first + 3. Return the result + + EXAMPLE: + Tensor([1, 2]) + Tensor([3, 4]) โ†’ Tensor([4, 6]) + Tensor([1, 2]) + 5 โ†’ Tensor([6, 7]) + """ + ### BEGIN SOLUTION if isinstance(other, Tensor): - return Tensor(self._data + other._data) - else: # scalar - return Tensor(self._data + other) - - def __radd__(self, other: Union[int, float]) -> 'Tensor': - """Reverse addition: scalar + tensor""" - return Tensor(other + self._data) - - def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Subtraction: tensor - other""" - if isinstance(other, Tensor): - return Tensor(self._data - other._data) - else: # scalar - return Tensor(self._data - other) - - def __rsub__(self, other: Union[int, float]) -> 'Tensor': - """Reverse subtraction: scalar - tensor""" - return Tensor(other - self._data) - + return self.add(other) + else: + return self.add(Tensor(other)) + ### END SOLUTION + def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Multiplication: tensor * other""" + """ + Multiplication operator: tensor * other + + TODO: Implement * operator 
for tensors. + + APPROACH: + 1. If other is a Tensor, use tensor multiplication + 2. If other is a scalar, convert to Tensor first + 3. Return the result + + EXAMPLE: + Tensor([1, 2]) * Tensor([3, 4]) โ†’ Tensor([3, 8]) + Tensor([1, 2]) * 3 โ†’ Tensor([3, 6]) + """ + ### BEGIN SOLUTION if isinstance(other, Tensor): - return Tensor(self._data * other._data) - else: # scalar - return Tensor(self._data * other) - - def __rmul__(self, other: Union[int, float]) -> 'Tensor': - """Reverse multiplication: scalar * tensor""" - return Tensor(other * self._data) - + return self.multiply(other) + else: + return self.multiply(Tensor(other)) + ### END SOLUTION + + def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """ + Subtraction operator: tensor - other + + TODO: Implement - operator for tensors. + + APPROACH: + 1. Convert other to Tensor if needed + 2. Subtract using numpy arrays + 3. Return new Tensor with result + + EXAMPLE: + Tensor([5, 6]) - Tensor([1, 2]) โ†’ Tensor([4, 4]) + Tensor([5, 6]) - 1 โ†’ Tensor([4, 5]) + """ + ### BEGIN SOLUTION + if isinstance(other, Tensor): + result = self._data - other._data + else: + result = self._data - other + return Tensor(result) + ### END SOLUTION + def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Division: tensor / other""" + """ + Division operator: tensor / other + + TODO: Implement / operator for tensors. + + APPROACH: + 1. Convert other to Tensor if needed + 2. Divide using numpy arrays + 3. 
Return new Tensor with result + + EXAMPLE: + Tensor([6, 8]) / Tensor([2, 4]) โ†’ Tensor([3, 2]) + Tensor([6, 8]) / 2 โ†’ Tensor([3, 4]) + """ + ### BEGIN SOLUTION if isinstance(other, Tensor): - return Tensor(self._data / other._data) - else: # scalar - return Tensor(self._data / other) - - def __rtruediv__(self, other: Union[int, float]) -> 'Tensor': - """Reverse division: scalar / tensor""" - return Tensor(other / self._data) + result = self._data / other._data + else: + result = self._data / other + return Tensor(result) + ### END SOLUTION # %% [markdown] """ -### ๐Ÿงช Test Your Tensor Class - -Once you implement the Tensor class above, run this cell to test it: -""" - -# %% -# Test basic tensor creation -print("Testing Tensor creation...") - -try: - # Test scalar - t1 = Tensor(5) - print(f"โœ… Scalar: {t1} (shape: {t1.shape}, size: {t1.size})") - - # Test vector - t2 = Tensor([1, 2, 3, 4]) - print(f"โœ… Vector: {t2} (shape: {t2.shape}, size: {t2.size})") - - # Test matrix - t3 = Tensor([[1, 2], [3, 4]]) - print(f"โœ… Matrix: {t3} (shape: {t3.shape}, size: {t3.size})") - - # Test numpy array - t4 = Tensor(np.array([1.0, 2.0, 3.0])) - print(f"โœ… Numpy: {t4} (shape: {t4.shape}, size: {t4.size})") - - # Test dtype - t5 = Tensor([1, 2, 3], dtype='float32') - print(f"โœ… Dtype: {t5} (dtype: {t5.dtype})") - - print("\n๐ŸŽ‰ All basic tests passed! Your Tensor class is working!") - -except Exception as e: - print(f"โŒ Error: {e}") - print("Make sure to implement all the required methods!") - -# %% [markdown] -""" -## Step 2: Tensor Arithmetic Operations - -Now let's add the ability to perform mathematical operations on tensors. This is where tensors become powerful for ML! 
+## Step 3: Tensor Arithmetic Operations ### Why Arithmetic Matters -- **Neural networks** perform millions of arithmetic operations -- **Gradients** require addition, multiplication, and other operations -- **Batch processing** needs element-wise operations -- **GPU acceleration** works with parallel arithmetic +Tensor arithmetic is the foundation of all neural network operations: +- **Forward pass**: Matrix multiplications and additions +- **Activation functions**: Element-wise operations +- **Loss computation**: Differences and squares +- **Gradient computation**: Chain rule applications -### Types of Operations -1. **Element-wise**: Add, subtract, multiply, divide -2. **Broadcasting**: Operations between different shapes -3. **Matrix operations**: Matrix multiplication (later) -4. **Reduction**: Sum, mean, max, min (later) - -Let's start with the basics! +### Operations We'll Implement +- **Addition**: Element-wise addition of tensors +- **Multiplication**: Element-wise multiplication +- **Python operators**: `+`, `-`, `*`, `/` for natural syntax +- **Broadcasting**: Handle different shapes automatically """ -# %% -#| export -def add_tensors(a: Tensor, b: Tensor) -> Tensor: - """ - Add two tensors element-wise. - - TODO: Implement element-wise addition of two tensors. - - APPROACH: - 1. Extract the numpy arrays from both tensors - 2. Use NumPy's + operator for element-wise addition - 3. 
Return a new Tensor with the result - - EXAMPLE: - add_tensors(Tensor([1, 2, 3]), Tensor([4, 5, 6])) - โ†’ Tensor([5, 7, 9]) - - HINTS: - - Use a.data and b.data to get the numpy arrays - - NumPy handles broadcasting automatically - - Return Tensor(result) to wrap the result - """ - raise NotImplementedError("Student implementation required") - -# %% -#| hide -#| export -def add_tensors(a: Tensor, b: Tensor) -> Tensor: - """Add two tensors element-wise.""" - return Tensor(a.data + b.data) - -# %% -#| export -def multiply_tensors(a: Tensor, b: Tensor) -> Tensor: - """ - Multiply two tensors element-wise. - - TODO: Implement element-wise multiplication of two tensors. - - APPROACH: - 1. Extract the numpy arrays from both tensors - 2. Use NumPy's * operator for element-wise multiplication - 3. Return a new Tensor with the result - - EXAMPLE: - multiply_tensors(Tensor([1, 2, 3]), Tensor([4, 5, 6])) - โ†’ Tensor([4, 10, 18]) - - HINTS: - - Use a.data and b.data to get the numpy arrays - - NumPy handles broadcasting automatically - - Return Tensor(result) to wrap the result - """ - raise NotImplementedError("Student implementation required") - -# %% -#| hide -#| export -def multiply_tensors(a: Tensor, b: Tensor) -> Tensor: - """Multiply two tensors element-wise.""" - return Tensor(a.data * b.data) - # %% [markdown] """ -### ๐Ÿงช Test Your Arithmetic Operations +## Step 3: Tensor Arithmetic Methods + +The arithmetic methods are now part of the Tensor class above. Let's test them! """ -# %% -# Test arithmetic operations +# %% [markdown] +""" +## Step 4: Python Operator Overloading + +### Why Operator Overloading? +Python's magic methods allow us to use natural syntax: +- `a + b` instead of `a.add(b)` +- `a * b` instead of `a.multiply(b)` +- `a - b` for subtraction +- `a / b` for division + +This makes tensor operations feel natural and readable. 
+""" + +# %% [markdown] +""" +## Step 4: Operator Overloading + +The operator methods (__add__, __mul__, __sub__, __truediv__) are now part of the Tensor class above. This enables natural syntax like `a + b` and `a * b`. +""" + +# %% [markdown] +""" +### ๐Ÿงช Test Your Tensor Implementation + +Once you implement the Tensor class above, run these cells to test your implementation: +""" + +# %% nbgrader={"grade": true, "grade_id": "test-tensor-creation", "locked": true, "points": 25, "schema_version": 3, "solution": false, "task": false} +# Test tensor creation and properties +print("Testing tensor creation...") + +# Test scalar creation +scalar = Tensor(5.0) +assert scalar.shape == (), f"Scalar shape should be (), got {scalar.shape}" +assert scalar.size == 1, f"Scalar size should be 1, got {scalar.size}" +assert scalar.data.item() == 5.0, f"Scalar value should be 5.0, got {scalar.data.item()}" + +# Test vector creation +vector = Tensor([1, 2, 3]) +assert vector.shape == (3,), f"Vector shape should be (3,), got {vector.shape}" +assert vector.size == 3, f"Vector size should be 3, got {vector.size}" +assert np.array_equal(vector.data, np.array([1, 2, 3])), "Vector data mismatch" + +# Test matrix creation +matrix = Tensor([[1, 2], [3, 4]]) +assert matrix.shape == (2, 2), f"Matrix shape should be (2, 2), got {matrix.shape}" +assert matrix.size == 4, f"Matrix size should be 4, got {matrix.size}" +assert np.array_equal(matrix.data, np.array([[1, 2], [3, 4]])), "Matrix data mismatch" + +# Test dtype handling +float_tensor = Tensor([1.0, 2.0, 3.0]) +assert float_tensor.dtype == np.float32, f"Float tensor dtype should be float32, got {float_tensor.dtype}" + +int_tensor = Tensor([1, 2, 3]) +# Note: NumPy may default to int64 on some systems, so we check for integer types +assert int_tensor.dtype in [np.int32, np.int64], f"Int tensor dtype should be int32 or int64, got {int_tensor.dtype}" + +print("โœ… Tensor creation tests passed!") +print(f"โœ… Scalar: {scalar}") +print(f"โœ… 
Vector: {vector}") +print(f"โœ… Matrix: {matrix}") + +# %% nbgrader={"grade": true, "grade_id": "test-tensor-arithmetic", "locked": true, "points": 25, "schema_version": 3, "solution": false, "task": false} +# Test tensor arithmetic operations print("Testing tensor arithmetic...") -try: - # Test addition - a = Tensor([1, 2, 3]) - b = Tensor([4, 5, 6]) - c = add_tensors(a, b) - print(f"โœ… Addition: {a} + {b} = {c}") - - # Test multiplication - d = multiply_tensors(a, b) - print(f"โœ… Multiplication: {a} * {b} = {d}") - - # Test broadcasting (scalar + tensor) - scalar = Tensor(10) - e = add_tensors(scalar, a) - print(f"โœ… Broadcasting: {scalar} + {a} = {e}") - - print("\n๐ŸŽ‰ All arithmetic tests passed!") - -except Exception as e: - print(f"โŒ Error: {e}") - print("Make sure to implement add_tensors and multiply_tensors!") +# Test addition +a = Tensor([1, 2, 3]) +b = Tensor([4, 5, 6]) +c = a + b +expected = np.array([5, 7, 9]) +assert np.array_equal(c.data, expected), f"Addition failed: expected {expected}, got {c.data}" -# %% [markdown] -""" -## Step 3: Tensor Methods (Object-Oriented Approach) +# Test multiplication +d = a * b +expected = np.array([4, 10, 18]) +assert np.array_equal(d.data, expected), f"Multiplication failed: expected {expected}, got {d.data}" -Now let's add methods to the Tensor class itself. This makes the API more intuitive and similar to PyTorch. 
+# Test subtraction +e = b - a +expected = np.array([3, 3, 3]) +assert np.array_equal(e.data, expected), f"Subtraction failed: expected {expected}, got {e.data}" -### Why Methods Matter -- **Cleaner API**: `tensor.add(other)` instead of `add_tensors(tensor, other)` -- **Method chaining**: `tensor.add(other).multiply(scalar)` -- **Consistency**: Similar to PyTorch's tensor methods -- **Object-oriented**: Encapsulates operations with data -""" +# Test division +f = b / a +expected = np.array([4.0, 2.5, 2.0]) +assert np.allclose(f.data, expected), f"Division failed: expected {expected}, got {f.data}" -# %% -#| export -class Tensor: - """ - TinyTorch Tensor: N-dimensional array with ML operations. - - The fundamental data structure for all TinyTorch operations. - Wraps NumPy arrays with ML-specific functionality. - """ - - def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): - """ - Create a new tensor from data. - - Args: - data: Input data (scalar, list, or numpy array) - dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect. 
- """ - # Convert input to numpy array - if isinstance(data, (int, float, np.number)): - # Handle Python and NumPy scalars - if dtype is None: - # Auto-detect type: int for integers, float32 for floats - if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)): - dtype = 'int32' - else: - dtype = 'float32' - self._data = np.array(data, dtype=dtype) - elif isinstance(data, list): - # Let NumPy auto-detect type, then convert if needed - temp_array = np.array(data) - if dtype is None: - # Keep NumPy's auto-detected type, but prefer common ML types - if np.issubdtype(temp_array.dtype, np.integer): - dtype = 'int32' - elif np.issubdtype(temp_array.dtype, np.floating): - dtype = 'float32' - else: - dtype = temp_array.dtype - self._data = temp_array.astype(dtype) - elif isinstance(data, np.ndarray): - self._data = data.astype(dtype or data.dtype) - else: - raise TypeError(f"Cannot create tensor from {type(data)}") - - @property - def data(self) -> np.ndarray: - """Access underlying numpy array.""" - return self._data - - @property - def shape(self) -> Tuple[int, ...]: - """Get tensor shape.""" - return self._data.shape - - @property - def size(self) -> int: - """Get total number of elements.""" - return self._data.size - - @property - def dtype(self) -> np.dtype: - """Get data type as numpy dtype.""" - return self._data.dtype - - def __repr__(self) -> str: - """String representation.""" - return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})" - - def add(self, other: 'Tensor') -> 'Tensor': - """ - Add another tensor to this tensor. - - TODO: Implement tensor addition as a method. - - APPROACH: - 1. Use the add_tensors function you already implemented - 2. Or implement the addition directly using self._data + other._data - 3. 
Return a new Tensor with the result - - EXAMPLE: - Tensor([1, 2, 3]).add(Tensor([4, 5, 6])) โ†’ Tensor([5, 7, 9]) - - HINTS: - - You can reuse add_tensors(self, other) - - Or implement directly: Tensor(self._data + other._data) - """ - raise NotImplementedError("Student implementation required") - - def multiply(self, other: 'Tensor') -> 'Tensor': - """ - Multiply this tensor by another tensor. - - TODO: Implement tensor multiplication as a method. - - APPROACH: - 1. Use the multiply_tensors function you already implemented - 2. Or implement the multiplication directly using self._data * other._data - 3. Return a new Tensor with the result - - EXAMPLE: - Tensor([1, 2, 3]).multiply(Tensor([4, 5, 6])) โ†’ Tensor([4, 10, 18]) - - HINTS: - - You can reuse multiply_tensors(self, other) - - Or implement directly: Tensor(self._data * other._data) - """ - raise NotImplementedError("Student implementation required") - - # Arithmetic operators for natural syntax (a + b, a * b, etc.) - def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Addition: tensor + other""" - if isinstance(other, Tensor): - return Tensor(self._data + other._data) - else: # scalar - return Tensor(self._data + other) - - def __radd__(self, other: Union[int, float]) -> 'Tensor': - """Reverse addition: scalar + tensor""" - return Tensor(other + self._data) - - def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Subtraction: tensor - other""" - if isinstance(other, Tensor): - return Tensor(self._data - other._data) - else: # scalar - return Tensor(self._data - other) - - def __rsub__(self, other: Union[int, float]) -> 'Tensor': - """Reverse subtraction: scalar - tensor""" - return Tensor(other - self._data) - - def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Multiplication: tensor * other""" - if isinstance(other, Tensor): - return Tensor(self._data * other._data) - else: # scalar - return Tensor(self._data * other) - - def __rmul__(self, 
other: Union[int, float]) -> 'Tensor': - """Reverse multiplication: scalar * tensor""" - return Tensor(other * self._data) - - def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Division: tensor / other""" - if isinstance(other, Tensor): - return Tensor(self._data / other._data) - else: # scalar - return Tensor(self._data / other) - - def __rtruediv__(self, other: Union[int, float]) -> 'Tensor': - """Reverse division: scalar / tensor""" - return Tensor(other / self._data) +# Test scalar operations +g = a + 10 +expected = np.array([11, 12, 13]) +assert np.array_equal(g.data, expected), f"Scalar addition failed: expected {expected}, got {g.data}" -# %% -#| hide -#| export -class Tensor: - """ - TinyTorch Tensor: N-dimensional array with ML operations. - - The fundamental data structure for all TinyTorch operations. - Wraps NumPy arrays with ML-specific functionality. - """ - - def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): - """ - Create a new tensor from data. - - Args: - data: Input data (scalar, list, or numpy array) - dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect. 
- """ - # Convert input to numpy array - if isinstance(data, (int, float, np.number)): - # Handle Python and NumPy scalars - if dtype is None: - # Auto-detect type: int for integers, float32 for floats - if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)): - dtype = 'int32' - else: - dtype = 'float32' - self._data = np.array(data, dtype=dtype) - elif isinstance(data, list): - # Let NumPy auto-detect type, then convert if needed - temp_array = np.array(data) - if dtype is None: - # Keep NumPy's auto-detected type, but prefer common ML types - if np.issubdtype(temp_array.dtype, np.integer): - dtype = 'int32' - elif np.issubdtype(temp_array.dtype, np.floating): - dtype = 'float32' - else: - dtype = temp_array.dtype - self._data = temp_array.astype(dtype) - elif isinstance(data, np.ndarray): - self._data = data.astype(dtype or data.dtype) - else: - raise TypeError(f"Cannot create tensor from {type(data)}") - - @property - def data(self) -> np.ndarray: - """Access underlying numpy array.""" - return self._data - - @property - def shape(self) -> Tuple[int, ...]: - """Get tensor shape.""" - return self._data.shape - - @property - def size(self) -> int: - """Get total number of elements.""" - return self._data.size - - @property - def dtype(self) -> np.dtype: - """Get data type as numpy dtype.""" - return self._data.dtype - - def __repr__(self) -> str: - """String representation.""" - return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})" - - def add(self, other: 'Tensor') -> 'Tensor': - """Add another tensor to this tensor.""" - return Tensor(self._data + other._data) - - def multiply(self, other: 'Tensor') -> 'Tensor': - """Multiply this tensor by another tensor.""" - return Tensor(self._data * other._data) - - # Arithmetic operators for natural syntax (a + b, a * b, etc.) 
- def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Addition: tensor + other""" - if isinstance(other, Tensor): - return Tensor(self._data + other._data) - else: # scalar - return Tensor(self._data + other) - - def __radd__(self, other: Union[int, float]) -> 'Tensor': - """Reverse addition: scalar + tensor""" - return Tensor(other + self._data) - - def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Subtraction: tensor - other""" - if isinstance(other, Tensor): - return Tensor(self._data - other._data) - else: # scalar - return Tensor(self._data - other) - - def __rsub__(self, other: Union[int, float]) -> 'Tensor': - """Reverse subtraction: scalar - tensor""" - return Tensor(other - self._data) - - def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Multiplication: tensor * other""" - if isinstance(other, Tensor): - return Tensor(self._data * other._data) - else: # scalar - return Tensor(self._data * other) - - def __rmul__(self, other: Union[int, float]) -> 'Tensor': - """Reverse multiplication: scalar * tensor""" - return Tensor(other * self._data) - - def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor': - """Division: tensor / other""" - if isinstance(other, Tensor): - return Tensor(self._data / other._data) - else: # scalar - return Tensor(self._data / other) - - def __rtruediv__(self, other: Union[int, float]) -> 'Tensor': - """Reverse division: scalar / tensor""" - return Tensor(other / self._data) +h = a * 2 +expected = np.array([2, 4, 6]) +assert np.array_equal(h.data, expected), f"Scalar multiplication failed: expected {expected}, got {h.data}" -# %% [markdown] -""" -### ๐Ÿงช Test Your Tensor Methods -""" +print("โœ… Tensor arithmetic tests passed!") +print(f"โœ… Addition: {a} + {b} = {c}") +print(f"โœ… Multiplication: {a} * {b} = {d}") +print(f"โœ… Subtraction: {b} - {a} = {e}") +print(f"โœ… Division: {b} / {a} = {f}") -# %% -# Test tensor methods -print("Testing tensor 
methods...") +# %% nbgrader={"grade": true, "grade_id": "test-tensor-broadcasting", "locked": true, "points": 25, "schema_version": 3, "solution": false, "task": false} +# Test tensor broadcasting +print("Testing tensor broadcasting...") -try: - # Test method-based operations - a = Tensor([1, 2, 3]) - b = Tensor([4, 5, 6]) - - c = a.add(b) - print(f"โœ… Method addition: {a}.add({b}) = {c}") - - d = a.multiply(b) - print(f"โœ… Method multiplication: {a}.multiply({b}) = {d}") - - # Test method chaining - e = a.add(b).multiply(Tensor(2)) - print(f"โœ… Method chaining: {a}.add({b}).multiply(2) = {e}") - - print("\n๐ŸŽ‰ All method tests passed!") - -except Exception as e: - print(f"โŒ Error: {e}") - print("Make sure to implement the add and multiply methods!") +# Test scalar broadcasting +matrix = Tensor([[1, 2], [3, 4]]) +scalar = Tensor(10) +result = matrix + scalar +expected = np.array([[11, 12], [13, 14]]) +assert np.array_equal(result.data, expected), f"Scalar broadcasting failed: expected {expected}, got {result.data}" + +# Test vector broadcasting +vector = Tensor([1, 2]) +result = matrix + vector +expected = np.array([[2, 4], [4, 6]]) +assert np.array_equal(result.data, expected), f"Vector broadcasting failed: expected {expected}, got {result.data}" + +# Test different shapes +a = Tensor([[1], [2], [3]]) # (3, 1) +b = Tensor([10, 20]) # (2,) +result = a + b +expected = np.array([[11, 21], [12, 22], [13, 23]]) +assert np.array_equal(result.data, expected), f"Shape broadcasting failed: expected {expected}, got {result.data}" + +print("โœ… Tensor broadcasting tests passed!") +print(f"โœ… Matrix + Scalar: {matrix} + {scalar} = {result}") +print(f"โœ… Broadcasting works correctly!") # %% [markdown] """ ## ๐ŸŽฏ Module Summary -Congratulations! You've built the foundation of TinyTorch: +Congratulations! 
You've successfully implemented the core Tensor class for TinyTorch: ### What You've Accomplished -โœ… **Tensor Creation**: Handle scalars, lists, and numpy arrays -โœ… **Properties**: Access shape, size, and data type -โœ… **Arithmetic**: Element-wise addition and multiplication -โœ… **Methods**: Object-oriented API for operations -โœ… **Testing**: Immediate feedback on your implementation +โœ… **Tensor Creation**: Handle scalars, vectors, matrices, and higher-dimensional arrays +โœ… **Data Types**: Proper dtype handling with auto-detection and conversion +โœ… **Properties**: Shape, size, dtype, and data access +โœ… **Arithmetic**: Addition, multiplication, subtraction, division +โœ… **Operators**: Natural Python syntax with `+`, `-`, `*`, `/` +โœ… **Broadcasting**: Automatic shape compatibility like NumPy ### Key Concepts You've Learned -- **Tensors** are N-dimensional arrays with ML operations -- **NumPy integration** provides efficient computation -- **Element-wise operations** work on corresponding elements -- **Broadcasting** automatically handles different shapes -- **Object-oriented design** makes APIs intuitive +- **Tensors** are the fundamental data structure for ML systems +- **NumPy backend** provides efficient computation with ML-friendly API +- **Operator overloading** makes tensor operations feel natural +- **Broadcasting** enables flexible operations between different shapes +- **Type safety** ensures consistent behavior across operations -### What's Next -In the next modules, you'll build on this foundation: -- **Layers**: Transform tensors with weights and biases -- **Activations**: Add nonlinearity to your networks -- **Networks**: Compose layers into complete models -- **Training**: Learn parameters with gradients and optimization +### Next Steps +1. **Export your code**: `tito package nbdev --export 01_tensor` +2. **Test your implementation**: `tito module test 01_tensor` +3. 
**Use your tensors**: + ```python + from tinytorch.core.tensor import Tensor + t = Tensor([1, 2, 3]) + print(t + 5) # Your tensor in action! + ``` +4. **Move to Module 2**: Start building activation functions! -### Real-World Connection -Your Tensor class is now ready to: -- Store neural network weights and biases -- Process batches of data efficiently -- Handle different data types (images, text, audio) -- Integrate with the rest of the TinyTorch ecosystem - -**Ready for the next challenge?** Let's move on to building layers that can transform your tensors! -""" - -# %% -# Final verification -print("\n" + "="*50) -print("๐ŸŽ‰ TENSOR MODULE COMPLETE!") -print("="*50) -print("โœ… Tensor creation and properties") -print("โœ… Arithmetic operations") -print("โœ… Method-based API") -print("โœ… Comprehensive testing") -print("\n๐Ÿš€ Ready to build layers in the next module!") \ No newline at end of file +**Ready for the next challenge?** Let's add the mathematical functions that make neural networks powerful! +""" \ No newline at end of file diff --git a/modules/source/01_tensor/tensor_dev_enhanced.py b/modules/source/01_tensor/tensor_dev_enhanced.py deleted file mode 100644 index f21e9dde..00000000 --- a/modules/source/01_tensor/tensor_dev_enhanced.py +++ /dev/null @@ -1,408 +0,0 @@ -# --- -# jupyter: -# jupytext: -# text_representation: -# extension: .py -# format_name: percent -# format_version: '1.3' -# jupytext_version: 1.17.1 -# --- - -# %% [markdown] -""" -# Module 1: Tensor - Enhanced with nbgrader Support - -This is an enhanced version of the tensor module that demonstrates dual-purpose content creation: -- **Self-learning**: Rich educational content with guided implementation -- **Auto-grading**: nbgrader-compatible assignments with hidden tests - -## Dual System Benefits - -1. **Single Source**: One file generates both learning and assignment materials -2. **Consistent Quality**: Same instructor solutions in both contexts -3. 
**Flexible Assessment**: Choose between self-paced learning or formal grading -4. **Scalable**: Handle large courses with automated feedback - -## How It Works - -- **TinyTorch markers**: `#| exercise_start/end` for educational content -- **nbgrader markers**: `### BEGIN/END SOLUTION` for auto-grading -- **Hidden tests**: `### BEGIN/END HIDDEN TESTS` for automatic verification -- **Dual generation**: One command creates both student notebooks and assignments -""" - -# %% -#| default_exp core.tensor - -# %% -#| export -import numpy as np -from typing import Union, List, Tuple, Optional - -# %% [markdown] -""" -## Enhanced Tensor Class - -This implementation shows how to create dual-purpose educational content: - -### For Self-Learning Students -- Rich explanations and step-by-step guidance -- Detailed hints and examples -- Progressive difficulty with scaffolding - -### For Formal Assessment -- Auto-graded with hidden tests -- Immediate feedback on correctness -- Partial credit for complex methods -""" - -# %% -#| export -class Tensor: - """ - TinyTorch Tensor: N-dimensional array with ML operations. - - This enhanced version demonstrates dual-purpose educational content - suitable for both self-learning and formal assessment. - """ - - def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): - """ - Create a new tensor from data. - - Args: - data: Input data (scalar, list, or numpy array) - dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect. 
- """ - #| exercise_start - #| hint: Use np.array() to convert input data to numpy array - #| solution_test: tensor.shape should match input shape - #| difficulty: easy - - ### BEGIN SOLUTION - # Convert input to numpy array - if isinstance(data, (int, float)): - self._data = np.array(data) - elif isinstance(data, list): - self._data = np.array(data) - elif isinstance(data, np.ndarray): - self._data = data.copy() - else: - self._data = np.array(data) - - # Apply dtype conversion if specified - if dtype is not None: - self._data = self._data.astype(dtype) - ### END SOLUTION - - #| exercise_end - - @property - def data(self) -> np.ndarray: - """Access underlying numpy array.""" - #| exercise_start - #| hint: Return the stored numpy array (_data attribute) - #| solution_test: tensor.data should return numpy array - #| difficulty: easy - - ### BEGIN SOLUTION - return self._data - ### END SOLUTION - - #| exercise_end - - @property - def shape(self) -> Tuple[int, ...]: - """Get tensor shape.""" - #| exercise_start - #| hint: Use the .shape attribute of the numpy array - #| solution_test: tensor.shape should return tuple of dimensions - #| difficulty: easy - - ### BEGIN SOLUTION - return self._data.shape - ### END SOLUTION - - #| exercise_end - - @property - def size(self) -> int: - """Get total number of elements.""" - #| exercise_start - #| hint: Use the .size attribute of the numpy array - #| solution_test: tensor.size should return total element count - #| difficulty: easy - - ### BEGIN SOLUTION - return self._data.size - ### END SOLUTION - - #| exercise_end - - @property - def dtype(self) -> np.dtype: - """Get data type as numpy dtype.""" - #| exercise_start - #| hint: Use the .dtype attribute of the numpy array - #| solution_test: tensor.dtype should return numpy dtype - #| difficulty: easy - - ### BEGIN SOLUTION - return self._data.dtype - ### END SOLUTION - - #| exercise_end - - def __repr__(self) -> str: - """String representation of the tensor.""" - #| 
exercise_start - #| hint: Format as "Tensor([data], shape=shape, dtype=dtype)" - #| solution_test: repr should include data, shape, and dtype - #| difficulty: medium - - ### BEGIN SOLUTION - data_str = self._data.tolist() - return f"Tensor({data_str}, shape={self.shape}, dtype={self.dtype})" - ### END SOLUTION - - #| exercise_end - - def add(self, other: 'Tensor') -> 'Tensor': - """ - Add two tensors element-wise. - - Args: - other: Another tensor to add - - Returns: - New tensor with element-wise sum - """ - #| exercise_start - #| hint: Use numpy's + operator for element-wise addition - #| solution_test: result should be new Tensor with correct values - #| difficulty: medium - - ### BEGIN SOLUTION - result_data = self._data + other._data - return Tensor(result_data) - ### END SOLUTION - - #| exercise_end - - def multiply(self, other: 'Tensor') -> 'Tensor': - """ - Multiply two tensors element-wise. - - Args: - other: Another tensor to multiply - - Returns: - New tensor with element-wise product - """ - #| exercise_start - #| hint: Use numpy's * operator for element-wise multiplication - #| solution_test: result should be new Tensor with correct values - #| difficulty: medium - - ### BEGIN SOLUTION - result_data = self._data * other._data - return Tensor(result_data) - ### END SOLUTION - - #| exercise_end - - def matmul(self, other: 'Tensor') -> 'Tensor': - """ - Matrix multiplication of two tensors. 
- - Args: - other: Another tensor for matrix multiplication - - Returns: - New tensor with matrix product - - Raises: - ValueError: If shapes are incompatible for matrix multiplication - """ - #| exercise_start - #| hint: Use np.dot() for matrix multiplication, check shapes first - #| solution_test: result should handle shape validation and matrix multiplication - #| difficulty: hard - - ### BEGIN SOLUTION - # Check shape compatibility - if len(self.shape) != 2 or len(other.shape) != 2: - raise ValueError("Matrix multiplication requires 2D tensors") - - if self.shape[1] != other.shape[0]: - raise ValueError(f"Cannot multiply shapes {self.shape} and {other.shape}") - - result_data = np.dot(self._data, other._data) - return Tensor(result_data) - ### END SOLUTION - - #| exercise_end - -# %% [markdown] -""" -## Hidden Tests for Auto-Grading - -These tests are hidden from students but used for automatic grading. -They provide comprehensive coverage and immediate feedback. -""" - -# %% -### BEGIN HIDDEN TESTS -def test_tensor_creation_basic(): - """Test basic tensor creation (2 points)""" - t = Tensor([1, 2, 3]) - assert t.shape == (3,) - assert t.data.tolist() == [1, 2, 3] - assert t.size == 3 - -def test_tensor_creation_scalar(): - """Test scalar tensor creation (2 points)""" - t = Tensor(5) - assert t.shape == () - assert t.data.item() == 5 - assert t.size == 1 - -def test_tensor_creation_2d(): - """Test 2D tensor creation (2 points)""" - t = Tensor([[1, 2], [3, 4]]) - assert t.shape == (2, 2) - assert t.data.tolist() == [[1, 2], [3, 4]] - assert t.size == 4 - -def test_tensor_dtype(): - """Test dtype handling (2 points)""" - t = Tensor([1, 2, 3], dtype='float32') - assert t.dtype == np.float32 - assert t.data.dtype == np.float32 - -def test_tensor_properties(): - """Test tensor properties (2 points)""" - t = Tensor([[1, 2, 3], [4, 5, 6]]) - assert t.shape == (2, 3) - assert t.size == 6 - assert isinstance(t.data, np.ndarray) - -def test_tensor_repr(): - """Test 
string representation (2 points)""" - t = Tensor([1, 2, 3]) - repr_str = repr(t) - assert "Tensor" in repr_str - assert "shape" in repr_str - assert "dtype" in repr_str - -def test_tensor_add(): - """Test tensor addition (3 points)""" - t1 = Tensor([1, 2, 3]) - t2 = Tensor([4, 5, 6]) - result = t1.add(t2) - assert result.data.tolist() == [5, 7, 9] - assert result.shape == (3,) - -def test_tensor_multiply(): - """Test tensor multiplication (3 points)""" - t1 = Tensor([1, 2, 3]) - t2 = Tensor([4, 5, 6]) - result = t1.multiply(t2) - assert result.data.tolist() == [4, 10, 18] - assert result.shape == (3,) - -def test_tensor_matmul(): - """Test matrix multiplication (4 points)""" - t1 = Tensor([[1, 2], [3, 4]]) - t2 = Tensor([[5, 6], [7, 8]]) - result = t1.matmul(t2) - expected = [[19, 22], [43, 50]] - assert result.data.tolist() == expected - assert result.shape == (2, 2) - -def test_tensor_matmul_error(): - """Test matrix multiplication error handling (2 points)""" - t1 = Tensor([[1, 2, 3]]) # Shape (1, 3) - t2 = Tensor([[4, 5]]) # Shape (1, 2) - - try: - t1.matmul(t2) - assert False, "Should have raised ValueError" - except ValueError as e: - assert "Cannot multiply shapes" in str(e) - -def test_tensor_immutability(): - """Test that operations create new tensors (2 points)""" - t1 = Tensor([1, 2, 3]) - t2 = Tensor([4, 5, 6]) - original_data = t1.data.copy() - - result = t1.add(t2) - - # Original tensor should be unchanged - assert np.array_equal(t1.data, original_data) - # Result should be different object - assert result is not t1 - assert result.data is not t1.data - -### END HIDDEN TESTS - -# %% [markdown] -""" -## Usage Examples - -### Self-Learning Mode -Students work through the educational content step by step: - -```python -# Create tensors -t1 = Tensor([1, 2, 3]) -t2 = Tensor([4, 5, 6]) - -# Basic operations -result = t1.add(t2) -print(f"Addition: {result}") - -# Matrix operations -matrix1 = Tensor([[1, 2], [3, 4]]) -matrix2 = Tensor([[5, 6], [7, 8]]) 
-product = matrix1.matmul(matrix2) -print(f"Matrix multiplication: {product}") -``` - -### Assignment Mode -Students submit implementations that are automatically graded: - -1. **Immediate feedback**: Know if implementation is correct -2. **Partial credit**: Earn points for each working method -3. **Hidden tests**: Comprehensive coverage beyond visible examples -4. **Error handling**: Points for proper edge case handling - -### Benefits of Dual System - -1. **Single source**: One implementation serves both purposes -2. **Consistent quality**: Same instructor solutions everywhere -3. **Flexible assessment**: Choose the right tool for each situation -4. **Scalable**: Handle large courses with automated feedback - -This approach transforms TinyTorch from a learning framework into a complete course management solution. -""" - -# %% -# Test the implementation -if __name__ == "__main__": - # Basic testing - t1 = Tensor([1, 2, 3]) - t2 = Tensor([4, 5, 6]) - - print(f"t1: {t1}") - print(f"t2: {t2}") - print(f"t1 + t2: {t1.add(t2)}") - print(f"t1 * t2: {t1.multiply(t2)}") - - # Matrix multiplication - m1 = Tensor([[1, 2], [3, 4]]) - m2 = Tensor([[5, 6], [7, 8]]) - print(f"Matrix multiplication: {m1.matmul(m2)}") - - print("โœ… Enhanced tensor module working!") \ No newline at end of file diff --git a/modules/source/02_activations/activations_dev.ipynb b/modules/source/02_activations/activations_dev.ipynb new file mode 100644 index 00000000..27839437 --- /dev/null +++ b/modules/source/02_activations/activations_dev.ipynb @@ -0,0 +1,894 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "720f94f1", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "# Module 2: Activations - Nonlinearity in Neural Networks\n", + "\n", + "Welcome to the Activations module! 
This is where neural networks get their power through nonlinearity.\n", + "\n", + "## Learning Goals\n", + "- Understand why activation functions are essential for neural networks\n", + "- Implement the four most important activation functions: ReLU, Sigmoid, Tanh, and Softmax\n", + "- Visualize how activations transform data and enable complex learning\n", + "- See how activations work with layers to build powerful networks\n", + "- Master the NBGrader workflow with comprehensive testing\n", + "\n", + "## Build โ†’ Use โ†’ Understand\n", + "1. **Build**: Activation functions that add nonlinearity\n", + "2. **Use**: Transform tensors and see immediate results\n", + "3. **Understand**: How nonlinearity enables complex pattern learning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c0ecb71", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "activations-imports", + "locked": false, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "#| default_exp core.activations\n", + "\n", + "#| export\n", + "import math\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "import sys\n", + "from typing import Union, List\n", + "\n", + "# Import our Tensor class - try from package first, then from local module\n", + "try:\n", + " from tinytorch.core.tensor import Tensor\n", + "except ImportError:\n", + " # For development, import from local tensor module\n", + " sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))\n", + " from tensor_dev import Tensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd3c4277", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "activations-setup", + "locked": false, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "def 
_should_show_plots():\n", + " \"\"\"Check if we should show plots (disable during testing)\"\"\"\n", + " # Check multiple conditions that indicate we're in test mode\n", + " is_pytest = (\n", + " 'pytest' in sys.modules or\n", + " 'test' in sys.argv or\n", + " os.environ.get('PYTEST_CURRENT_TEST') is not None or\n", + " any('test' in arg for arg in sys.argv) or\n", + " any('pytest' in arg for arg in sys.argv)\n", + " )\n", + " \n", + " # Show plots in development mode (when not in test mode)\n", + " return not is_pytest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d08aa85", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "activations-visualization", + "locked": false, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "def visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100):\n", + " \"\"\"Visualize an activation function's behavior\"\"\"\n", + " if not _should_show_plots():\n", + " return\n", + " \n", + " try:\n", + " \n", + " # Generate input values\n", + " x_vals = np.linspace(x_range[0], x_range[1], num_points)\n", + " \n", + " # Apply activation function\n", + " y_vals = []\n", + " for x in x_vals:\n", + " input_tensor = Tensor([[x]])\n", + " output = activation_fn(input_tensor)\n", + " y_vals.append(output.data.item())\n", + " \n", + " # Create plot\n", + " plt.figure(figsize=(10, 6))\n", + " plt.plot(x_vals, y_vals, 'b-', linewidth=2, label=f'{name} Activation')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel(f'{name}(x)')\n", + " plt.title(f'{name} Activation Function')\n", + " plt.legend()\n", + " plt.show()\n", + " \n", + " except ImportError:\n", + " print(\" ๐Ÿ“Š Matplotlib not available - skipping visualization\")\n", + " except Exception as e:\n", + " print(f\" โš ๏ธ Visualization error: {e}\")\n", + "\n", + 
"def visualize_activation_on_data(activation_fn, name: str, data: Tensor):\n", + " \"\"\"Show activation function applied to sample data\"\"\"\n", + " if not _should_show_plots():\n", + " return\n", + " \n", + " try:\n", + " output = activation_fn(data)\n", + " print(f\" ๐Ÿ“Š {name} Example:\")\n", + " print(f\" Input: {data.data.flatten()}\")\n", + " print(f\" Output: {output.data.flatten()}\")\n", + " print(f\" Range: [{output.data.min():.3f}, {output.data.max():.3f}]\")\n", + " \n", + " except Exception as e:\n", + " print(f\" โš ๏ธ Data visualization error: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a29b0c94", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 1: What is an Activation Function?\n", + "\n", + "### Definition\n", + "An **activation function** is a mathematical function that adds nonlinearity to neural networks. It transforms the output of a layer before passing it to the next layer.\n", + "\n", + "### Why Activation Functions Matter\n", + "**Without activation functions, neural networks are just linear transformations!**\n", + "\n", + "```\n", + "Linear โ†’ Linear โ†’ Linear = Still Linear\n", + "```\n", + "\n", + "No matter how many layers you stack, without activation functions, you can only learn linear relationships. Activation functions introduce the nonlinearity that allows neural networks to:\n", + "- Learn complex patterns\n", + "- Approximate any continuous function\n", + "- Solve non-linear problems\n", + "\n", + "### Visual Analogy\n", + "Think of activation functions as **decision makers** at each neuron:\n", + "- **ReLU**: \"If positive, pass it through; if negative, block it\"\n", + "- **Sigmoid**: \"Squash everything between 0 and 1\"\n", + "- **Tanh**: \"Squash everything between -1 and 1\"\n", + "- **Softmax**: \"Convert to probabilities that sum to 1\"\n", + "\n", + "### Connection to Previous Modules\n", + "In Module 1 (Tensor), we learned how to store and manipulate data. 
Now we add the nonlinear functions that make neural networks powerful." + ] + }, + { + "cell_type": "markdown", + "id": "2b3cce52", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 2: ReLU - The Workhorse of Deep Learning\n", + "\n", + "### What is ReLU?\n", + "**ReLU (Rectified Linear Unit)** is the most popular activation function in deep learning.\n", + "\n", + "**Mathematical Definition:**\n", + "```\n", + "f(x) = max(0, x)\n", + "```\n", + "\n", + "**In Plain English:**\n", + "- If input is positive โ†’ pass it through unchanged\n", + "- If input is negative โ†’ output zero\n", + "\n", + "### Why ReLU is Popular\n", + "1. **Simple**: Easy to compute and understand\n", + "2. **Fast**: No expensive operations (no exponentials)\n", + "3. **Sparse**: Outputs many zeros, creating sparse representations\n", + "4. **Gradient-friendly**: Gradient is either 0 or 1 (no vanishing gradient for positive inputs)\n", + "\n", + "### Real-World Analogy\n", + "ReLU is like a **one-way valve** - it only lets positive \"pressure\" through, blocking negative values completely.\n", + "\n", + "### When to Use ReLU\n", + "- **Hidden layers** in most neural networks\n", + "- **Convolutional layers** in image processing\n", + "- **When you want sparse activations**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4300f9b3", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "relu-class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class ReLU:\n", + " \"\"\"\n", + " ReLU Activation Function: f(x) = max(0, x)\n", + " \n", + " The most popular activation function in deep learning.\n", + " Simple, fast, and effective for most applications.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply ReLU activation: f(x) = max(0, x)\n", + " \n", 
+ " TODO: Implement ReLU activation\n", + " \n", + " APPROACH:\n", + " 1. For each element in the input tensor, apply max(0, element)\n", + " 2. Return a new Tensor with the results\n", + " \n", + " EXAMPLE:\n", + " Input: Tensor([[-1, 0, 1, 2, -3]])\n", + " Expected: Tensor([[0, 0, 1, 2, 0]])\n", + " \n", + " HINTS:\n", + " - Use np.maximum(0, x.data) for element-wise max\n", + " - Remember to return a new Tensor object\n", + " - The shape should remain the same as input\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " result = np.maximum(0, x.data)\n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make the class callable: relu(x) instead of relu.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "markdown", + "id": "533c471b", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 3: Sigmoid - The Smooth Squasher\n", + "\n", + "### What is Sigmoid?\n", + "**Sigmoid** is a smooth S-shaped function that squashes inputs to the range (0, 1).\n", + "\n", + "**Mathematical Definition:**\n", + "```\n", + "f(x) = 1 / (1 + e^(-x))\n", + "```\n", + "\n", + "**Properties:**\n", + "- **Range**: (0, 1) - never exactly 0 or 1\n", + "- **Smooth**: Differentiable everywhere\n", + "- **Monotonic**: Always increasing\n", + "- **Centered**: Around 0.5\n", + "\n", + "### Why Sigmoid is Useful\n", + "1. **Probabilistic**: Output can be interpreted as probabilities\n", + "2. **Bounded**: Output is always between 0 and 1\n", + "3. **Smooth**: Good for gradient-based optimization\n", + "4. 
**Historical**: Was the standard before ReLU\n", + "\n", + "### Real-World Analogy\n", + "Sigmoid is like a **soft switch** - it gradually turns on as input increases, unlike ReLU's hard cutoff.\n", + "\n", + "### When to Use Sigmoid\n", + "- **Binary classification** (output layer)\n", + "- **Gates** in LSTM/GRU networks\n", + "- **When you need probabilistic outputs**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbe9f91c", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "sigmoid-class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class Sigmoid:\n", + " \"\"\"\n", + " Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))\n", + " \n", + " Smooth S-shaped function that squashes inputs to (0, 1).\n", + " Useful for binary classification and probabilistic outputs.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))\n", + " \n", + " TODO: Implement Sigmoid activation with numerical stability\n", + " \n", + " APPROACH:\n", + " 1. Clip input values to prevent overflow (e.g., between -500 and 500)\n", + " 2. Apply the sigmoid formula: 1 / (1 + exp(-x))\n", + " 3. 
Return a new Tensor with the results\n", + " \n", + " EXAMPLE:\n", + " Input: Tensor([[-2, 0, 2]])\n", + " Expected: Tensor([[0.119, 0.5, 0.881]]) (approximately)\n", + " \n", + " HINTS:\n", + " - Use np.clip(x.data, -500, 500) for numerical stability\n", + " - Use np.exp() for the exponential function\n", + " - Be careful with very large/small inputs to avoid overflow\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # Clip for numerical stability\n", + " clipped = np.clip(x.data, -500, 500)\n", + " result = 1 / (1 + np.exp(-clipped))\n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make the class callable: sigmoid(x) instead of sigmoid.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "markdown", + "id": "67dc777f", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 4: Tanh - The Zero-Centered Squasher\n", + "\n", + "### What is Tanh?\n", + "**Tanh (Hyperbolic Tangent)** is similar to Sigmoid but centered around zero.\n", + "\n", + "**Mathematical Definition:**\n", + "```\n", + "f(x) = tanh(x) = (e^x - e^(-x)) / (e^x + e^(-x))\n", + "```\n", + "\n", + "**Properties:**\n", + "- **Range**: (-1, 1) - symmetric around zero\n", + "- **Zero-centered**: Output averages to zero\n", + "- **Smooth**: Differentiable everywhere\n", + "- **Stronger gradients**: Than sigmoid in some regions\n", + "\n", + "### Why Tanh is Useful\n", + "1. **Zero-centered**: Better for training (gradients don't all have same sign)\n", + "2. **Symmetric**: Treats positive and negative inputs equally\n", + "3. **Stronger gradients**: Can help with training dynamics\n", + "4. 
**Bounded**: Output is always between -1 and 1\n", + "\n", + "### Real-World Analogy\n", + "Tanh is like a **balanced scale** - it can tip positive or negative, with zero as the neutral point.\n", + "\n", + "### When to Use Tanh\n", + "- **Hidden layers** (alternative to ReLU)\n", + "- **RNNs** (traditional choice)\n", + "- **When you need zero-centered outputs**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e982bfbd", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "tanh-class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class Tanh:\n", + " \"\"\"\n", + " Tanh Activation Function: f(x) = tanh(x)\n", + " \n", + " Zero-centered S-shaped function that squashes inputs to (-1, 1).\n", + " Better than sigmoid for hidden layers due to zero-centered outputs.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Tanh activation: f(x) = tanh(x)\n", + " \n", + " TODO: Implement Tanh activation\n", + " \n", + " APPROACH:\n", + " 1. Use NumPy's tanh function for numerical stability\n", + " 2. Apply to the tensor data\n", + " 3. 
Return a new Tensor with the results\n", + " \n", + " EXAMPLE:\n", + " Input: Tensor([[-2, 0, 2]])\n", + " Expected: Tensor([[-0.964, 0.0, 0.964]]) (approximately)\n", + " \n", + " HINTS:\n", + " - Use np.tanh(x.data) - NumPy handles the math\n", + " - Much simpler than implementing the formula manually\n", + " - NumPy's tanh is numerically stable\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " result = np.tanh(x.data)\n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make the class callable: tanh(x) instead of tanh.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "markdown", + "id": "726ae88b", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 5: Softmax - The Probability Converter\n", + "\n", + "### What is Softmax?\n", + "**Softmax** converts a vector of numbers into a probability distribution.\n", + "\n", + "**Mathematical Definition:**\n", + "```\n", + "f(x_i) = e^(x_i) / ฮฃ(e^(x_j)) for all j\n", + "```\n", + "\n", + "**Properties:**\n", + "- **Probabilities**: All outputs sum to 1\n", + "- **Non-negative**: All outputs are โ‰ฅ 0\n", + "- **Differentiable**: Smooth everywhere\n", + "- **Competitive**: Amplifies differences between inputs\n", + "\n", + "### Why Softmax is Essential\n", + "1. **Multi-class classification**: Converts logits to probabilities\n", + "2. **Attention mechanisms**: Focuses on important elements\n", + "3. **Interpretable**: Output can be understood as confidence\n", + "4. 
**Competitive**: Emphasizes the largest input\n", + "\n", + "### Real-World Analogy\n", + "Softmax is like **dividing a pie** - it takes any set of numbers and converts them into slices that sum to 100%.\n", + "\n", + "### When to Use Softmax\n", + "- **Multi-class classification** (output layer)\n", + "- **Attention mechanisms** in transformers\n", + "- **When you need probability distributions**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a99d93cc", + "metadata": { + "lines_to_next_cell": 1, + "nbgrader": { + "grade": false, + "grade_id": "softmax-class", + "locked": false, + "schema_version": 3, + "solution": true, + "task": false + } + }, + "outputs": [], + "source": [ + "#| export\n", + "class Softmax:\n", + " \"\"\"\n", + " Softmax Activation Function: f(x_i) = e^(x_i) / ฮฃ(e^(x_j))\n", + " \n", + " Converts a vector of numbers into a probability distribution.\n", + " Essential for multi-class classification and attention mechanisms.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Softmax activation: f(x_i) = e^(x_i) / ฮฃ(e^(x_j))\n", + " \n", + " TODO: Implement Softmax activation with numerical stability\n", + " \n", + " APPROACH:\n", + " 1. Subtract max value from inputs for numerical stability\n", + " 2. Compute exponentials: e^(x_i - max)\n", + " 3. Divide by sum of exponentials\n", + " 4. 
Return a new Tensor with the results\n", + " \n", + " EXAMPLE:\n", + " Input: Tensor([[1, 2, 3]])\n", + " Expected: Tensor([[0.09, 0.24, 0.67]]) (approximately, sums to 1)\n", + " \n", + " HINTS:\n", + " - Use np.max(x.data, axis=-1, keepdims=True) for stability\n", + " - Use np.exp() for exponentials\n", + " - Use np.sum() for the denominator\n", + " - Make sure the result sums to 1 along the last axis\n", + " \"\"\"\n", + " ### BEGIN SOLUTION\n", + " # Subtract max for numerical stability\n", + " x_max = np.max(x.data, axis=-1, keepdims=True)\n", + " x_shifted = x.data - x_max\n", + " \n", + " # Compute softmax\n", + " exp_x = np.exp(x_shifted)\n", + " sum_exp = np.sum(exp_x, axis=-1, keepdims=True)\n", + " result = exp_x / sum_exp\n", + " \n", + " return Tensor(result)\n", + " ### END SOLUTION\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make the class callable: softmax(x) instead of softmax.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "markdown", + "id": "d37cb352", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### ๐Ÿงช Test Your Activation Functions\n", + "\n", + "Once you implement the activation functions above, run these cells to test them:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "067e766c", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-relu", + "locked": true, + "points": 20, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test ReLU activation\n", + "print(\"Testing ReLU activation...\")\n", + "\n", + "relu = ReLU()\n", + "\n", + "# Test basic functionality\n", + "input_tensor = Tensor([[-2, -1, 0, 1, 2]])\n", + "output = relu(input_tensor)\n", + "expected = np.array([[0, 0, 0, 1, 2]])\n", + "assert np.array_equal(output.data, expected), f\"ReLU failed: expected {expected}, got {output.data}\"\n", + "\n", + "# Test with matrix\n", + "matrix_input = Tensor([[-1, 2], [3, 
-4]])\n", + "matrix_output = relu(matrix_input)\n", + "expected_matrix = np.array([[0, 2], [3, 0]])\n", + "assert np.array_equal(matrix_output.data, expected_matrix), f\"ReLU matrix failed: expected {expected_matrix}, got {matrix_output.data}\"\n", + "\n", + "# Test shape preservation\n", + "assert output.shape == input_tensor.shape, f\"ReLU should preserve shape: input {input_tensor.shape}, output {output.shape}\"\n", + "\n", + "print(\"โœ… ReLU tests passed!\")\n", + "print(f\"โœ… ReLU({input_tensor.data.flatten()}) = {output.data.flatten()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e01b7261", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-sigmoid", + "locked": true, + "points": 20, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test Sigmoid activation\n", + "print(\"Testing Sigmoid activation...\")\n", + "\n", + "sigmoid = Sigmoid()\n", + "\n", + "# Test basic functionality\n", + "input_tensor = Tensor([[0]])\n", + "output = sigmoid(input_tensor)\n", + "expected_value = 0.5\n", + "assert abs(output.data.item() - expected_value) < 1e-6, f\"Sigmoid(0) should be 0.5, got {output.data.item()}\"\n", + "\n", + "# Test range bounds (allowing for floating-point precision at extremes)\n", + "large_input = Tensor([[100]])\n", + "large_output = sigmoid(large_input)\n", + "assert 0 < large_output.data.item() <= 1, f\"Sigmoid output should be in (0,1], got {large_output.data.item()}\"\n", + "\n", + "small_input = Tensor([[-100]])\n", + "small_output = sigmoid(small_input)\n", + "assert 0 <= small_output.data.item() < 1, f\"Sigmoid output should be in [0,1), got {small_output.data.item()}\"\n", + "\n", + "# Test with multiple values\n", + "multi_input = Tensor([[-2, 0, 2]])\n", + "multi_output = sigmoid(multi_input)\n", + "assert multi_output.shape == multi_input.shape, \"Sigmoid should preserve shape\"\n", + "assert np.all((multi_output.data > 0) & 
(multi_output.data < 1)), \"All sigmoid outputs should be in (0,1)\"\n", + "\n", + "print(\"โœ… Sigmoid tests passed!\")\n", + "print(f\"โœ… Sigmoid({multi_input.data.flatten()}) = {multi_output.data.flatten()}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ca2fa6f", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-tanh", + "locked": true, + "points": 20, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test Tanh activation\n", + "print(\"Testing Tanh activation...\")\n", + "\n", + "tanh = Tanh()\n", + "\n", + "# Test basic functionality\n", + "input_tensor = Tensor([[0]])\n", + "output = tanh(input_tensor)\n", + "expected_value = 0.0\n", + "assert abs(output.data.item() - expected_value) < 1e-6, f\"Tanh(0) should be 0.0, got {output.data.item()}\"\n", + "\n", + "# Test range bounds (allowing for floating-point precision at extremes)\n", + "large_input = Tensor([[100]])\n", + "large_output = tanh(large_input)\n", + "assert -1 <= large_output.data.item() <= 1, f\"Tanh output should be in [-1,1], got {large_output.data.item()}\"\n", + "\n", + "small_input = Tensor([[-100]])\n", + "small_output = tanh(small_input)\n", + "assert -1 <= small_output.data.item() <= 1, f\"Tanh output should be in [-1,1], got {small_output.data.item()}\"\n", + "\n", + "# Test symmetry: tanh(-x) = -tanh(x)\n", + "test_input = Tensor([[2]])\n", + "pos_output = tanh(test_input)\n", + "neg_input = Tensor([[-2]])\n", + "neg_output = tanh(neg_input)\n", + "assert abs(pos_output.data.item() + neg_output.data.item()) < 1e-6, \"Tanh should be symmetric: tanh(-x) = -tanh(x)\"\n", + "\n", + "print(\"โœ… Tanh tests passed!\")\n", + "print(f\"โœ… Tanh(ยฑ2) = ยฑ{abs(pos_output.data.item()):.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50795506", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-softmax", + "locked": true, + "points": 20, + 
"schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test Softmax activation\n", + "print(\"Testing Softmax activation...\")\n", + "\n", + "softmax = Softmax()\n", + "\n", + "# Test basic functionality\n", + "input_tensor = Tensor([[1, 2, 3]])\n", + "output = softmax(input_tensor)\n", + "\n", + "# Check that outputs sum to 1\n", + "sum_output = np.sum(output.data)\n", + "assert abs(sum_output - 1.0) < 1e-6, f\"Softmax outputs should sum to 1, got {sum_output}\"\n", + "\n", + "# Check that all outputs are positive\n", + "assert np.all(output.data > 0), \"All softmax outputs should be positive\"\n", + "\n", + "# Check that larger inputs give larger outputs\n", + "assert output.data[0, 2] > output.data[0, 1] > output.data[0, 0], \"Softmax should preserve order\"\n", + "\n", + "# Test with matrix (multiple rows)\n", + "matrix_input = Tensor([[1, 2], [3, 4]])\n", + "matrix_output = softmax(matrix_input)\n", + "row_sums = np.sum(matrix_output.data, axis=1)\n", + "assert np.allclose(row_sums, 1.0), f\"Each row should sum to 1, got {row_sums}\"\n", + "\n", + "print(\"โœ… Softmax tests passed!\")\n", + "print(f\"โœ… Softmax({input_tensor.data.flatten()}) = {output.data.flatten()}\")\n", + "print(f\"โœ… Sum = {np.sum(output.data):.6f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8dfc085", + "metadata": { + "nbgrader": { + "grade": true, + "grade_id": "test-activation-integration", + "locked": true, + "points": 20, + "schema_version": 3, + "solution": false, + "task": false + } + }, + "outputs": [], + "source": [ + "# Test activation function integration\n", + "print(\"Testing activation function integration...\")\n", + "\n", + "# Create test data\n", + "test_data = Tensor([[-2, -1, 0, 1, 2]])\n", + "\n", + "# Test all activations\n", + "relu = ReLU()\n", + "sigmoid = Sigmoid()\n", + "tanh = Tanh()\n", + "softmax = Softmax()\n", + "\n", + "# Apply all activations\n", + "relu_out = 
relu(test_data)\n", + "sigmoid_out = sigmoid(test_data)\n", + "tanh_out = tanh(test_data)\n", + "softmax_out = softmax(test_data)\n", + "\n", + "# Check shapes are preserved\n", + "assert relu_out.shape == test_data.shape, \"ReLU should preserve shape\"\n", + "assert sigmoid_out.shape == test_data.shape, \"Sigmoid should preserve shape\"\n", + "assert tanh_out.shape == test_data.shape, \"Tanh should preserve shape\"\n", + "assert softmax_out.shape == test_data.shape, \"Softmax should preserve shape\"\n", + "\n", + "# Check ranges (allowing for floating-point precision at extremes)\n", + "assert np.all(relu_out.data >= 0), \"ReLU outputs should be non-negative\"\n", + "assert np.all((sigmoid_out.data >= 0) & (sigmoid_out.data <= 1)), \"Sigmoid outputs should be in [0,1]\"\n", + "assert np.all((tanh_out.data >= -1) & (tanh_out.data <= 1)), \"Tanh outputs should be in [-1,1]\"\n", + "assert np.all(softmax_out.data > 0), \"Softmax outputs should be positive\"\n", + "\n", + "# Test chaining (composition)\n", + "chained = relu(sigmoid(test_data))\n", + "assert chained.shape == test_data.shape, \"Chained activations should preserve shape\"\n", + "\n", + "print(\"โœ… Activation integration tests passed!\")\n", + "print(f\"โœ… All activation functions work correctly\")\n", + "print(f\"โœ… Input: {test_data.data.flatten()}\")\n", + "print(f\"โœ… ReLU: {relu_out.data.flatten()}\")\n", + "print(f\"โœ… Sigmoid: {sigmoid_out.data.flatten()}\")\n", + "print(f\"โœ… Tanh: {tanh_out.data.flatten()}\")\n", + "print(f\"โœ… Softmax: {softmax_out.data.flatten()}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fa5f40bb", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## ๐ŸŽฏ Module Summary\n", + "\n", + "Congratulations! 
You've successfully implemented the core activation functions for TinyTorch:\n", + "\n", + "### What You've Accomplished\n", + "โœ… **ReLU**: The workhorse activation for hidden layers \n", + "โœ… **Sigmoid**: Smooth probabilistic outputs for binary classification \n", + "โœ… **Tanh**: Zero-centered activation for better training dynamics \n", + "โœ… **Softmax**: Probability distributions for multi-class classification \n", + "โœ… **Integration**: All functions work together and preserve tensor shapes \n", + "\n", + "### Key Concepts You've Learned\n", + "- **Nonlinearity** is essential for neural networks to learn complex patterns\n", + "- **ReLU** is simple, fast, and effective for most hidden layers\n", + "- **Sigmoid** squashes outputs to (0,1) for probabilistic interpretation\n", + "- **Tanh** is zero-centered and often better than sigmoid for hidden layers\n", + "- **Softmax** converts logits to probability distributions\n", + "- **Numerical stability** is crucial for functions with exponentials\n", + "\n", + "### Next Steps\n", + "1. **Export your code**: `tito package nbdev --export 02_activations`\n", + "2. **Test your implementation**: `tito module test 02_activations`\n", + "3. **Use your activations**: \n", + " ```python\n", + " from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax\n", + " from tinytorch.core.tensor import Tensor\n", + " \n", + " relu = ReLU()\n", + " x = Tensor([[-1, 0, 1, 2]])\n", + " y = relu(x) # Your activation in action!\n", + " ```\n", + "4. **Move to Module 3**: Start building neural network layers!\n", + "\n", + "**Ready for the next challenge?** Let's combine tensors and activations to build the fundamental building blocks of neural networks!" 
+ ] + } + ], + "metadata": { + "jupytext": { + "main_language": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/source/02_activations/activations_dev.py b/modules/source/02_activations/activations_dev.py index 8c656cbd..d0d1d7b6 100644 --- a/modules/source/02_activations/activations_dev.py +++ b/modules/source/02_activations/activations_dev.py @@ -10,25 +10,26 @@ # %% [markdown] """ -# Module 3: Activation Functions - The Spark of Intelligence +# Module 2: Activations - Nonlinearity in Neural Networks -**Learning Goals:** +Welcome to the Activations module! This is where neural networks get their power through nonlinearity. + +## Learning Goals - Understand why activation functions are essential for neural networks -- Implement four fundamental activation functions from scratch -- Learn the mathematical properties and use cases of each activation -- Visualize activation function behavior and understand their impact +- Implement the four most important activation functions: ReLU, Sigmoid, Tanh, and Softmax +- Visualize how activations transform data and enable complex learning +- See how activations work with layers to build powerful networks +- Master the NBGrader workflow with comprehensive testing -**Why This Matters:** -Without activation functions, neural networks would just be linear transformations - no matter how many layers you stack, you'd only get linear relationships. Activation functions introduce the nonlinearity that allows neural networks to learn complex patterns and approximate any function. - -**Real-World Context:** -Every neural network you've heard of - from image recognition to language models - relies on activation functions. Understanding them deeply is crucial for designing effective architectures and debugging training issues. +## Build โ†’ Use โ†’ Understand +1. **Build**: Activation functions that add nonlinearity +2. **Use**: Transform tensors and see immediate results +3. 
**Understand**: How nonlinearity enables complex pattern learning """ -# %% +# %% nbgrader={"grade": false, "grade_id": "activations-imports", "locked": false, "schema_version": 3, "solution": false, "task": false} #| default_exp core.activations -# %% #| export import math import numpy as np @@ -37,10 +38,15 @@ import os import sys from typing import Union, List -# Import our Tensor class from the main package (rock solid foundation) -from tinytorch.core.tensor import Tensor +# Import our Tensor class - try from package first, then from local module +try: + from tinytorch.core.tensor import Tensor +except ImportError: + # For development, import from local tensor module + sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor')) + from tensor_dev import Tensor -# %% +# %% nbgrader={"grade": false, "grade_id": "activations-setup", "locked": false, "schema_version": 3, "solution": false, "task": false} #| hide #| export def _should_show_plots(): @@ -57,7 +63,7 @@ def _should_show_plots(): # Show plots in development mode (when not in test mode) return not is_pytest -# %% +# %% nbgrader={"grade": false, "grade_id": "activations-visualization", "locked": false, "schema_version": 3, "solution": false, "task": false} #| hide #| export def visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100): @@ -107,6 +113,59 @@ def visualize_activation_on_data(activation_fn, name: str, data: Tensor): except Exception as e: print(f" โš ๏ธ Data visualization error: {e}") +# %% [markdown] +""" +## ๐Ÿ“ฆ Where This Code Lives in the Final Package + +**Learning Side:** You work in `modules/source/02_activations/activations_dev.py` +**Building Side:** Code exports to `tinytorch.core.activations` + +```python +# Final package structure: +from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax # All activations together! 
+from tinytorch.core.tensor import Tensor # The foundation +from tinytorch.core.layers import Dense, Conv2D # Coming next! +``` + +**Why this matters:** +- **Learning:** Focused modules for deep understanding +- **Production:** Proper organization like PyTorch's `torch.nn.functional` +- **Consistency:** All activation functions live together in `core.activations` +- **Integration:** Works seamlessly with tensors and layers +""" + +# %% [markdown] +""" +## ๐Ÿง  The Mathematical Foundation of Nonlinearity + +### The Universal Approximation Theorem +**Key Insight:** Neural networks with nonlinear activation functions can approximate any continuous function! + +``` +Without activation: f(x) = Wโ‚ƒ(Wโ‚‚(Wโ‚x + bโ‚) + bโ‚‚) + bโ‚ƒ = Wx + b (still linear!) +With activation: f(x) = Wโ‚ƒฯƒ(Wโ‚‚ฯƒ(Wโ‚x + bโ‚) + bโ‚‚) + bโ‚ƒ (nonlinear!) +``` + +### Why Nonlinearity is Critical +- **Linear Limitations**: Without activations, any deep network collapses to a single linear transformation +- **Feature Learning**: Nonlinear functions create complex decision boundaries +- **Representation Power**: Each layer can learn different levels of abstraction +- **Biological Inspiration**: Neurons fire (activate) only above certain thresholds + +### Mathematical Properties We Care About +- **Differentiability**: For gradient-based optimization +- **Computational Efficiency**: Fast forward and backward passes +- **Numerical Stability**: Avoiding vanishing/exploding gradients +- **Sparsity**: Some activations (like ReLU) produce sparse representations + +### Connection to Real ML Systems +Every major framework has these same activations: +- **PyTorch**: `torch.nn.ReLU()`, `torch.nn.Sigmoid()`, etc. +- **TensorFlow**: `tf.nn.relu()`, `tf.nn.sigmoid()`, etc. +- **JAX**: `jax.nn.relu()`, `jax.nn.sigmoid()`, etc. +- **TinyTorch**: `tinytorch.core.activations.ReLU()` (what we're building!) +""" + # %% [markdown] """ ## Step 1: What is an Activation Function? 
@@ -134,7 +193,7 @@ Think of activation functions as **decision makers** at each neuron: - **Softmax**: "Convert to probabilities that sum to 1" ### Connection to Previous Modules -In Module 2 (Layers), we learned how to transform data through linear operations (matrix multiplication + bias). Now we add the nonlinear activation functions that make neural networks powerful. +In Module 1 (Tensor), we learned how to store and manipulate data. Now we add the nonlinear functions that make neural networks powerful. """ # %% [markdown] @@ -163,12 +222,25 @@ f(x) = max(0, x) ReLU is like a **one-way valve** - it only lets positive "pressure" through, blocking negative values completely. ### When to Use ReLU -- **Hidden layers** in most neural networks -- **Convolutional layers** in image processing -- **When you want sparse activations** +- **Hidden layers** in most neural networks (90% of cases) +- **Convolutional layers** in image processing (CNNs) +- **When you want sparse activations** (many zeros) +- **Deep networks** (doesn't suffer from vanishing gradients) + +### Real-World Applications +- **Image Classification**: ResNet, VGG, AlexNet all use ReLU +- **Object Detection**: YOLO, R-CNN use ReLU in backbone networks +- **Natural Language Processing**: Transformer models use ReLU in feedforward layers +- **Recommendation Systems**: Deep collaborative filtering with ReLU + +### Mathematical Properties +- **Derivative**: f'(x) = 1 if x > 0, else 0 +- **Range**: [0, โˆž) +- **Sparsity**: Outputs exactly 0 for negative inputs +- **Computational Cost**: O(1) - just a max operation """ -# %% +# %% nbgrader={"grade": false, "grade_id": "relu-class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class ReLU: """ @@ -197,738 +269,455 @@ class ReLU: - Remember to return a new Tensor object - The shape should remain the same as input """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - 
"""Allow calling the activation like a function: relu(x)""" - return self.forward(x) - -# %% -#| hide -#| export -class ReLU: - """ReLU Activation: f(x) = max(0, x)""" - - def forward(self, x: Tensor) -> Tensor: + ### BEGIN SOLUTION result = np.maximum(0, x.data) return Tensor(result) - + ### END SOLUTION + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: relu(x) instead of relu.forward(x)""" return self.forward(x) # %% [markdown] """ -### ๐Ÿงช Test Your ReLU Implementation - -Let's test your ReLU implementation right away to make sure it's working correctly: -""" - -# %% -try: - # Create ReLU activation - relu = ReLU() - - # Test 1: Basic functionality - print("๐Ÿ”ง Testing ReLU Implementation") - print("=" * 40) - - # Test with mixed positive/negative values - test_input = Tensor([[-2, -1, 0, 1, 2]]) - expected = Tensor([[0, 0, 0, 1, 2]]) - - result = relu(test_input) - print(f"Input: {test_input.data.flatten()}") - print(f"Output: {result.data.flatten()}") - print(f"Expected: {expected.data.flatten()}") - - # Verify correctness - if np.allclose(result.data, expected.data): - print("โœ… Basic ReLU test passed!") - else: - print("โŒ Basic ReLU test failed!") - print(" Check your max(0, x) implementation") - - # Test 2: Edge cases - edge_cases = Tensor([[-100, -0.1, 0, 0.1, 100]]) - edge_result = relu(edge_cases) - expected_edge = np.array([[0, 0, 0, 0.1, 100]]) - - print(f"\nEdge cases: {edge_cases.data.flatten()}") - print(f"Output: {edge_result.data.flatten()}") - - if np.allclose(edge_result.data, expected_edge): - print("โœ… Edge case test passed!") - else: - print("โŒ Edge case test failed!") - - # Test 3: Shape preservation - multi_dim = Tensor([[1, -1], [2, -2], [0, 3]]) - multi_result = relu(multi_dim) - - if multi_result.data.shape == multi_dim.data.shape: - print("โœ… Shape preservation test passed!") - else: - print("โŒ Shape preservation test failed!") - print(f" Expected shape: {multi_dim.data.shape}, got: 
{multi_result.data.shape}") - - print("โœ… ReLU tests complete!") - -except NotImplementedError: - print("โš ๏ธ ReLU not implemented yet - complete the forward method above!") -except Exception as e: - print(f"โŒ Error in ReLU: {e}") - print(" Check your implementation in the forward method") - -print() # Add spacing - -# %% -# ๐ŸŽจ ReLU Visualization (development only - not exported) -if _should_show_plots(): - try: - relu = ReLU() - print("๐ŸŽจ Visualizing ReLU behavior...") - visualize_activation_function(relu, "ReLU", x_range=(-3, 3)) - - # Show ReLU with real data - sample_data = Tensor([[-2.5, -1.0, -0.5, 0.0, 0.5, 1.0, 2.5]]) - visualize_activation_on_data(relu, "ReLU", sample_data) - except: - pass # Skip if ReLU not implemented - -# %% [markdown] -""" -## Step 3: Sigmoid - The Smooth Classifier +## Step 3: Sigmoid - The Smooth Squasher ### What is Sigmoid? -**Sigmoid** is a smooth, S-shaped activation function that squashes inputs to the range (0, 1). +**Sigmoid** is a smooth S-shaped function that squashes inputs to the range (0, 1). **Mathematical Definition:** ``` f(x) = 1 / (1 + e^(-x)) ``` -**Key Properties:** +**Properties:** - **Range**: (0, 1) - never exactly 0 or 1 - **Smooth**: Differentiable everywhere - **Monotonic**: Always increasing -- **Symmetric**: Around the point (0, 0.5) +- **Centered**: Around 0.5 ### Why Sigmoid is Useful -1. **Probability interpretation**: Output can be interpreted as probability -2. **Smooth gradients**: Nice for optimization -3. **Bounded output**: Prevents extreme values +1. **Probabilistic**: Output can be interpreted as probabilities +2. **Bounded**: Output is always between 0 and 1 +3. **Smooth**: Good for gradient-based optimization +4. **Historical**: Was the standard before ReLU ### Real-World Analogy -Sigmoid is like a **smooth dimmer switch** - it gradually transitions from "off" (near 0) to "on" (near 1), unlike ReLU's sharp cutoff. 
+Sigmoid is like a **soft switch** - it gradually turns on as input increases, unlike ReLU's hard cutoff. + +### Real-World Applications +- **Binary Classification**: Final layer for yes/no decisions (spam detection, medical diagnosis) +- **Logistic Regression**: The classic ML algorithm uses sigmoid +- **Attention Mechanisms**: Gating mechanisms in LSTM/GRU +- **Probability Estimation**: When you need outputs between 0 and 1 + +### Mathematical Properties +- **Derivative**: f'(x) = f(x)(1 - f(x)) - elegant and efficient! +- **Range**: (0, 1) - never exactly 0 or 1 +- **Symmetry**: Sigmoid(0) = 0.5 (centered) +- **Saturation**: Gradients approach 0 for large |x| (vanishing gradient problem) ### When to Use Sigmoid - **Binary classification** (output layer) -- **Gate mechanisms** (in LSTMs) -- **When you need probabilities** - -### Numerical Stability Note -For very large positive or negative inputs, sigmoid can cause numerical issues. We'll handle this with clipping. +- **Gates** in LSTM/GRU networks +- **When you need probabilistic outputs** +- **Avoid in deep networks** (vanishing gradients) """ -# %% +# %% nbgrader={"grade": false, "grade_id": "sigmoid-class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class Sigmoid: """ Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x)) - Squashes inputs to the range (0, 1), useful for binary classification - and probability interpretation. + Smooth S-shaped function that squashes inputs to (0, 1). + Useful for binary classification and probabilistic outputs. """ def forward(self, x: Tensor) -> Tensor: """ Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x)) - TODO: Implement Sigmoid activation + TODO: Implement Sigmoid activation with numerical stability APPROACH: - 1. For numerical stability, clip x to reasonable range (e.g., -500 to 500) - 2. Compute 1 / (1 + exp(-x)) for each element + 1. Clip input values to prevent overflow (e.g., between -500 and 500) + 2. 
Apply the sigmoid formula: 1 / (1 + exp(-x)) 3. Return a new Tensor with the results EXAMPLE: - Input: Tensor([[-2, -1, 0, 1, 2]]) - Expected: Tensor([[0.119, 0.269, 0.5, 0.731, 0.881]]) (approximately) + Input: Tensor([[-2, 0, 2]]) + Expected: Tensor([[0.119, 0.5, 0.881]]) (approximately) HINTS: - Use np.clip(x.data, -500, 500) for numerical stability - - Use np.exp(-clipped_x) for the exponential - - Formula: 1 / (1 + np.exp(-clipped_x)) - - Remember to return a new Tensor object + - Use np.exp() for the exponential function + - Be careful with very large/small inputs to avoid overflow """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - """Allow calling the activation like a function: sigmoid(x)""" - return self.forward(x) - -# %% -#| hide -#| export -class Sigmoid: - """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))""" - - def forward(self, x: Tensor) -> Tensor: + ### BEGIN SOLUTION # Clip for numerical stability clipped = np.clip(x.data, -500, 500) result = 1 / (1 + np.exp(-clipped)) return Tensor(result) - + ### END SOLUTION + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)""" return self.forward(x) # %% [markdown] """ -### ๐Ÿงช Test Your Sigmoid Implementation - -Let's test your Sigmoid implementation to ensure it's working correctly: -""" - -# %% -try: - # Create Sigmoid activation - sigmoid = Sigmoid() - - print("๐Ÿ”ง Testing Sigmoid Implementation") - print("=" * 40) - - # Test 1: Basic functionality - test_input = Tensor([[-2, -1, 0, 1, 2]]) - result = sigmoid(test_input) - - print(f"Input: {test_input.data.flatten()}") - print(f"Output: {result.data.flatten()}") - - # Check properties - # 1. All outputs should be between 0 and 1 - if np.all(result.data >= 0) and np.all(result.data <= 1): - print("โœ… Range test passed: all outputs in (0, 1)") - else: - print("โŒ Range test failed: outputs should be in (0, 1)") - - # 2. 
Sigmoid(0) should be 0.5 - zero_input = Tensor([[0]]) - zero_result = sigmoid(zero_input) - if abs(zero_result.data.item() - 0.5) < 1e-6: - print("โœ… Sigmoid(0) = 0.5 test passed!") - else: - print(f"โŒ Sigmoid(0) should be 0.5, got {zero_result.data.item()}") - - # 3. Test symmetry: sigmoid(-x) = 1 - sigmoid(x) - x_val = 2.0 - pos_result = sigmoid(Tensor([[x_val]])).data.item() - neg_result = sigmoid(Tensor([[-x_val]])).data.item() - - if abs(pos_result + neg_result - 1.0) < 1e-6: - print("โœ… Symmetry test passed!") - else: - print(f"โŒ Symmetry test failed: sigmoid({x_val}) + sigmoid({-x_val}) should equal 1") - - # 4. Test numerical stability with extreme values - extreme_input = Tensor([[-1000, 1000]]) - extreme_result = sigmoid(extreme_input) - - # Should not produce NaN or inf - if not np.any(np.isnan(extreme_result.data)) and not np.any(np.isinf(extreme_result.data)): - print("โœ… Numerical stability test passed!") - else: - print("โŒ Numerical stability test failed: extreme values produced NaN/inf") - - print("โœ… Sigmoid tests complete!") - - # ๐ŸŽจ Visualize Sigmoid behavior (development only) - if _should_show_plots(): - print("\n๐ŸŽจ Visualizing Sigmoid behavior...") - visualize_activation_function(sigmoid, "Sigmoid", x_range=(-5, 5)) - - # Show Sigmoid with real data - sample_data = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) - visualize_activation_on_data(sigmoid, "Sigmoid", sample_data) - -except NotImplementedError: - print("โš ๏ธ Sigmoid not implemented yet - complete the forward method above!") -except Exception as e: - print(f"โŒ Error in Sigmoid: {e}") - print(" Check your implementation in the forward method") - -print() # Add spacing - -# %% [markdown] -""" -## Step 4: Tanh - The Centered Alternative +## Step 4: Tanh - The Zero-Centered Squasher ### What is Tanh? -**Tanh (Hyperbolic Tangent)** is similar to Sigmoid but centered around zero, with range (-1, 1). +**Tanh (Hyperbolic Tangent)** is similar to Sigmoid but centered around zero. 
**Mathematical Definition:** ``` -f(x) = (e^x - e^(-x)) / (e^x + e^(-x)) +f(x) = tanh(x) = (e^x - e^(-x)) / (e^x + e^(-x)) ``` -**Alternative form:** -``` -f(x) = 2 * sigmoid(2x) - 1 -``` - -**Key Properties:** +**Properties:** - **Range**: (-1, 1) - symmetric around zero -- **Zero-centered**: Output has mean closer to zero +- **Zero-centered**: Output averages to zero - **Smooth**: Differentiable everywhere -- **Stronger gradients**: Steeper than sigmoid +- **Stronger gradients**: Than sigmoid in some regions -### Why Tanh is Better Than Sigmoid -1. **Zero-centered**: Helps with gradient flow in deep networks -2. **Stronger gradients**: Faster convergence in some cases -3. **Symmetric**: Better for certain applications +### Why Tanh is Useful +1. **Zero-centered**: Better for training (gradients don't all have same sign) +2. **Symmetric**: Treats positive and negative inputs equally +3. **Stronger gradients**: Can help with training dynamics +4. **Bounded**: Output is always between -1 and 1 ### Real-World Analogy -Tanh is like a **balanced scale** - it can tip strongly in either direction (-1 to +1) but defaults to neutral (0). +Tanh is like a **balanced scale** - it can tip positive or negative, with zero as the neutral point. ### When to Use Tanh - **Hidden layers** (alternative to ReLU) -- **Recurrent networks** (RNNs, LSTMs) +- **RNNs** (traditional choice) - **When you need zero-centered outputs** """ -# %% +# %% nbgrader={"grade": false, "grade_id": "tanh-class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class Tanh: """ - Tanh Activation Function: f(x) = (e^x - e^(-x)) / (e^x + e^(-x)) + Tanh Activation Function: f(x) = tanh(x) - Zero-centered activation function with range (-1, 1). - Often preferred over Sigmoid for hidden layers. + Zero-centered S-shaped function that squashes inputs to (-1, 1). + Better than sigmoid for hidden layers due to zero-centered outputs. 
""" def forward(self, x: Tensor) -> Tensor: """ - Apply Tanh activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x)) + Apply Tanh activation: f(x) = tanh(x) TODO: Implement Tanh activation APPROACH: - 1. Use numpy's built-in tanh function: np.tanh(x.data) - 2. Return a new Tensor with the results - - ALTERNATIVE APPROACH: - 1. Compute e^x and e^(-x) - 2. Use formula: (e^x - e^(-x)) / (e^x + e^(-x)) + 1. Use NumPy's tanh function for numerical stability + 2. Apply to the tensor data + 3. Return a new Tensor with the results EXAMPLE: - Input: Tensor([[-2, -1, 0, 1, 2]]) - Expected: Tensor([[-0.964, -0.762, 0.0, 0.762, 0.964]]) (approximately) + Input: Tensor([[-2, 0, 2]]) + Expected: Tensor([[-0.964, 0.0, 0.964]]) (approximately) HINTS: - - np.tanh() is the simplest approach - - Output range is (-1, 1) - - tanh(0) = 0 (zero-centered) - - Remember to return a new Tensor object + - Use np.tanh(x.data) - NumPy handles the math + - Much simpler than implementing the formula manually + - NumPy's tanh is numerically stable """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - """Allow calling the activation like a function: tanh(x)""" - return self.forward(x) - -# %% -#| hide -#| export -class Tanh: - """Tanh Activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))""" - - def forward(self, x: Tensor) -> Tensor: + ### BEGIN SOLUTION result = np.tanh(x.data) return Tensor(result) - + ### END SOLUTION + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: tanh(x) instead of tanh.forward(x)""" return self.forward(x) # %% [markdown] """ -### ๐Ÿงช Test Your Tanh Implementation - -Let's test your Tanh implementation to ensure it's working correctly: -""" - -# %% -try: - # Create Tanh activation - tanh = Tanh() - - print("๐Ÿ”ง Testing Tanh Implementation") - print("=" * 40) - - # Test 1: Basic functionality - test_input = Tensor([[-2, -1, 0, 1, 2]]) - result = tanh(test_input) - - print(f"Input: 
{test_input.data.flatten()}") - print(f"Output: {result.data.flatten()}") - - # Check properties - # 1. All outputs should be between -1 and 1 - if np.all(result.data >= -1) and np.all(result.data <= 1): - print("โœ… Range test passed: all outputs in (-1, 1)") - else: - print("โŒ Range test failed: outputs should be in (-1, 1)") - - # 2. Tanh(0) should be 0 - zero_input = Tensor([[0]]) - zero_result = tanh(zero_input) - if abs(zero_result.data.item()) < 1e-6: - print("โœ… Tanh(0) = 0 test passed!") - else: - print(f"โŒ Tanh(0) should be 0, got {zero_result.data.item()}") - - # 3. Test antisymmetry: tanh(-x) = -tanh(x) - x_val = 1.5 - pos_result = tanh(Tensor([[x_val]])).data.item() - neg_result = tanh(Tensor([[-x_val]])).data.item() - - if abs(pos_result + neg_result) < 1e-6: - print("โœ… Antisymmetry test passed!") - else: - print(f"โŒ Antisymmetry test failed: tanh({x_val}) + tanh({-x_val}) should equal 0") - - # 4. Test that tanh is stronger than sigmoid - # For the same input, |tanh(x)| should be > |sigmoid(x) - 0.5| - test_val = 1.0 - tanh_result = abs(tanh(Tensor([[test_val]])).data.item()) - sigmoid_result = abs(sigmoid(Tensor([[test_val]])).data.item() - 0.5) - - if tanh_result > sigmoid_result: - print("โœ… Stronger gradient test passed!") - else: - print("โŒ Tanh should have stronger gradients than sigmoid") - - print("โœ… Tanh tests complete!") - - # ๐ŸŽจ Visualize Tanh behavior (development only) - if _should_show_plots(): - print("\n๐ŸŽจ Visualizing Tanh behavior...") - visualize_activation_function(tanh, "Tanh", x_range=(-3, 3)) - - # Show Tanh with real data - sample_data = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) - visualize_activation_on_data(tanh, "Tanh", sample_data) - -except NotImplementedError: - print("โš ๏ธ Tanh not implemented yet - complete the forward method above!") -except Exception as e: - print(f"โŒ Error in Tanh: {e}") - print(" Check your implementation in the forward method") - -print() # Add spacing - -# %% [markdown] -""" -## 
Step 5: Softmax - The Probability Maker +## Step 5: Softmax - The Probability Converter ### What is Softmax? -**Softmax** converts a vector of real numbers into a probability distribution. It's essential for multi-class classification. +**Softmax** converts a vector of numbers into a probability distribution. **Mathematical Definition:** ``` f(x_i) = e^(x_i) / ฮฃ(e^(x_j)) for all j ``` -**Key Properties:** -- **Probability distribution**: All outputs sum to 1 -- **Non-negative**: All outputs โ‰ฅ 0 -- **Differentiable**: Smooth for optimization -- **Relative**: Emphasizes the largest input +**Properties:** +- **Probabilities**: All outputs sum to 1 +- **Non-negative**: All outputs are โ‰ฅ 0 +- **Differentiable**: Smooth everywhere +- **Competitive**: Amplifies differences between inputs -### Why Softmax is Special -1. **Probability interpretation**: Perfect for classification -2. **Competitive**: Emphasizes the winner (largest input) -3. **Differentiable**: Works well with gradient descent +### Why Softmax is Essential +1. **Multi-class classification**: Converts logits to probabilities +2. **Attention mechanisms**: Focuses on important elements +3. **Interpretable**: Output can be understood as confidence +4. **Competitive**: Emphasizes the largest input ### Real-World Analogy -Softmax is like **voting with enthusiasm** - not only does the most popular choice win, but the "votes" are weighted by how much more popular it is. +Softmax is like **dividing a pie** - it takes any set of numbers and converts them into slices that sum to 100%. ### When to Use Softmax - **Multi-class classification** (output layer) -- **Attention mechanisms** (in Transformers) +- **Attention mechanisms** in transformers - **When you need probability distributions** - -### Numerical Stability Note -For numerical stability, we subtract the maximum value before computing exponentials. 
""" -# %% +# %% nbgrader={"grade": false, "grade_id": "softmax-class", "locked": false, "schema_version": 3, "solution": true, "task": false} #| export class Softmax: """ Softmax Activation Function: f(x_i) = e^(x_i) / ฮฃ(e^(x_j)) - Converts a vector of real numbers into a probability distribution. - Essential for multi-class classification. + Converts a vector of numbers into a probability distribution. + Essential for multi-class classification and attention mechanisms. """ def forward(self, x: Tensor) -> Tensor: """ Apply Softmax activation: f(x_i) = e^(x_i) / ฮฃ(e^(x_j)) - TODO: Implement Softmax activation + TODO: Implement Softmax activation with numerical stability APPROACH: - 1. For numerical stability, subtract the maximum value from each row - 2. Compute exponentials of the shifted values - 3. Divide each exponential by the sum of exponentials in its row + 1. Subtract max value from inputs for numerical stability + 2. Compute exponentials: e^(x_i - max) + 3. Divide by sum of exponentials 4. 
Return a new Tensor with the results EXAMPLE: Input: Tensor([[1, 2, 3]]) - Expected: Tensor([[0.090, 0.245, 0.665]]) (approximately) - Sum should be 1.0 + Expected: Tensor([[0.09, 0.24, 0.67]]) (approximately, sums to 1) HINTS: - - Use np.max(x.data, axis=1, keepdims=True) to find row maximums - - Subtract max from x.data for numerical stability + - Use np.max(x.data, axis=-1, keepdims=True) for stability - Use np.exp() for exponentials - - Use np.sum(exp_vals, axis=1, keepdims=True) for row sums - - Remember to return a new Tensor object + - Use np.sum() for the denominator + - Make sure the result sums to 1 along the last axis """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - """Allow calling the activation like a function: softmax(x)""" - return self.forward(x) - -# %% -#| hide -#| export -class Softmax: - """Softmax Activation: f(x_i) = e^(x_i) / ฮฃ(e^(x_j))""" - - def forward(self, x: Tensor) -> Tensor: + ### BEGIN SOLUTION # Subtract max for numerical stability - shifted = x.data - np.max(x.data, axis=1, keepdims=True) - exp_vals = np.exp(shifted) - result = exp_vals / np.sum(exp_vals, axis=1, keepdims=True) - return Tensor(result) + x_max = np.max(x.data, axis=-1, keepdims=True) + x_shifted = x.data - x_max + # Compute softmax + exp_x = np.exp(x_shifted) + sum_exp = np.sum(exp_x, axis=-1, keepdims=True) + result = exp_x / sum_exp + + return Tensor(result) + ### END SOLUTION + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: softmax(x) instead of softmax.forward(x)""" return self.forward(x) # %% [markdown] """ -### ๐Ÿงช Test Your Softmax Implementation +### ๐Ÿงช Test Your Activation Functions -Let's test your Softmax implementation to ensure it's working correctly: +Once you implement the activation functions above, run these cells to test them: """ -# %% -try: - # Create Softmax activation - softmax = Softmax() - - print("๐Ÿ”ง Testing Softmax Implementation") - print("=" * 
40) - - # Test 1: Basic functionality - test_input = Tensor([[1, 2, 3]]) - result = softmax(test_input) - - print(f"Input: {test_input.data.flatten()}") - print(f"Output: {result.data.flatten()}") - - # Check properties - # 1. All outputs should be non-negative - if np.all(result.data >= 0): - print("โœ… Non-negative test passed!") - else: - print("โŒ Non-negative test failed: all outputs should be โ‰ฅ 0") - - # 2. Sum should equal 1 (probability distribution) - row_sums = np.sum(result.data, axis=1) - if np.allclose(row_sums, 1.0): - print("โœ… Probability distribution test passed!") - else: - print(f"โŒ Sum test failed: sum should be 1.0, got {row_sums}") - - # 3. Test with multiple rows - multi_input = Tensor([[1, 2, 3], [0, 0, 0], [10, 20, 30]]) - multi_result = softmax(multi_input) - multi_sums = np.sum(multi_result.data, axis=1) - - if np.allclose(multi_sums, 1.0): - print("โœ… Multi-row test passed!") - else: - print(f"โŒ Multi-row test failed: all row sums should be 1.0, got {multi_sums}") - - # 4. Test numerical stability - large_input = Tensor([[1000, 1001, 1002]]) - large_result = softmax(large_input) - - # Should not produce NaN or inf - if not np.any(np.isnan(large_result.data)) and not np.any(np.isinf(large_result.data)): - print("โœ… Numerical stability test passed!") - else: - print("โŒ Numerical stability test failed: large values produced NaN/inf") - - # 5. 
Test that largest input gets highest probability - test_logits = Tensor([[1, 5, 2]]) - test_probs = softmax(test_logits) - max_idx = np.argmax(test_probs.data) - - if max_idx == 1: # Second element (index 1) should be largest - print("โœ… Max probability test passed!") - else: - print("โŒ Max probability test failed: largest input should get highest probability") - - print("โœ… Softmax tests complete!") - - # ๐ŸŽจ Visualize Softmax behavior (development only) - if _should_show_plots(): - print("\n๐ŸŽจ Visualizing Softmax behavior...") - # Note: Softmax is different - it's a vector function, so we show it differently - sample_logits = Tensor([[1.0, 2.0, 3.0]]) # Simple 3-class example - softmax_output = softmax(sample_logits) - - print(f" Example: logits {sample_logits.data.flatten()} โ†’ probabilities {softmax_output.data.flatten()}") - print(f" Sum of probabilities: {softmax_output.data.sum():.6f} (should be 1.0)") - - # Show how different input scales affect output - scale_examples = [ - Tensor([[1.0, 2.0, 3.0]]), # Original - Tensor([[2.0, 4.0, 6.0]]), # Scaled up - Tensor([[0.1, 0.2, 0.3]]), # Scaled down - ] - - print("\n ๐Ÿ“Š Scale sensitivity:") - for i, example in enumerate(scale_examples): - output = softmax(example) - print(f" Scale {i+1}: {example.data.flatten()} โ†’ {output.data.flatten()}") - -except NotImplementedError: - print("โš ๏ธ Softmax not implemented yet - complete the forward method above!") -except Exception as e: - print(f"โŒ Error in Softmax: {e}") - print(" Check your implementation in the forward method") +# %% nbgrader={"grade": true, "grade_id": "test-relu", "locked": true, "points": 20, "schema_version": 3, "solution": false, "task": false} +# Test ReLU activation +print("Testing ReLU activation...") -print() # Add spacing +relu = ReLU() + +# Test basic functionality +input_tensor = Tensor([[-2, -1, 0, 1, 2]]) +output = relu(input_tensor) +expected = np.array([[0, 0, 0, 1, 2]]) +assert np.array_equal(output.data, expected), f"ReLU 
failed: expected {expected}, got {output.data}" + +# Test with matrix +matrix_input = Tensor([[-1, 2], [3, -4]]) +matrix_output = relu(matrix_input) +expected_matrix = np.array([[0, 2], [3, 0]]) +assert np.array_equal(matrix_output.data, expected_matrix), f"ReLU matrix failed: expected {expected_matrix}, got {matrix_output.data}" + +# Test shape preservation +assert output.shape == input_tensor.shape, f"ReLU should preserve shape: input {input_tensor.shape}, output {output.shape}" + +print("โœ… ReLU tests passed!") +print(f"โœ… ReLU({input_tensor.data.flatten()}) = {output.data.flatten()}") + +# %% nbgrader={"grade": true, "grade_id": "test-sigmoid", "locked": true, "points": 20, "schema_version": 3, "solution": false, "task": false} +# Test Sigmoid activation +print("Testing Sigmoid activation...") + +sigmoid = Sigmoid() + +# Test basic functionality +input_tensor = Tensor([[0]]) +output = sigmoid(input_tensor) +expected_value = 0.5 +assert abs(output.data.item() - expected_value) < 1e-6, f"Sigmoid(0) should be 0.5, got {output.data.item()}" + +# Test range bounds (allowing for floating-point precision at extremes) +large_input = Tensor([[100]]) +large_output = sigmoid(large_input) +assert 0 < large_output.data.item() <= 1, f"Sigmoid output should be in (0,1], got {large_output.data.item()}" + +small_input = Tensor([[-100]]) +small_output = sigmoid(small_input) +assert 0 <= small_output.data.item() < 1, f"Sigmoid output should be in [0,1), got {small_output.data.item()}" + +# Test with multiple values +multi_input = Tensor([[-2, 0, 2]]) +multi_output = sigmoid(multi_input) +assert multi_output.shape == multi_input.shape, "Sigmoid should preserve shape" +assert np.all((multi_output.data > 0) & (multi_output.data < 1)), "All sigmoid outputs should be in (0,1)" + +print("โœ… Sigmoid tests passed!") +print(f"โœ… Sigmoid({multi_input.data.flatten()}) = {multi_output.data.flatten()}") + +# %% nbgrader={"grade": true, "grade_id": "test-tanh", "locked": true, "points": 
20, "schema_version": 3, "solution": false, "task": false} +# Test Tanh activation +print("Testing Tanh activation...") + +tanh = Tanh() + +# Test basic functionality +input_tensor = Tensor([[0]]) +output = tanh(input_tensor) +expected_value = 0.0 +assert abs(output.data.item() - expected_value) < 1e-6, f"Tanh(0) should be 0.0, got {output.data.item()}" + +# Test range bounds (allowing for floating-point precision at extremes) +large_input = Tensor([[100]]) +large_output = tanh(large_input) +assert -1 <= large_output.data.item() <= 1, f"Tanh output should be in [-1,1], got {large_output.data.item()}" + +small_input = Tensor([[-100]]) +small_output = tanh(small_input) +assert -1 <= small_output.data.item() <= 1, f"Tanh output should be in [-1,1], got {small_output.data.item()}" + +# Test symmetry: tanh(-x) = -tanh(x) +test_input = Tensor([[2]]) +pos_output = tanh(test_input) +neg_input = Tensor([[-2]]) +neg_output = tanh(neg_input) +assert abs(pos_output.data.item() + neg_output.data.item()) < 1e-6, "Tanh should be symmetric: tanh(-x) = -tanh(x)" + +print("โœ… Tanh tests passed!") +print(f"โœ… Tanh(ยฑ2) = ยฑ{abs(pos_output.data.item()):.3f}") + +# %% nbgrader={"grade": true, "grade_id": "test-softmax", "locked": true, "points": 20, "schema_version": 3, "solution": false, "task": false} +# Test Softmax activation +print("Testing Softmax activation...") + +softmax = Softmax() + +# Test basic functionality +input_tensor = Tensor([[1, 2, 3]]) +output = softmax(input_tensor) + +# Check that outputs sum to 1 +sum_output = np.sum(output.data) +assert abs(sum_output - 1.0) < 1e-6, f"Softmax outputs should sum to 1, got {sum_output}" + +# Check that all outputs are positive +assert np.all(output.data > 0), "All softmax outputs should be positive" + +# Check that larger inputs give larger outputs +assert output.data[0, 2] > output.data[0, 1] > output.data[0, 0], "Softmax should preserve order" + +# Test with matrix (multiple rows) +matrix_input = Tensor([[1, 2], [3, 4]]) 
+matrix_output = softmax(matrix_input) +row_sums = np.sum(matrix_output.data, axis=1) +assert np.allclose(row_sums, 1.0), f"Each row should sum to 1, got {row_sums}" + +print("โœ… Softmax tests passed!") +print(f"โœ… Softmax({input_tensor.data.flatten()}) = {output.data.flatten()}") +print(f"โœ… Sum = {np.sum(output.data):.6f}") + +# %% nbgrader={"grade": true, "grade_id": "test-activation-integration", "locked": true, "points": 20, "schema_version": 3, "solution": false, "task": false} +# Test activation function integration +print("Testing activation function integration...") + +# Create test data +test_data = Tensor([[-2, -1, 0, 1, 2]]) + +# Test all activations +relu = ReLU() +sigmoid = Sigmoid() +tanh = Tanh() +softmax = Softmax() + +# Apply all activations +relu_out = relu(test_data) +sigmoid_out = sigmoid(test_data) +tanh_out = tanh(test_data) +softmax_out = softmax(test_data) + +# Check shapes are preserved +assert relu_out.shape == test_data.shape, "ReLU should preserve shape" +assert sigmoid_out.shape == test_data.shape, "Sigmoid should preserve shape" +assert tanh_out.shape == test_data.shape, "Tanh should preserve shape" +assert softmax_out.shape == test_data.shape, "Softmax should preserve shape" + +# Check ranges (allowing for floating-point precision at extremes) +assert np.all(relu_out.data >= 0), "ReLU outputs should be non-negative" +assert np.all((sigmoid_out.data >= 0) & (sigmoid_out.data <= 1)), "Sigmoid outputs should be in [0,1]" +assert np.all((tanh_out.data >= -1) & (tanh_out.data <= 1)), "Tanh outputs should be in [-1,1]" +assert np.all(softmax_out.data > 0), "Softmax outputs should be positive" + +# Test chaining (composition) +chained = relu(sigmoid(test_data)) +assert chained.shape == test_data.shape, "Chained activations should preserve shape" + +print("โœ… Activation integration tests passed!") +print(f"โœ… All activation functions work correctly") +print(f"โœ… Input: {test_data.data.flatten()}") +print(f"โœ… ReLU: 
{relu_out.data.flatten()}") +print(f"โœ… Sigmoid: {sigmoid_out.data.flatten()}") +print(f"โœ… Tanh: {tanh_out.data.flatten()}") +print(f"โœ… Softmax: {softmax_out.data.flatten()}") # %% [markdown] """ -## ๐ŸŽจ Comprehensive Activation Function Comparison +## ๐ŸŽฏ Module Summary -Now that we've implemented all four activation functions, let's compare them side by side to understand their differences and use cases. -""" +Congratulations! You've successfully implemented the core activation functions for TinyTorch: -# %% -# Comprehensive comparison of all activation functions -print("๐ŸŽจ Comprehensive Activation Function Comparison") -print("=" * 60) +### What You've Accomplished +โœ… **ReLU**: The workhorse activation for hidden layers +โœ… **Sigmoid**: Smooth probabilistic outputs for binary classification +โœ… **Tanh**: Zero-centered activation for better training dynamics +โœ… **Softmax**: Probability distributions for multi-class classification +โœ… **Integration**: All functions work together and preserve tensor shapes -try: - # Create all activation functions - activations = { - 'ReLU': ReLU(), - 'Sigmoid': Sigmoid(), - 'Tanh': Tanh(), - 'Softmax': Softmax() - } - - # Test with sample data - test_data = Tensor([[-2, -1, 0, 1, 2]]) - - print("๐Ÿ“Š Activation Function Outputs:") - print(f"Input: {test_data.data.flatten()}") - print("-" * 40) - - for name, activation in activations.items(): - try: - result = activation(test_data) - print(f"{name:8}: {result.data.flatten()}") - except Exception as e: - print(f"{name:8}: Error - {e}") - - print("\n๐Ÿ“ˆ Key Properties Summary:") - print("-" * 40) - print("ReLU : Range [0, โˆž), sparse, fast") - print("Sigmoid : Range (0, 1), smooth, probability-like") - print("Tanh : Range (-1, 1), zero-centered, symmetric") - print("Softmax : Probability distribution, sums to 1") - - print("\n๐ŸŽฏ When to Use Each:") - print("-" * 40) - print("ReLU : Hidden layers, CNNs, most deep networks") - print("Sigmoid : Binary classification, 
gates, probabilities") - print("Tanh : RNNs, when you need zero-centered output") - print("Softmax : Multi-class classification, attention") - - # Show comprehensive visualization if available - if _should_show_plots(): - print("\n๐ŸŽจ Generating comprehensive comparison plot...") - try: - import matplotlib.pyplot as plt - - fig, axes = plt.subplots(2, 2, figsize=(12, 10)) - fig.suptitle('Activation Function Comparison', fontsize=16) - - x_vals = np.linspace(-5, 5, 100) - - # Plot each activation function - for i, (name, activation) in enumerate(list(activations.items())[:3]): # Skip Softmax for now - row, col = i // 2, i % 2 - ax = axes[row, col] - - y_vals = [] - for x in x_vals: - try: - input_tensor = Tensor([[x]]) - output = activation(input_tensor) - y_vals.append(output.data.item()) - except: - y_vals.append(0) - - ax.plot(x_vals, y_vals, 'b-', linewidth=2) - ax.set_title(f'{name} Activation') - ax.grid(True, alpha=0.3) - ax.set_xlabel('Input (x)') - ax.set_ylabel(f'{name}(x)') - - # Special handling for Softmax - ax = axes[1, 1] - sample_inputs = np.array([[1, 2, 3], [0, 0, 0], [-1, 0, 1]]) - softmax_results = [] - - for inp in sample_inputs: - result = softmax(Tensor([inp])) - softmax_results.append(result.data.flatten()) - - x_pos = np.arange(len(sample_inputs)) - width = 0.25 - - for i in range(3): # 3 classes - values = [result[i] for result in softmax_results] - ax.bar(x_pos + i * width, values, width, label=f'Class {i+1}') - - ax.set_title('Softmax Activation') - ax.set_xlabel('Input Examples') - ax.set_ylabel('Probability') - ax.set_xticks(x_pos + width) - ax.set_xticklabels(['[1,2,3]', '[0,0,0]', '[-1,0,1]']) - ax.legend() - - plt.tight_layout() - plt.show() - - except ImportError: - print(" ๐Ÿ“Š Matplotlib not available - skipping comprehensive plot") - except Exception as e: - print(f" โš ๏ธ Comprehensive plot error: {e}") - -except Exception as e: - print(f"โŒ Error in comprehensive comparison: {e}") +### Key Concepts You've Learned +- 
**Nonlinearity** is essential for neural networks to learn complex patterns +- **ReLU** is simple, fast, and effective for most hidden layers +- **Sigmoid** squashes outputs to (0,1) for probabilistic interpretation +- **Tanh** is zero-centered and often better than sigmoid for hidden layers +- **Softmax** converts logits to probability distributions +- **Numerical stability** is crucial for functions with exponentials -print("\n" + "=" * 60) -print("๐ŸŽ‰ Congratulations! You've implemented all four activation functions!") -print("You now understand the building blocks that make neural networks intelligent.") -print("=" * 60) \ No newline at end of file +### Next Steps +1. **Export your code**: `tito package nbdev --export 02_activations` +2. **Test your implementation**: `tito module test 02_activations` +3. **Use your activations**: + ```python + from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax + from tinytorch.core.tensor import Tensor + + relu = ReLU() + x = Tensor([[-1, 0, 1, 2]]) + y = relu(x) # Your activation in action! + ``` +4. **Move to Module 3**: Start building neural network layers! + +**Ready for the next challenge?** Let's combine tensors and activations to build the fundamental building blocks of neural networks! 
+""" \ No newline at end of file diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py index 1058a423..a44771fa 100644 --- a/tinytorch/_modidx.py +++ b/tinytorch/_modidx.py @@ -5,7 +5,62 @@ d = { 'settings': { 'branch': 'main', 'doc_host': 'https://tinytorch.github.io', 'git_url': 'https://github.com/tinytorch/TinyTorch/', 'lib_path': 'tinytorch'}, - 'syms': { 'tinytorch.core.setup': { 'tinytorch.core.setup.personal_info': ( '00_setup/setup_dev.html#personal_info', + 'syms': { 'tinytorch.core.activations': { 'tinytorch.core.activations.ReLU': ( '02_activations/activations_dev.html#relu', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.ReLU.__call__': ( '02_activations/activations_dev.html#relu.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.ReLU.forward': ( '02_activations/activations_dev.html#relu.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid': ( '02_activations/activations_dev.html#sigmoid', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid.__call__': ( '02_activations/activations_dev.html#sigmoid.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Sigmoid.forward': ( '02_activations/activations_dev.html#sigmoid.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax': ( '02_activations/activations_dev.html#softmax', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax.__call__': ( '02_activations/activations_dev.html#softmax.__call__', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Softmax.forward': ( '02_activations/activations_dev.html#softmax.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Tanh': ( '02_activations/activations_dev.html#tanh', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.Tanh.__call__': ( '02_activations/activations_dev.html#tanh.__call__', + 'tinytorch/core/activations.py'), + 
'tinytorch.core.activations.Tanh.forward': ( '02_activations/activations_dev.html#tanh.forward', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations._should_show_plots': ( '02_activations/activations_dev.html#_should_show_plots', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.visualize_activation_function': ( '02_activations/activations_dev.html#visualize_activation_function', + 'tinytorch/core/activations.py'), + 'tinytorch.core.activations.visualize_activation_on_data': ( '02_activations/activations_dev.html#visualize_activation_on_data', + 'tinytorch/core/activations.py')}, + 'tinytorch.core.setup': { 'tinytorch.core.setup.personal_info': ( '00_setup/setup_dev.html#personal_info', 'tinytorch/core/setup.py'), 'tinytorch.core.setup.system_info': ( '00_setup/setup_dev.html#system_info', - 'tinytorch/core/setup.py')}}} + 'tinytorch/core/setup.py')}, + 'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('01_tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__add__': ( '01_tensor/tensor_dev.html#tensor.__add__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__init__': ( '01_tensor/tensor_dev.html#tensor.__init__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__mul__': ( '01_tensor/tensor_dev.html#tensor.__mul__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__repr__': ( '01_tensor/tensor_dev.html#tensor.__repr__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__sub__': ( '01_tensor/tensor_dev.html#tensor.__sub__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.__truediv__': ( '01_tensor/tensor_dev.html#tensor.__truediv__', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.add': ( '01_tensor/tensor_dev.html#tensor.add', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.data': ( '01_tensor/tensor_dev.html#tensor.data', + 'tinytorch/core/tensor.py'), + 
'tinytorch.core.tensor.Tensor.dtype': ( '01_tensor/tensor_dev.html#tensor.dtype', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.multiply': ( '01_tensor/tensor_dev.html#tensor.multiply', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.shape': ( '01_tensor/tensor_dev.html#tensor.shape', + 'tinytorch/core/tensor.py'), + 'tinytorch.core.tensor.Tensor.size': ( '01_tensor/tensor_dev.html#tensor.size', + 'tinytorch/core/tensor.py')}}} diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py new file mode 100644 index 00000000..39604bdf --- /dev/null +++ b/tinytorch/core/activations.py @@ -0,0 +1,246 @@ +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb. + +# %% auto 0 +__all__ = ['visualize_activation_function', 'visualize_activation_on_data', 'ReLU', 'Sigmoid', 'Tanh', 'Softmax'] + +# %% ../../modules/source/02_activations/activations_dev.ipynb 1 +import math +import numpy as np +import matplotlib.pyplot as plt +import os +import sys +from typing import Union, List + +# Import our Tensor class - try from package first, then from local module +try: + from tinytorch.core.tensor import Tensor +except ImportError: + # For development, import from local tensor module + sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor')) + from tensor_dev import Tensor + +# %% ../../modules/source/02_activations/activations_dev.ipynb 2 +def _should_show_plots(): + """Check if we should show plots (disable during testing)""" + # Check multiple conditions that indicate we're in test mode + is_pytest = ( + 'pytest' in sys.modules or + 'test' in sys.argv or + os.environ.get('PYTEST_CURRENT_TEST') is not None or + any('test' in arg for arg in sys.argv) or + any('pytest' in arg for arg in sys.argv) + ) + + # Show plots in development mode (when not in test mode) + return not is_pytest + +# %% ../../modules/source/02_activations/activations_dev.ipynb 3 +def 
visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100): + """Visualize an activation function's behavior""" + if not _should_show_plots(): + return + + try: + + # Generate input values + x_vals = np.linspace(x_range[0], x_range[1], num_points) + + # Apply activation function + y_vals = [] + for x in x_vals: + input_tensor = Tensor([[x]]) + output = activation_fn(input_tensor) + y_vals.append(output.data.item()) + + # Create plot + plt.figure(figsize=(10, 6)) + plt.plot(x_vals, y_vals, 'b-', linewidth=2, label=f'{name} Activation') + plt.grid(True, alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel(f'{name}(x)') + plt.title(f'{name} Activation Function') + plt.legend() + plt.show() + + except ImportError: + print(" ๐Ÿ“Š Matplotlib not available - skipping visualization") + except Exception as e: + print(f" โš ๏ธ Visualization error: {e}") + +def visualize_activation_on_data(activation_fn, name: str, data: Tensor): + """Show activation function applied to sample data""" + if not _should_show_plots(): + return + + try: + output = activation_fn(data) + print(f" ๐Ÿ“Š {name} Example:") + print(f" Input: {data.data.flatten()}") + print(f" Output: {output.data.flatten()}") + print(f" Range: [{output.data.min():.3f}, {output.data.max():.3f}]") + + except Exception as e: + print(f" โš ๏ธ Data visualization error: {e}") + +# %% ../../modules/source/02_activations/activations_dev.ipynb 6 +class ReLU: + """ + ReLU Activation Function: f(x) = max(0, x) + + The most popular activation function in deep learning. + Simple, fast, and effective for most applications. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply ReLU activation: f(x) = max(0, x) + + TODO: Implement ReLU activation + + APPROACH: + 1. For each element in the input tensor, apply max(0, element) + 2. 
Return a new Tensor with the results + + EXAMPLE: + Input: Tensor([[-1, 0, 1, 2, -3]]) + Expected: Tensor([[0, 0, 1, 2, 0]]) + + HINTS: + - Use np.maximum(0, x.data) for element-wise max + - Remember to return a new Tensor object + - The shape should remain the same as input + """ + ### BEGIN SOLUTION + result = np.maximum(0, x.data) + return Tensor(result) + ### END SOLUTION + + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: relu(x) instead of relu.forward(x)""" + return self.forward(x) + +# %% ../../modules/source/02_activations/activations_dev.ipynb 8 +class Sigmoid: + """ + Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x)) + + Smooth S-shaped function that squashes inputs to (0, 1). + Useful for binary classification and probabilistic outputs. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x)) + + TODO: Implement Sigmoid activation with numerical stability + + APPROACH: + 1. Clip input values to prevent overflow (e.g., between -500 and 500) + 2. Apply the sigmoid formula: 1 / (1 + exp(-x)) + 3. Return a new Tensor with the results + + EXAMPLE: + Input: Tensor([[-2, 0, 2]]) + Expected: Tensor([[0.119, 0.5, 0.881]]) (approximately) + + HINTS: + - Use np.clip(x.data, -500, 500) for numerical stability + - Use np.exp() for the exponential function + - Be careful with very large/small inputs to avoid overflow + """ + ### BEGIN SOLUTION + # Clip for numerical stability + clipped = np.clip(x.data, -500, 500) + result = 1 / (1 + np.exp(-clipped)) + return Tensor(result) + ### END SOLUTION + + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)""" + return self.forward(x) + +# %% ../../modules/source/02_activations/activations_dev.ipynb 10 +class Tanh: + """ + Tanh Activation Function: f(x) = tanh(x) + + Zero-centered S-shaped function that squashes inputs to (-1, 1). 
+ Better than sigmoid for hidden layers due to zero-centered outputs. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Tanh activation: f(x) = tanh(x) + + TODO: Implement Tanh activation + + APPROACH: + 1. Use NumPy's tanh function for numerical stability + 2. Apply to the tensor data + 3. Return a new Tensor with the results + + EXAMPLE: + Input: Tensor([[-2, 0, 2]]) + Expected: Tensor([[-0.964, 0.0, 0.964]]) (approximately) + + HINTS: + - Use np.tanh(x.data) - NumPy handles the math + - Much simpler than implementing the formula manually + - NumPy's tanh is numerically stable + """ + ### BEGIN SOLUTION + result = np.tanh(x.data) + return Tensor(result) + ### END SOLUTION + + def __call__(self, x: Tensor) -> Tensor: + """Make the class callable: tanh(x) instead of tanh.forward(x)""" + return self.forward(x) + +# %% ../../modules/source/02_activations/activations_dev.ipynb 12 +class Softmax: + """ + Softmax Activation Function: f(x_i) = e^(x_i) / ฮฃ(e^(x_j)) + + Converts a vector of numbers into a probability distribution. + Essential for multi-class classification and attention mechanisms. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Softmax activation: f(x_i) = e^(x_i) / ฮฃ(e^(x_j)) + + TODO: Implement Softmax activation with numerical stability + + APPROACH: + 1. Subtract max value from inputs for numerical stability + 2. Compute exponentials: e^(x_i - max) + 3. Divide by sum of exponentials + 4. 
# %% ../../modules/source/02_activations/activations_dev.ipynb 12
class Softmax:
    """
    Softmax Activation Function: f(x_i) = e^(x_i) / sum(e^(x_j))

    Converts a vector of numbers into a probability distribution.
    Essential for multi-class classification and attention mechanisms.
    """

    def forward(self, x: "Tensor") -> "Tensor":
        """Apply softmax along the last axis, returning a new Tensor.

        The row-wise maximum is subtracted before exponentiation (the
        standard max-shift trick) so np.exp cannot overflow; the output
        sums to 1 along the last axis.
        """
        ### BEGIN SOLUTION
        # Max-shift for numerical stability, then normalize exponentials.
        shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
        exps = np.exp(shifted)
        return Tensor(exps / np.sum(exps, axis=-1, keepdims=True))
        ### END SOLUTION

    def __call__(self, x: "Tensor") -> "Tensor":
        """Allow softmax(x) as shorthand for softmax.forward(x)."""
        return self.forward(x)
class Tensor:
    """
    TinyTorch Tensor: N-dimensional array with ML operations.

    The fundamental data structure for all TinyTorch operations.  Wraps a
    NumPy array (held in ``self._data``) with ML-specific functionality.

    Auto-detected dtypes are normalized to ML-friendly 32-bit defaults:
    float64 -> float32 and int64 -> int32.  This makes
    ``Tensor([1, 2, 3]).dtype == int32`` hold on every platform, matching
    the scalar path (previously the list/ndarray paths kept NumPy's
    platform-default int64, contradicting the documented behavior).
    """

    def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):
        """
        Create a new tensor from data.

        Args:
            data: Input data (scalar, list, or numpy array).
            dtype: Data type ('float32', 'int32', etc.).  Defaults to
                auto-detection, with 64-bit defaults narrowed to 32-bit.
        """
        ### BEGIN SOLUTION
        if isinstance(data, (int, float, np.number)):
            # Scalars: integers default to int32, floats to float32.
            if dtype is None:
                is_integer = isinstance(data, int) or (
                    isinstance(data, np.number) and np.issubdtype(type(data), np.integer)
                )
                dtype = 'int32' if is_integer else 'float32'
            self._data = np.array(data, dtype=dtype)
        elif isinstance(data, list):
            # Let NumPy detect the element type, then narrow 64-bit defaults.
            if dtype is None:
                dtype = self._preferred_dtype(np.array(data).dtype)
            self._data = np.array(data, dtype=dtype)
        elif isinstance(data, np.ndarray):
            # Reuse the array's dtype (narrowed); always store a copy so the
            # caller's buffer is never aliased by this tensor.
            if dtype is None:
                dtype = self._preferred_dtype(data.dtype)
            self._data = data.astype(dtype) if dtype != data.dtype else data.copy()
        else:
            # Fall back to NumPy's own conversion for unknown input types.
            self._data = np.array(data, dtype=dtype)
        ### END SOLUTION

    @staticmethod
    def _preferred_dtype(detected: np.dtype) -> str:
        """Narrow NumPy's 64-bit defaults to TinyTorch's 32-bit defaults."""
        if detected == np.float64:
            return 'float32'
        if detected == np.int64:
            return 'int32'
        return str(detected)

    @property
    def data(self) -> np.ndarray:
        """The underlying numpy array."""
        ### BEGIN SOLUTION
        return self._data
        ### END SOLUTION

    @property
    def shape(self) -> Tuple[int, ...]:
        """Tensor shape, e.g. Tensor([1, 2, 3]).shape == (3,)."""
        ### BEGIN SOLUTION
        return self._data.shape
        ### END SOLUTION

    @property
    def size(self) -> int:
        """Total number of elements, e.g. Tensor([1, 2, 3]).size == 3."""
        ### BEGIN SOLUTION
        return self._data.size
        ### END SOLUTION

    @property
    def dtype(self) -> np.dtype:
        """Data type as a numpy dtype, e.g. Tensor([1, 2, 3]).dtype == dtype('int32')."""
        ### BEGIN SOLUTION
        return self._data.dtype
        ### END SOLUTION

    def __repr__(self) -> str:
        """Readable form: "Tensor([1, 2, 3], shape=(3,), dtype=int32)"."""
        ### BEGIN SOLUTION
        return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})"
        ### END SOLUTION

    def add(self, other: 'Tensor') -> 'Tensor':
        """Element-wise addition (NumPy broadcasting applies); returns a new Tensor."""
        ### BEGIN SOLUTION
        return Tensor(self._data + other._data)
        ### END SOLUTION

    def multiply(self, other: 'Tensor') -> 'Tensor':
        """Element-wise (Hadamard) multiplication; returns a new Tensor."""
        ### BEGIN SOLUTION
        return Tensor(self._data * other._data)
        ### END SOLUTION

    def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """tensor + other, where other may be a Tensor or a scalar."""
        ### BEGIN SOLUTION
        rhs = other if isinstance(other, Tensor) else Tensor(other)
        return self.add(rhs)
        ### END SOLUTION

    def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """tensor * other (element-wise), where other may be a Tensor or a scalar."""
        ### BEGIN SOLUTION
        rhs = other if isinstance(other, Tensor) else Tensor(other)
        return self.multiply(rhs)
        ### END SOLUTION

    def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """tensor - other, where other may be a Tensor or a scalar."""
        ### BEGIN SOLUTION
        rhs = other._data if isinstance(other, Tensor) else other
        return Tensor(self._data - rhs)
        ### END SOLUTION

    def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':
        """tensor / other (true division), where other may be a Tensor or a scalar."""
        ### BEGIN SOLUTION
        rhs = other._data if isinstance(other, Tensor) else other
        return Tensor(self._data / rhs)
        ### END SOLUTION
Return new Tensor with result + + EXAMPLE: + Tensor([6, 8]) / Tensor([2, 4]) โ†’ Tensor([3, 2]) + Tensor([6, 8]) / 2 โ†’ Tensor([3, 4]) + """ + ### BEGIN SOLUTION + if isinstance(other, Tensor): + result = self._data / other._data + else: + result = self._data / other + return Tensor(result) + ### END SOLUTION diff --git a/tito/commands/export.py b/tito/commands/export.py index f6a6d991..aa55a68f 100644 --- a/tito/commands/export.py +++ b/tito/commands/export.py @@ -113,6 +113,45 @@ class ExportCommand(BaseCommand): console.print(Panel(exports_text, title="Export Summary", border_style="bright_green")) + def _convert_py_to_notebook(self, module_path: Path) -> bool: + """Convert .py dev file to .ipynb using Jupytext.""" + module_name = module_path.name + short_name = module_name[3:] if module_name.startswith(tuple(f"{i:02d}_" for i in range(100))) else module_name + + dev_file = module_path / f"{short_name}_dev.py" + if not dev_file.exists(): + return False + + notebook_file = module_path / f"{short_name}_dev.ipynb" + + # Check if notebook is newer than .py file + if notebook_file.exists(): + py_mtime = dev_file.stat().st_mtime + nb_mtime = notebook_file.stat().st_mtime + if nb_mtime > py_mtime: + return True # Notebook is up to date + + try: + result = subprocess.run([ + "jupytext", "--to", "ipynb", str(dev_file) + ], capture_output=True, text=True, cwd=module_path) + + return result.returncode == 0 + except FileNotFoundError: + return False + + def _convert_all_modules(self) -> list: + """Convert all modules' .py files to .ipynb files.""" + modules = self._discover_modules() + converted = [] + + for module_name in modules: + module_path = Path(f"modules/source/{module_name}") + if self._convert_py_to_notebook(module_path): + converted.append(module_name) + + return converted + def run(self, args: Namespace) -> int: console = self.console @@ -136,17 +175,35 @@ class ExportCommand(BaseCommand): return 1 console.print(Panel(f"๐Ÿ”„ Exporting Module: {args.module}", - 
title="nbdev Export", border_style="bright_cyan")) + title="Complete Export Workflow", border_style="bright_cyan")) + + # Step 1: Convert .py to .ipynb + console.print(f"๐Ÿ“ Converting {args.module} Python file to notebook...") + if not self._convert_py_to_notebook(module_path): + console.print(Panel("[red]โŒ Failed to convert .py file to notebook. Is jupytext installed?[/red]", + title="Conversion Error", border_style="red")) + return 1 + console.print(f"๐Ÿ”„ Exporting {args.module} notebook to tinytorch package...") - # Use nbdev_export with --path for specific module + # Step 2: Use nbdev_export with --path for specific module cmd = ["nbdev_export", "--path", str(module_path)] elif hasattr(args, 'all') and args.all: - console.print(Panel("๐Ÿ”„ Exporting All Notebooks to Package", - title="nbdev Export", border_style="bright_cyan")) + console.print(Panel("๐Ÿ”„ Exporting All Modules to Package", + title="Complete Export Workflow", border_style="bright_cyan")) + + # Step 1: Convert all .py files to .ipynb + console.print("๐Ÿ“ Converting all Python files to notebooks...") + converted = self._convert_all_modules() + if not converted: + console.print(Panel("[red]โŒ No modules converted. 
Check if jupytext is installed and .py files exist.[/red]", + title="Conversion Error", border_style="red")) + return 1 + + console.print(f"โœ… Converted {len(converted)} modules: {', '.join(converted)}") console.print("๐Ÿ”„ Exporting all notebook code to tinytorch package...") - # Use nbdev_export for all modules + # Step 2: Use nbdev_export for all modules cmd = ["nbdev_export"] else: console.print(Panel("[red]โŒ Must specify either a module name or --all[/red]", diff --git a/tito/main.py b/tito/main.py index 1d9230a1..aa3e10d1 100644 --- a/tito/main.py +++ b/tito/main.py @@ -59,6 +59,8 @@ class TinyTorchCLI: 'module': ModuleCommand, 'package': PackageCommand, 'nbgrader': NBGraderCommand, + # Convenience commands + 'export': ExportCommand, } def create_parser(self) -> argparse.ArgumentParser: @@ -77,7 +79,8 @@ Command Groups: Examples: tito system info Show system information tito module status --metadata Module status with metadata - tito package export Export notebooks to package + tito export 01_tensor Export specific module to package + tito export --all Export all modules to package tito nbgrader generate setup Generate assignment from setup module """ ) @@ -174,7 +177,8 @@ Examples: "[bold]Quick Start:[/bold]\n" " [dim]tito system info[/dim] - Show system information\n" " [dim]tito module status --metadata[/dim] - Module status with metadata\n" - " [dim]tito package export[/dim] - Export notebooks to package\n" + " [dim]tito export 01_tensor[/dim] - Export specific module to package\n" + " [dim]tito export --all[/dim] - Export all modules to package\n" " [dim]tito nbgrader generate setup[/dim] - Generate assignment from setup module\n\n" "[bold]Get Help:[/bold]\n" " [dim]tito system[/dim] - Show system subcommands\n"