diff --git a/bin/tito.py b/bin/tito.py index 8f5e0a77..a3453d09 100755 --- a/bin/tito.py +++ b/bin/tito.py @@ -343,7 +343,7 @@ def cmd_info(args): def cmd_test(args): """Run tests for a specific module.""" - valid_modules = ["setup", "tensor", "layers", "cnn", "data", "training", + valid_modules = ["setup", "tensor", "activations", "layers", "cnn", "data", "training", "profiling", "compression", "kernels", "benchmarking", "mlops"] if args.all: diff --git a/modules/activations/README.md b/modules/activations/README.md new file mode 100644 index 00000000..9f37c496 --- /dev/null +++ b/modules/activations/README.md @@ -0,0 +1,237 @@ +# πŸ”₯ TinyTorch Activations Module + +Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power to learn complex patterns. + +## 🎯 Learning Objectives + +By the end of this module, you will: +1. **Understand** why activation functions are essential for neural networks +2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh +3. **Test** your functions with various inputs to understand their behavior +4. **Grasp** the mathematical properties that make each function useful + +## 🧠 Why This Module Matters + +**Without activation functions, neural networks are just linear transformations!** + +``` +Linear β†’ Linear β†’ Linear = Still just Linear +Linear β†’ Activation β†’ Linear = Can learn complex patterns! +``` + +This module teaches you the mathematical foundations that make deep learning possible. + +## πŸ“š What You'll Build + +### 1. **ReLU** (Rectified Linear Unit) +- **Formula**: `f(x) = max(0, x)` +- **Properties**: Simple, sparse, unbounded +- **Use case**: Hidden layers (most common) + +### 2. **Sigmoid** +- **Formula**: `f(x) = 1 / (1 + e^(-x))` +- **Properties**: Bounded to (0,1), smooth, probabilistic +- **Use case**: Binary classification, gates + +### 3. 
**Tanh** (Hyperbolic Tangent) +- **Formula**: `f(x) = tanh(x)` +- **Properties**: Bounded to (-1,1), zero-centered, smooth +- **Use case**: Hidden layers, RNNs + +## 🚀 Getting Started + +### Development Workflow + +1. **Open the development file**: + ```bash + python bin/tito.py jupyter + # Then open modules/activations/activations_dev.py + ``` + +2. **Implement the functions**: + - Start with ReLU (simplest) + - Move to Sigmoid (numerical stability challenge) + - Finish with Tanh (symmetry properties) + +3. **Visualize your functions**: + - Each function has plotting sections + - See how your implementation transforms inputs + - Compare all functions side-by-side + +4. **Test as you go**: + ```bash + python bin/tito.py test --module activations + ``` + +5. **Export to package**: + ```bash + python bin/tito.py sync + ``` + +### 📊 Visual Learning Features + +This module includes comprehensive plotting sections to help you understand: + +- **Individual Function Plots**: See each activation function's curve +- **Implementation Comparison**: Your implementation vs ideal side-by-side +- **Mathematical Explanations**: Visual breakdown of function properties +- **Error Analysis**: Quantitative feedback on implementation accuracy +- **Comprehensive Comparison**: All functions analyzed together + +**Enhanced Features**: +- **4-Panel Plots**: Implementation vs ideal, mathematical definition, properties, error analysis +- **Real-time Feedback**: Immediate accuracy scores with color-coded status +- **Mathematical Insights**: Detailed explanations of function properties +- **Numerical Stability Testing**: Verification with extreme values +- **Property Verification**: Symmetry, monotonicity, and zero-centering tests + +**Why enhanced plots matter**: +- **Visual Debugging**: See exactly where your implementation differs +- **Quantitative Feedback**: Get precise error measurements +- **Mathematical Understanding**: Connect formulas to visual behavior +- **Implementation 
Confidence**: Know immediately if your code is correct +- **Learning Reinforcement**: Multiple visual perspectives of the same concept + +### Implementation Tips + +#### ReLU Implementation +```python +def forward(self, x: Tensor) -> Tensor: + return Tensor(np.maximum(0, x.data)) +``` + +#### Sigmoid Implementation (Numerical Stability) +```python +def forward(self, x: Tensor) -> Tensor: + # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) + # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) + x_data = x.data + result = np.zeros_like(x_data) + + positive_mask = x_data >= 0 + result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) + result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) + + return Tensor(result) +``` + +#### Tanh Implementation +```python +def forward(self, x: Tensor) -> Tensor: + return Tensor(np.tanh(x.data)) +``` + +## 🧪 Testing Your Implementation + +### Unit Tests +```bash +python bin/tito.py test --module activations +``` + +**Test Coverage**: +- ✅ Mathematical correctness +- ✅ Numerical stability +- ✅ Shape preservation +- ✅ Edge cases +- ✅ Function properties + +### Manual Testing +```python +# Test all activations +from tinytorch.core.tensor import Tensor +from modules.activations.activations_dev import ReLU, Sigmoid, Tanh + +x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + +relu = ReLU() +sigmoid = Sigmoid() +tanh = Tanh() + +print("Input:", x.data) +print("ReLU:", relu(x).data) +print("Sigmoid:", sigmoid(x).data) +print("Tanh:", tanh(x).data) +``` + +## 📊 Understanding Function Properties + +### Range Comparison +| Function | Input Range | Output Range | Zero Point | +|----------|-------------|--------------|------------| +| ReLU | (-∞, ∞) | [0, ∞) | f(0) = 0 | +| Sigmoid | (-∞, ∞) | (0, 1) | f(0) = 0.5 | +| Tanh | (-∞, ∞) | (-1, 1) | f(0) = 0 | + +### Key Properties +- **ReLU**: Sparse (zeros out negatives), unbounded, simple +- **Sigmoid**: Probabilistic (0-1 range), smooth, 
saturating +- **Tanh**: Zero-centered, symmetric, stronger gradients than sigmoid + +## 🔧 Integration with TinyTorch + +After implementation, your activations will be available as: + +```python +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + +# Use in neural networks +relu = ReLU() +output = relu(input_tensor) +``` + +## 🎯 Common Issues & Solutions + +### Issue 1: Sigmoid Overflow +**Problem**: `exp()` overflow with large inputs +**Solution**: Use numerically stable implementation (see code above) + +### Issue 2: Wrong Output Range +**Problem**: Sigmoid/Tanh outputs outside expected range +**Solution**: Check your mathematical implementation + +### Issue 3: Shape Mismatch +**Problem**: Output shape differs from input shape +**Solution**: Ensure element-wise operations preserve shape + +### Issue 4: Import Errors +**Problem**: Cannot import after implementation +**Solution**: Run `python bin/tito.py sync` to export to package + +## 📈 Performance Considerations + +- **ReLU**: Fastest (simple max operation) +- **Sigmoid**: Moderate (exponential computation) +- **Tanh**: Moderate (hyperbolic function) + +All implementations use NumPy for vectorized operations. + +## 🚀 What's Next + +After mastering activations, you'll use them in: +1. **Layers Module**: Building neural network layers +2. **Loss Functions**: Computing training objectives +3. **Advanced Architectures**: CNNs, RNNs, and more + +These functions are the mathematical foundation for everything that follows! 
+ +## πŸ“š Further Reading + +**Mathematical Background**: +- [Activation Functions in Neural Networks](https://en.wikipedia.org/wiki/Activation_function) +- [Deep Learning Book - Chapter 6](http://www.deeplearningbook.org/) + +**Advanced Topics**: +- ReLU variants (Leaky ReLU, ELU, Swish) +- Activation function choice and impact +- Gradient flow and vanishing gradients + +## πŸŽ‰ Success Criteria + +You've mastered this module when: +- [ ] All tests pass (`python bin/tito.py test --module activations`) +- [ ] You understand why each function is useful +- [ ] You can explain the mathematical properties +- [ ] You can use activations in neural networks +- [ ] You appreciate the importance of nonlinearity + +**Great work! You've built the mathematical foundation of neural networks!** πŸŽ‰ \ No newline at end of file diff --git a/modules/activations/activations_dev.py b/modules/activations/activations_dev.py new file mode 100644 index 00000000..582933ff --- /dev/null +++ b/modules/activations/activations_dev.py @@ -0,0 +1,1162 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# πŸ”₯ TinyTorch Activations Module + +Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power. + +## 🎯 Learning Objectives + +By the end of this module, you will: +1. **Understand** why activation functions are essential for neural networks +2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh +3. **Test** your functions with various inputs to understand their behavior +4. 
def _should_show_plots():
    """
    Decide whether plots should be displayed in the current execution context.

    Returns False if:
    - pytest has been imported ('pytest' is present in sys.modules)
    - the PYTEST_CURRENT_TEST environment variable is set
    - any command-line argument is a ``.py`` file whose name contains
      ``test_`` and whose path contains a ``tests/`` directory
      (Windows-style ``tests\\`` separators are recognised as well)
    - the process was started through ``tito.py`` with a ``test`` argument

    Returns True otherwise (notebook / interactive session).
    """
    # Check if we're running in pytest
    if 'pytest' in sys.modules:
        return False

    # Check if we're in a test environment
    if os.environ.get('PYTEST_CURRENT_TEST'):
        return False

    # Check if we're running from a test file (more specific check)
    for arg in sys.argv:
        # Normalise separators first: a literal 'tests/' match would
        # otherwise miss paths written with backslashes.
        path = arg.replace('\\', '/')
        filename = path.rsplit('/', 1)[-1]
        if path.endswith('.py') and 'test_' in filename and 'tests/' in path:
            return False

    # Check if we're running from the tito CLI test command
    if sys.argv and 'tito.py' in sys.argv[0] and 'test' in sys.argv:
        return False

    # Default to showing plots (notebook/interactive environment)
    return True
class ReLU:
    """Rectified Linear Unit activation: f(x) = max(0, x)."""

    def forward(self, x: Tensor) -> Tensor:
        """
        Replace every negative entry of the input with zero.

        Args:
            x: Input tensor

        Returns:
            Tensor built from the element-wise max(0, x) of ``x.data``
        """
        rectified = np.maximum(0, x.data)
        return Tensor(rectified)

    def __call__(self, x: Tensor) -> Tensor:
        """Make the activation callable: relu(x) is the same as relu.forward(x)."""
        activated = self.forward(x)
        return activated
3.]]") + + # Test with different shapes + x_2d = Tensor([[-2.0, 1.0], [0.5, -0.5]]) + y_2d = relu(x_2d) + print(f"\n2D Input: {x_2d.data}") + print(f"2D ReLU output: {y_2d.data}") + + print("βœ… ReLU working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the ReLU function above!") + +# %% [markdown] +""" +### πŸ“Š Visualize ReLU Function + +Let's plot the ReLU function to see how it transforms inputs: +""" + +# %% +# Plot ReLU function +try: + print("=== Plotting ReLU Function ===") + + # Create a range of input values + x_range = np.linspace(-5, 5, 100) + x_tensor = Tensor([x_range]) + + # Apply ReLU (student implementation) + relu = ReLU() + y_tensor = relu(x_tensor) + y_range = y_tensor.data[0] + + # Create ideal ReLU for comparison + y_ideal = np.maximum(0, x_range) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the plot + plt.figure(figsize=(12, 8)) + + # Plot both student implementation and ideal + plt.subplot(2, 2, 1) + plt.plot(x_range, y_range, 'b-', linewidth=3, label='Your ReLU Implementation') + plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal ReLU') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('ReLU: Your Implementation vs Ideal') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-5, 5) + plt.ylim(-1, 5) + + # Mathematical explanation plot + plt.subplot(2, 2, 2) + # Show the mathematical definition + x_math = np.array([-3, -2, -1, 0, 1, 2, 3]) + y_math = np.maximum(0, x_math) + plt.stem(x_math, y_math, basefmt=' ', linefmt='g-', markerfmt='go') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('max(0, x)') + plt.title('Mathematical Definition: max(0, x)') + plt.grid(True, alpha=0.3) + plt.xlim(-4, 4) + 
plt.ylim(-0.5, 3.5) + + # Show the piecewise nature + plt.subplot(2, 2, 3) + x_left = np.linspace(-5, 0, 50) + x_right = np.linspace(0, 5, 50) + plt.plot(x_left, np.zeros_like(x_left), 'r-', linewidth=3, label='f(x) = 0 for x < 0') + plt.plot(x_right, x_right, 'b-', linewidth=3, label='f(x) = x for x β‰₯ 0') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Piecewise Function Definition') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-5, 5) + plt.ylim(-1, 5) + + # Error analysis + plt.subplot(2, 2, 4) + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + plt.plot(x_range, difference, 'purple', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('|Your Output - Ideal Output|') + plt.title(f'Implementation Error (Max: {max_error:.6f})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + plt.tight_layout() + plt.show() + + # Print analysis + print(f"\nπŸ“Š Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + + print(f"πŸ“ˆ Function properties:") + print(f" β€’ Range: [0, ∞)") + print(f" β€’ Piecewise: f(x) = 0 for x < 0, f(x) = x for x β‰₯ 0") + print(f" β€’ Monotonic: Always increasing for x β‰₯ 0") + print(f" β€’ Sparse: Exactly zero for negative inputs") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + + # Always show the mathematical analysis + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + print(f"\nπŸ“Š Mathematical Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 
1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure to implement the ReLU function above!") + +# %% [markdown] +""" +## Step 2: Sigmoid Activation Function + +**Sigmoid** squashes any input to the range (0, 1), making it useful for probabilities. + +**Formula**: `f(x) = 1 / (1 + e^(-x))` + +**Properties**: +- **Bounded**: Always outputs between 0 and 1 +- **Smooth**: Differentiable everywhere +- **S-shaped**: Smooth transition from 0 to 1 +- **Saturating**: Can suffer from vanishing gradients + +**When to use**: Binary classification (final layer), gates in RNNs/LSTMs. + +**⚠️ Numerical Stability**: Be careful with large inputs to avoid overflow! +""" + +# %% +#| export +class Sigmoid: + """ + Sigmoid Activation: f(x) = 1 / (1 + e^(-x)) + + Squashes input to range (0, 1). Often used for binary classification. + + TODO: Implement Sigmoid activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Sigmoid: f(x) = 1 / (1 + e^(-x)) + + Args: + x: Input tensor + + Returns: + Output tensor with Sigmoid applied element-wise + + TODO: Implement sigmoid function (be careful with numerical stability!) 
class Sigmoid:
    """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))"""

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Sigmoid element-wise using an overflow-safe formulation.

        Args:
            x: Input tensor

        Returns:
            Tensor with the same shape as the input, holding
            1 / (1 + e^(-x)) for every element
        """
        x_data = np.asarray(x.data)

        # exp(-|x|) is at most 1, so it cannot overflow for any input.
        # np.exp also promotes integer input to floating point, so results
        # are never written back into an integer buffer and truncated.
        decay = np.exp(-np.abs(x_data))

        # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))
        # For x < 0:  sigmoid(x) = exp(x) / (1 + exp(x))
        # In both branches the exponential equals exp(-|x|).
        result = np.where(x_data >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

        return Tensor(result)

    def __call__(self, x: Tensor) -> Tensor:
        """Make the activation callable: sigmoid(x) is the same as sigmoid.forward(x)."""
        return self.forward(x)
{np.all((y.data > 0) & (y.data < 1))}") + + # Test specific values + x_zero = Tensor([[0.0]]) + y_zero = sigmoid(x_zero) + print(f"\nSigmoid(0) = {y_zero.data[0, 0]:.4f} (should be 0.5)") + + # Test extreme values (numerical stability) + x_extreme = Tensor([[-100.0, 100.0]]) + y_extreme = sigmoid(x_extreme) + print(f"Sigmoid([-100, 100]) = {y_extreme.data}") + print("Should be close to [0, 1] without overflow errors") + + print("βœ… Sigmoid working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the Sigmoid function above!") + +# %% [markdown] +""" +### πŸ“Š Visualize Sigmoid Function + +Let's plot the Sigmoid function to see its S-shaped curve: +""" + +# %% +# Plot Sigmoid function +try: + print("=== Plotting Sigmoid Function ===") + + # Create a range of input values + x_range = np.linspace(-10, 10, 100) + x_tensor = Tensor([x_range]) + + # Apply Sigmoid (student implementation) + sigmoid = Sigmoid() + y_tensor = sigmoid(x_tensor) + y_range = y_tensor.data[0] + + # Create ideal Sigmoid for comparison + y_ideal = 1.0 / (1.0 + np.exp(-x_range)) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the plot + plt.figure(figsize=(12, 8)) + + # Plot both student implementation and ideal + plt.subplot(2, 2, 1) + plt.plot(x_range, y_range, 'g-', linewidth=3, label='Your Sigmoid Implementation') + plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Sigmoid') + plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5, label='y = 0.5') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Sigmoid: Your Implementation vs Ideal') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-10, 10) + plt.ylim(-0.1, 1.1) + + # Mathematical explanation plot + plt.subplot(2, 2, 2) + # Show key points + x_key 
= np.array([-5, -2, -1, 0, 1, 2, 5]) + y_key = 1.0 / (1.0 + np.exp(-x_key)) + plt.stem(x_key, y_key, basefmt=' ', linefmt='orange', markerfmt='o') + plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('1/(1+e^(-x))') + plt.title('Mathematical Definition: 1/(1+e^(-x))') + plt.grid(True, alpha=0.3) + plt.xlim(-6, 6) + plt.ylim(-0.1, 1.1) + + # Show the S-curve properties + plt.subplot(2, 2, 3) + x_detailed = np.linspace(-8, 8, 200) + y_detailed = 1.0 / (1.0 + np.exp(-x_detailed)) + plt.plot(x_detailed, y_detailed, 'g-', linewidth=3) + # Add asymptotes + plt.axhline(y=0, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = 0') + plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1') + plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.7, label='Midpoint: y = 0.5') + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('S-Curve Properties') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-8, 8) + plt.ylim(-0.1, 1.1) + + # Error analysis + plt.subplot(2, 2, 4) + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + plt.plot(x_range, difference, 'purple', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('|Your Output - Ideal Output|') + plt.title(f'Implementation Error (Max: {max_error:.6f})') + plt.grid(True, alpha=0.3) + plt.xlim(-10, 10) + + plt.tight_layout() + plt.show() + + # Print analysis + print(f"\nπŸ“Š Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + 
else: + print("πŸ”§ Implementation needs work.") + + print(f"πŸ“ˆ Function properties:") + print(f" β€’ Range: (0, 1)") + print(f" β€’ Symmetric around (0, 0.5)") + print(f" β€’ Smooth and differentiable everywhere") + print(f" β€’ Saturates for large |x| (vanishing gradient problem)") + print(f" β€’ Useful for binary classification (outputs probabilities)") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + + # Always show the mathematical analysis + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + print(f"\nπŸ“Š Mathematical Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure to implement the Sigmoid function above!") + +# %% [markdown] +""" +## Step 3: Tanh Activation Function + +**Tanh** (Hyperbolic Tangent) squashes inputs to the range (-1, 1). + +**Formula**: `f(x) = tanh(x) = (e^x - e^(-x)) / (e^x + e^(-x))` + +**Properties**: +- **Bounded**: Always outputs between -1 and 1 +- **Zero-centered**: Output is centered around 0 +- **Smooth**: Differentiable everywhere +- **Stronger gradients**: Than sigmoid around zero + +**When to use**: Hidden layers when you want zero-centered outputs, RNNs. + +**Advantage over Sigmoid**: Zero-centered outputs help with gradient flow. +""" + +# %% +#| export +class Tanh: + """ + Tanh Activation: f(x) = tanh(x) + + Squashes input to range (-1, 1). Zero-centered output. + + TODO: Implement Tanh activation function. 
class Tanh:
    """Hyperbolic tangent activation: f(x) = tanh(x)."""

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply tanh to every element of the input.

        Args:
            x: Input tensor

        Returns:
            Tensor built from ``np.tanh`` of ``x.data``
        """
        squashed = np.tanh(x.data)
        return Tensor(squashed)

    def __call__(self, x: Tensor) -> Tensor:
        """Make the activation callable: tanh(x) is the same as tanh.forward(x)."""
        activated = self.forward(x)
        return activated
{np.all((y.data > -1) & (y.data < 1))}") + + # Test specific values + x_zero = Tensor([[0.0]]) + y_zero = tanh(x_zero) + print(f"\nTanh(0) = {y_zero.data[0, 0]:.4f} (should be 0.0)") + + # Test extreme values + x_extreme = Tensor([[-10.0, 10.0]]) + y_extreme = tanh(x_extreme) + print(f"Tanh([-10, 10]) = {y_extreme.data}") + print("Should be close to [-1, 1]") + + print("βœ… Tanh working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the Tanh function above!") + +# %% [markdown] +""" +### πŸ“Š Visualize Tanh Function + +Let's plot the Tanh function to see its zero-centered S-shaped curve: +""" + +# %% +# Plot Tanh function +try: + print("=== Plotting Tanh Function ===") + + # Create a range of input values + x_range = np.linspace(-5, 5, 100) + x_tensor = Tensor([x_range]) + + # Apply Tanh (student implementation) + tanh = Tanh() + y_tensor = tanh(x_tensor) + y_range = y_tensor.data[0] + + # Create ideal Tanh for comparison + y_ideal = np.tanh(x_range) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the plot + plt.figure(figsize=(12, 8)) + + # Plot both student implementation and ideal + plt.subplot(2, 2, 1) + plt.plot(x_range, y_range, 'orange', linewidth=3, label='Your Tanh Implementation') + plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Tanh') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='--', alpha=0.3) + plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Tanh: Your Implementation vs Ideal') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-5, 5) + plt.ylim(-1.2, 1.2) + + # Mathematical explanation plot + plt.subplot(2, 2, 2) + # Show key points + x_key = np.array([-3, -2, -1, 0, 1, 2, 3]) + y_key = np.tanh(x_key) + plt.stem(x_key, y_key, basefmt=' ', linefmt='purple', 
markerfmt='o') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='--', alpha=0.3) + plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('tanh(x)') + plt.title('Mathematical Definition: tanh(x)') + plt.grid(True, alpha=0.3) + plt.xlim(-4, 4) + plt.ylim(-1.2, 1.2) + + # Show symmetry property + plt.subplot(2, 2, 3) + x_sym = np.linspace(-4, 4, 100) + y_sym = np.tanh(x_sym) + plt.plot(x_sym, y_sym, 'orange', linewidth=3, label='tanh(x)') + plt.plot(-x_sym, -y_sym, 'b--', linewidth=2, alpha=0.7, label='-tanh(-x)') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1') + plt.axhline(y=-1, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = -1') + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Symmetry: tanh(-x) = -tanh(x)') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-4, 4) + plt.ylim(-1.2, 1.2) + + # Error analysis + plt.subplot(2, 2, 4) + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + plt.plot(x_range, difference, 'purple', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('|Your Output - Ideal Output|') + plt.title(f'Implementation Error (Max: {max_error:.6f})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + plt.tight_layout() + plt.show() + + # Print analysis + print(f"\nπŸ“Š Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + + print(f"πŸ“ˆ Function properties:") + print(f" β€’ Range: (-1, 1)") + print(f" β€’ 
Odd function: tanh(-x) = -tanh(x)") + print(f" β€’ Symmetric around origin (0, 0)") + print(f" β€’ Smooth and differentiable everywhere") + print(f" β€’ Stronger gradients than sigmoid around zero") + print(f" β€’ Related to sigmoid: tanh(x) = 2*sigmoid(2x) - 1") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + + # Always show the mathematical analysis + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + print(f"\nπŸ“Š Mathematical Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure to implement the Tanh function above!") + +# %% [markdown] +""" +## Step 4: Compare All Activation Functions + +Let's see how all three functions behave on the same input: +""" + +# %% +# Compare all activation functions +try: + print("=== Comparing All Activation Functions ===") + + # Test data: range from -5 to 5 + x = Tensor([[-5.0, -2.0, -1.0, 0.0, 1.0, 2.0, 5.0]]) + print(f"Input: {x.data}") + + # Apply all activations + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + print(f"\nReLU: {y_relu.data}") + print(f"Sigmoid: {y_sigmoid.data}") + print(f"Tanh: {y_tanh.data}") + + print("\nπŸ“Š Key Differences:") + print("- ReLU: Zeros out negative values, unbounded positive") + print("- Sigmoid: Squashes to (0, 1), always positive") + print("- Tanh: Squashes to (-1, 1), zero-centered") + + print("\nβœ… All activation functions working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement all activation functions above!") + +# %% [markdown] +""" +### πŸ“Š Comprehensive Activation Function Comparison + +Let's plot 
all three functions together to see their differences: +""" + +# %% +# Plot all activation functions together +try: + print("=== Plotting All Activation Functions Together ===") + + # Create a range of input values + x_range = np.linspace(-5, 5, 100) + x_tensor = Tensor([x_range]) + + # Apply all activations (student implementations) + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + y_relu = relu(x_tensor).data[0] + y_sigmoid = sigmoid(x_tensor).data[0] + y_tanh = tanh(x_tensor).data[0] + + # Create ideal functions for comparison + y_relu_ideal = np.maximum(0, x_range) + y_sigmoid_ideal = 1.0 / (1.0 + np.exp(-x_range)) + y_tanh_ideal = np.tanh(x_range) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the comprehensive plot + plt.figure(figsize=(15, 10)) + + # Main comparison plot + plt.subplot(2, 3, (1, 2)) + plt.plot(x_range, y_relu, 'b-', linewidth=3, label='Your ReLU') + plt.plot(x_range, y_sigmoid, 'g-', linewidth=3, label='Your Sigmoid') + plt.plot(x_range, y_tanh, 'orange', linewidth=3, label='Your Tanh') + + # Add ideal functions as dashed lines + plt.plot(x_range, y_relu_ideal, 'b--', linewidth=1, alpha=0.7, label='Ideal ReLU') + plt.plot(x_range, y_sigmoid_ideal, 'g--', linewidth=1, alpha=0.7, label='Ideal Sigmoid') + plt.plot(x_range, y_tanh_ideal, '--', color='orange', linewidth=1, alpha=0.7, label='Ideal Tanh') + + # Add reference lines + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='--', alpha=0.3) + plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + + # Formatting + plt.xlabel('Input (x)', fontsize=12) + plt.ylabel('Output f(x)', fontsize=12) + plt.title('Activation Functions: Your Implementation vs Ideal', fontsize=14, fontweight='bold') + plt.grid(True, alpha=0.3) + plt.legend(fontsize=10, loc='upper left') + plt.xlim(-5, 5) + plt.ylim(-1.5, 5) + + # Mathematical definitions + 
plt.subplot(2, 3, 3) + plt.text(0.05, 0.95, 'Mathematical Definitions:', fontsize=12, fontweight='bold', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.85, 'ReLU:', fontsize=11, fontweight='bold', color='blue', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.80, 'f(x) = max(0, x)', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.70, 'Sigmoid:', fontsize=11, fontweight='bold', color='green', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.65, 'f(x) = 1/(1+e^(-x))', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.55, 'Tanh:', fontsize=11, fontweight='bold', color='orange', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.50, 'f(x) = tanh(x)', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.45, ' = (e^x-e^(-x))/(e^x+e^(-x))', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + + plt.text(0.05, 0.30, 'Key Properties:', fontsize=12, fontweight='bold', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.25, 'β€’ ReLU: Sparse, unbounded', fontsize=10, color='blue', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.20, 'β€’ Sigmoid: Bounded (0,1)', fontsize=10, color='green', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.15, 'β€’ Tanh: Zero-centered (-1,1)', fontsize=10, color='orange', + transform=plt.gca().transAxes, verticalalignment='top') + plt.axis('off') + + # Error analysis for ReLU + plt.subplot(2, 3, 4) + error_relu = np.abs(y_relu - y_relu_ideal) + plt.plot(x_range, error_relu, 'b-', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Error') + plt.title(f'ReLU Error (Max: 
{np.max(error_relu):.2e})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + # Error analysis for Sigmoid + plt.subplot(2, 3, 5) + error_sigmoid = np.abs(y_sigmoid - y_sigmoid_ideal) + plt.plot(x_range, error_sigmoid, 'g-', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Error') + plt.title(f'Sigmoid Error (Max: {np.max(error_sigmoid):.2e})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + # Error analysis for Tanh + plt.subplot(2, 3, 6) + error_tanh = np.abs(y_tanh - y_tanh_ideal) + plt.plot(x_range, error_tanh, 'orange', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Error') + plt.title(f'Tanh Error (Max: {np.max(error_tanh):.2e})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + plt.tight_layout() + plt.show() + + # Comprehensive analysis + print("\nπŸ“Š Comprehensive Analysis:") + print("=" * 60) + + # Function ranges + print("πŸ“ˆ Output Ranges:") + print(f" ReLU: [{np.min(y_relu):.3f}, {np.max(y_relu):.3f}]") + print(f" Sigmoid: [{np.min(y_sigmoid):.3f}, {np.max(y_sigmoid):.3f}]") + print(f" Tanh: [{np.min(y_tanh):.3f}, {np.max(y_tanh):.3f}]") + + # Implementation accuracy + print("\n🎯 Implementation Accuracy:") + max_errors = [np.max(error_relu), np.max(error_sigmoid), np.max(error_tanh)] + functions = ['ReLU', 'Sigmoid', 'Tanh'] + + for func, error in zip(functions, max_errors): + if error < 1e-10: + status = "βœ… PERFECT" + elif error < 1e-6: + status = "βœ… EXCELLENT" + elif error < 1e-3: + status = "⚠️ GOOD" + else: + status = "❌ NEEDS WORK" + print(f" {func:8s}: {status:12s} (error: {error:.2e})") + + # Mathematical properties verification + print("\nπŸ” Mathematical Properties:") + + # Zero-centered test + x_zero = Tensor([[0.0]]) + print(" Zero-centered test (f(0) should be 0):") + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + output = func(x_zero).data[0, 0] + is_zero = abs(output) < 1e-6 + 
expected = 0.0 if name != "Sigmoid" else 0.5 + print(f" {name:8s}: f(0) = {output:.4f} {'βœ…' if abs(output - expected) < 1e-6 else '❌'}") + + # Monotonicity test + print(" Monotonicity test (should be increasing):") + test_vals = np.array([-2, -1, 0, 1, 2]) + x_test = Tensor([test_vals]) + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + outputs = func(x_test).data[0] + is_monotonic = np.all(outputs[1:] >= outputs[:-1]) + print(f" {name:8s}: {'βœ… Monotonic' if is_monotonic else '❌ Not monotonic'}") + + print("\nπŸŽ‰ Comparison complete! Use these insights to understand each function's role in neural networks.") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure matplotlib is installed and all functions are implemented!") + +# %% [markdown] +""" +## Step 5: Understanding Activation Function Properties + +Let's explore the mathematical properties of each function: +""" + +# %% +# Explore activation function properties +try: + print("=== Activation Function Properties ===") + + # Create test functions + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + # Test with a range of values + test_values = np.linspace(-5, 5, 11) + x = Tensor([test_values]) + + print(f"Input range: {test_values}") + print(f"ReLU range: [{np.min(relu(x).data):.2f}, {np.max(relu(x).data):.2f}]") + print(f"Sigmoid range: [{np.min(sigmoid(x).data):.2f}, {np.max(sigmoid(x).data):.2f}]") + print(f"Tanh range: [{np.min(tanh(x).data):.2f}, {np.max(tanh(x).data):.2f}]") + + # Test monotonicity (should all be increasing functions) + print(f"\nπŸ“ˆ Monotonicity Test:") + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + outputs = func(x).data[0] + is_monotonic = np.all(outputs[1:] >= outputs[:-1]) + print(f"{name}: {'βœ… Monotonic' if is_monotonic else '❌ Not monotonic'}") + + # Test zero-centered property + print(f"\n🎯 Zero-Centered Test (f(0) 
= 0):") + x_zero = Tensor([[0.0]]) + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + output = func(x_zero).data[0, 0] + is_zero_centered = abs(output) < 1e-6 + print(f"{name}: f(0) = {output:.4f} {'βœ… Zero-centered' if is_zero_centered else '❌ Not zero-centered'}") + + print("\nπŸŽ‰ Property analysis complete!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Check your activation function implementations!") + +# %% [markdown] +""" +## Step 6: Practical Usage Examples + +Let's see how these functions would be used in practice: +""" + +# %% +# Practical usage examples +try: + print("=== Practical Usage Examples ===") + + # Example 1: Binary classification with sigmoid + print("1. Binary Classification (Sigmoid):") + logits = Tensor([[2.5, -1.2, 0.8, -0.3]]) # Raw network outputs + sigmoid = Sigmoid() + probabilities = sigmoid(logits) + print(f" Logits: {logits.data}") + print(f" Probabilities: {probabilities.data}") + print(f" Predictions: {(probabilities.data > 0.5).astype(int)}") + + # Example 2: Feature processing with ReLU + print("\n2. Feature Processing (ReLU):") + features = Tensor([[-0.5, 1.2, -2.1, 0.8, -0.1]]) # Mixed positive/negative + relu = ReLU() + processed = relu(features) + print(f" Raw features: {features.data}") + print(f" After ReLU: {processed.data}") + print(f" Sparsity: {np.mean(processed.data == 0):.1%} zeros") + + # Example 3: Normalized features with Tanh + print("\n3. Normalized Features (Tanh):") + raw_features = Tensor([[3.2, -1.8, 0.5, -2.4, 1.1]]) + tanh = Tanh() + normalized = tanh(raw_features) + print(f" Raw features: {raw_features.data}") + print(f" Normalized: {normalized.data}") + print(f" Mean: {np.mean(normalized.data):.3f} (close to 0)") + + print("\nβœ… Practical examples complete!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Check your activation function implementations!") + +# %% [markdown] +""" +## πŸŽ‰ Congratulations! 
+ +You've successfully implemented the three most important activation functions in deep learning! + +### 🧱 What You Built +1. **ReLU**: The workhorse activation that enables deep networks +2. **Sigmoid**: The probability activation for binary classification +3. **Tanh**: The zero-centered activation for better gradient flow + +### 🎯 Key Insights +- **Nonlinearity is essential**: Without activations, neural networks are just linear transformations +- **Different functions serve different purposes**: ReLU for hidden layers, Sigmoid for probabilities, Tanh for zero-centered outputs +- **Mathematical properties matter**: Monotonicity, boundedness, and zero-centering affect learning + +### 🚀 What's Next +These activation functions will be used in: +- **Layers Module**: Building neural network layers +- **Loss Functions**: Computing training objectives +- **Advanced Architectures**: CNNs, RNNs, and more + +### 🔧 Export to Package +Run this to export your activations to the TinyTorch package: +```bash +python bin/tito.py sync +``` + +Then test your implementation: +```bash +python bin/tito.py test --module activations +``` + +**Excellent work! You've mastered the mathematical foundations of neural networks!** 🎉 + +--- + +## 📚 Further Reading + +**Want to learn more about activation functions?** +- **ReLU variants**: Leaky ReLU, ELU, Swish +- **Advanced activations**: GELU, Mish, SiLU +- **Activation choice**: When to use which function +- **Gradient flow**: How activations affect training + +**Next modules**: Layers, Loss Functions, Optimization +""" \ No newline at end of file diff --git a/modules/activations/tests/test_activations.py b/modules/activations/tests/test_activations.py new file mode 100644 index 00000000..0a9f7967 --- /dev/null +++ b/modules/activations/tests/test_activations.py @@ -0,0 +1,345 @@ +""" +Test suite for the TinyTorch Activations module.
+ +This test suite validates the mathematical correctness of activation functions: +- ReLU: f(x) = max(0, x) +- Sigmoid: f(x) = 1 / (1 + e^(-x)) +- Tanh: f(x) = tanh(x) + +Tests focus on: +1. Mathematical correctness +2. Numerical stability +3. Edge cases +4. Shape preservation +5. Type consistency +""" + +import pytest +import numpy as np +import math +from tinytorch.core.tensor import Tensor + +# Import the activation functions +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from activations_dev import ReLU, Sigmoid, Tanh + + +class TestReLU: + """Test the ReLU activation function.""" + + def test_relu_basic_functionality(self): + """Test basic ReLU behavior: max(0, x)""" + relu = ReLU() + + # Test mixed positive/negative values + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + y = relu(x) + expected = np.array([[0.0, 0.0, 0.0, 1.0, 2.0]]) + + assert np.allclose(y.data, expected), f"Expected {expected}, got {y.data}" + + def test_relu_all_positive(self): + """Test ReLU with all positive values (should be unchanged)""" + relu = ReLU() + + x = Tensor([[1.0, 2.5, 3.7, 10.0]]) + y = relu(x) + + assert np.allclose(y.data, x.data), "ReLU should preserve positive values" + + def test_relu_all_negative(self): + """Test ReLU with all negative values (should be zeros)""" + relu = ReLU() + + x = Tensor([[-1.0, -2.5, -3.7, -10.0]]) + y = relu(x) + expected = np.zeros_like(x.data) + + assert np.allclose(y.data, expected), "ReLU should zero out negative values" + + def test_relu_zero_input(self): + """Test ReLU with zero input""" + relu = ReLU() + + x = Tensor([[0.0]]) + y = relu(x) + + assert y.data[0, 0] == 0.0, "ReLU(0) should be 0" + + def test_relu_shape_preservation(self): + """Test that ReLU preserves tensor shape""" + relu = ReLU() + + # Test different shapes + shapes = [(1, 5), (2, 3), (4, 1), (3, 3)] + for shape in shapes: + x = Tensor(np.random.randn(*shape)) + y = relu(x) + assert y.shape == x.shape, f"Shape mismatch: expected 
{x.shape}, got {y.shape}" + + def test_relu_callable(self): + """Test that ReLU can be called directly""" + relu = ReLU() + x = Tensor([[1.0, -1.0]]) + + y1 = relu(x) + y2 = relu.forward(x) + + assert np.allclose(y1.data, y2.data), "Direct call should match forward method" + + +class TestSigmoid: + """Test the Sigmoid activation function.""" + + def test_sigmoid_basic_functionality(self): + """Test basic Sigmoid behavior""" + sigmoid = Sigmoid() + + # Test known values + x = Tensor([[0.0]]) + y = sigmoid(x) + assert abs(y.data[0, 0] - 0.5) < 1e-6, "Sigmoid(0) should be 0.5" + + def test_sigmoid_range(self): + """Test that Sigmoid outputs are in (0, 1)""" + sigmoid = Sigmoid() + + # Test wide range of inputs + x = Tensor([[-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]]) + y = sigmoid(x) + + assert np.all(y.data > 0), "Sigmoid outputs should be > 0" + assert np.all(y.data < 1), "Sigmoid outputs should be < 1" + + def test_sigmoid_numerical_stability(self): + """Test Sigmoid with extreme values (numerical stability)""" + sigmoid = Sigmoid() + + # Test extreme values that could cause overflow + x = Tensor([[-100.0, -50.0, 50.0, 100.0]]) + y = sigmoid(x) + + # Should not contain NaN or inf + assert not np.any(np.isnan(y.data)), "Sigmoid should not produce NaN" + assert not np.any(np.isinf(y.data)), "Sigmoid should not produce inf" + + # Should be close to 0 for very negative, close to 1 for very positive + assert y.data[0, 0] < 1e-10, "Sigmoid(-100) should be very close to 0" + assert y.data[0, 1] < 1e-10, "Sigmoid(-50) should be very close to 0" + assert y.data[0, 2] > 1 - 1e-10, "Sigmoid(50) should be very close to 1" + assert y.data[0, 3] > 1 - 1e-10, "Sigmoid(100) should be very close to 1" + + def test_sigmoid_monotonicity(self): + """Test that Sigmoid is monotonically increasing""" + sigmoid = Sigmoid() + + x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) + y = sigmoid(x) + + # Check that outputs are increasing + for i in range(len(y.data[0]) - 1): + assert y.data[0, i] < 
y.data[0, i + 1], "Sigmoid should be monotonically increasing" + + def test_sigmoid_shape_preservation(self): + """Test that Sigmoid preserves tensor shape""" + sigmoid = Sigmoid() + + shapes = [(1, 5), (2, 3), (4, 1)] + for shape in shapes: + x = Tensor(np.random.randn(*shape)) + y = sigmoid(x) + assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}" + + def test_sigmoid_callable(self): + """Test that Sigmoid can be called directly""" + sigmoid = Sigmoid() + x = Tensor([[1.0, -1.0]]) + + y1 = sigmoid(x) + y2 = sigmoid.forward(x) + + assert np.allclose(y1.data, y2.data), "Direct call should match forward method" + + +class TestTanh: + """Test the Tanh activation function.""" + + def test_tanh_basic_functionality(self): + """Test basic Tanh behavior""" + tanh = Tanh() + + # Test known values + x = Tensor([[0.0]]) + y = tanh(x) + assert abs(y.data[0, 0] - 0.0) < 1e-6, "Tanh(0) should be 0" + + def test_tanh_range(self): + """Test that Tanh outputs are in [-1, 1]""" + tanh = Tanh() + + # Test wide range of inputs + x = Tensor([[-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]]) + y = tanh(x) + + assert np.all(y.data >= -1), "Tanh outputs should be >= -1" + assert np.all(y.data <= 1), "Tanh outputs should be <= 1" + + def test_tanh_symmetry(self): + """Test that Tanh is symmetric: tanh(-x) = -tanh(x)""" + tanh = Tanh() + + x = Tensor([[1.0, 2.0, 3.0]]) + x_neg = Tensor([[-1.0, -2.0, -3.0]]) + + y_pos = tanh(x) + y_neg = tanh(x_neg) + + assert np.allclose(y_neg.data, -y_pos.data), "Tanh should be symmetric" + + def test_tanh_monotonicity(self): + """Test that Tanh is monotonically increasing""" + tanh = Tanh() + + x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) + y = tanh(x) + + # Check that outputs are increasing + for i in range(len(y.data[0]) - 1): + assert y.data[0, i] < y.data[0, i + 1], "Tanh should be monotonically increasing" + + def test_tanh_extreme_values(self): + """Test Tanh with extreme values""" + tanh = Tanh() + + x = Tensor([[-100.0, 100.0]]) 
+ y = tanh(x) + + # Should be close to -1 and 1 respectively + assert abs(y.data[0, 0] - (-1.0)) < 1e-10, "Tanh(-100) should be very close to -1" + assert abs(y.data[0, 1] - 1.0) < 1e-10, "Tanh(100) should be very close to 1" + + def test_tanh_shape_preservation(self): + """Test that Tanh preserves tensor shape""" + tanh = Tanh() + + shapes = [(1, 5), (2, 3), (4, 1)] + for shape in shapes: + x = Tensor(np.random.randn(*shape)) + y = tanh(x) + assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}" + + def test_tanh_callable(self): + """Test that Tanh can be called directly""" + tanh = Tanh() + x = Tensor([[1.0, -1.0]]) + + y1 = tanh(x) + y2 = tanh.forward(x) + + assert np.allclose(y1.data, y2.data), "Direct call should match forward method" + + +class TestActivationComparison: + """Test interactions and comparisons between activation functions.""" + + def test_activation_consistency(self): + """Test that all activations work with the same input""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + + # All should process without error + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + # All should preserve shape + assert y_relu.shape == x.shape + assert y_sigmoid.shape == x.shape + assert y_tanh.shape == x.shape + + def test_activation_ranges(self): + """Test that activations have expected output ranges""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[-5.0, -2.0, 0.0, 2.0, 5.0]]) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + # ReLU: [0, inf) + assert np.all(y_relu.data >= 0), "ReLU should be non-negative" + + # Sigmoid: (0, 1) + assert np.all(y_sigmoid.data > 0), "Sigmoid should be positive" + assert np.all(y_sigmoid.data < 1), "Sigmoid should be less than 1" + + # Tanh: (-1, 1) + assert np.all(y_tanh.data > -1), "Tanh should be greater than -1" + assert np.all(y_tanh.data < 1), "Tanh should be less than 1" + + +# Integration 
tests with edge cases +class TestActivationEdgeCases: + """Test edge cases and boundary conditions.""" + + def test_zero_tensor(self): + """Test all activations with zero tensor""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[0.0, 0.0, 0.0]]) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + assert np.allclose(y_relu.data, [0.0, 0.0, 0.0]), "ReLU(0) should be 0" + assert np.allclose(y_sigmoid.data, [0.5, 0.5, 0.5]), "Sigmoid(0) should be 0.5" + assert np.allclose(y_tanh.data, [0.0, 0.0, 0.0]), "Tanh(0) should be 0" + + def test_single_element_tensor(self): + """Test all activations with single element tensor""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[1.0]]) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + assert y_relu.shape == (1, 1) + assert y_sigmoid.shape == (1, 1) + assert y_tanh.shape == (1, 1) + + def test_large_tensor(self): + """Test activations with larger tensors""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + # Create a 10x10 tensor + x = Tensor(np.random.randn(10, 10)) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + assert y_relu.shape == (10, 10) + assert y_sigmoid.shape == (10, 10) + assert y_tanh.shape == (10, 10) + + +if __name__ == "__main__": + # Run tests with pytest + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/modules/layers/layers_dev.py b/modules/layers/layers_dev.py index 5b657c1e..74b6890a 100644 --- a/modules/layers/layers_dev.py +++ b/modules/layers/layers_dev.py @@ -17,15 +17,20 @@ Welcome to the Layers module! This is where neural networks begin. 
You'll implem ## Learning Goals - Understand layers as functions that transform tensors: `y = f(x)` - Implement Dense layers with linear transformations: `y = Wx + b` -- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh) +- Use activation functions from the activations module for nonlinearity - See how neural networks are just function composition - Build intuition before diving into training ## Build β†’ Use β†’ Understand -1. **Build**: Dense layers and activation functions +1. **Build**: Dense layers using activation functions as building blocks 2. **Use**: Transform tensors and see immediate results 3. **Understand**: How neural networks transform information +## Module Dependencies +This module builds on the **activations** module: +- **activations** β†’ **layers** β†’ **networks** +- Clean separation of concerns: math functions β†’ layer building blocks β†’ full networks + ## Module β†’ Package Structure **πŸŽ“ Teaching vs. πŸ”§ Building**: - **Learning side**: Work in `modules/layers/layers_dev.py` @@ -51,6 +56,9 @@ import sys from typing import Union, Optional, Callable from tinytorch.core.tensor import Tensor +# Import activation functions from the activations module +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + # Import our Tensor class # sys.path.append('../../') # from modules.tensor.tensor_dev import Tensor @@ -203,12 +211,11 @@ try: print(f"Input: {x.data}") print(f"Output: {y.data}") - # Test with batch of examples + # Test with batch x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3) y_batch = layer(x_batch) print(f"\nBatch input shape: {x_batch.shape}") print(f"Batch output shape: {y_batch.shape}") - print(f"Batch output: {y_batch.data}") print("βœ… Dense layer working!") @@ -218,14 +225,20 @@ except Exception as e: # %% [markdown] """ -## Step 2: Activation Functions +## Step 2: Activation Functions - Adding Nonlinearity -Dense layers alone can only learn **linear** transformations. 
But most real-world problems need **nonlinear** transformations. +Now we'll use the activation functions from the **activations** module! -**Activation functions** add nonlinearity: -- **ReLU**: `max(0, x)` - Most common, simple and effective -- **Sigmoid**: `1 / (1 + e^(-x))` - Squashes to (0, 1) -- **Tanh**: `tanh(x)` - Squashes to (-1, 1) +**Clean Architecture**: We import the activation functions rather than redefining them: +```python +from tinytorch.core.activations import ReLU, Sigmoid, Tanh +``` + +**Why this matters**: +- **Separation of concerns**: Math functions vs. layer building blocks +- **Reusability**: Activations can be used anywhere in the system +- **Maintainability**: One place to update activation implementations +- **Composability**: Clean imports make neural networks easier to build **Why nonlinearity matters**: Without it, stacking layers is pointless! ``` @@ -234,178 +247,43 @@ Linear β†’ NonLinear β†’ Linear = Can learn complex patterns ``` """ -# %% -#| export -class ReLU: - """ - ReLU Activation: f(x) = max(0, x) - - The most popular activation function in deep learning. - Simple, effective, and computationally efficient. - - TODO: Implement ReLU activation function. 
- """ - - def forward(self, x: Tensor) -> Tensor: - """ - Apply ReLU: f(x) = max(0, x) - - Args: - x: Input tensor - - Returns: - Output tensor with ReLU applied element-wise - - TODO: Implement element-wise max(0, x) operation - """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - """Make activation callable: relu(x) same as relu.forward(x)""" - return self.forward(x) - -# %% -#| hide -#| export -class ReLU: - """ReLU Activation: f(x) = max(0, x)""" - - def forward(self, x: Tensor) -> Tensor: - """Apply ReLU: f(x) = max(0, x)""" - return Tensor(np.maximum(0, x.data)) - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% -#| export -class Sigmoid: - """ - Sigmoid Activation: f(x) = 1 / (1 + e^(-x)) - - Squashes input to range (0, 1). Often used for binary classification. - - TODO: Implement Sigmoid activation function. - """ - - def forward(self, x: Tensor) -> Tensor: - """ - Apply Sigmoid: f(x) = 1 / (1 + e^(-x)) - - Args: - x: Input tensor - - Returns: - Output tensor with Sigmoid applied element-wise - - TODO: Implement sigmoid function (be careful with numerical stability!) 
- """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% -#| hide -#| export -class Sigmoid: - """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))""" - - def forward(self, x: Tensor) -> Tensor: - """Apply Sigmoid with numerical stability""" - # Use the numerically stable version to avoid overflow - # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) - # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) - x_data = x.data - result = np.zeros_like(x_data) - - # Stable computation - positive_mask = x_data >= 0 - result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) - result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) - - return Tensor(result) - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% -#| export -class Tanh: - """ - Tanh Activation: f(x) = tanh(x) - - Squashes input to range (-1, 1). Zero-centered output. - - TODO: Implement Tanh activation function. 
- """ - - def forward(self, x: Tensor) -> Tensor: - """ - Apply Tanh: f(x) = tanh(x) - - Args: - x: Input tensor - - Returns: - Output tensor with Tanh applied element-wise - - TODO: Implement tanh function - """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% -#| hide -#| export -class Tanh: - """Tanh Activation: f(x) = tanh(x)""" - - def forward(self, x: Tensor) -> Tensor: - """Apply Tanh""" - return Tensor(np.tanh(x.data)) - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - # %% [markdown] """ -### πŸ§ͺ Test Your Activation Functions +### πŸ§ͺ Test Activation Functions from Activations Module -Once you implement the activation functions above, run this cell to test them: +Let's test that we can use the activation functions from the activations module: """ # %% -# Test activation functions +# Test activation functions from activations module try: - print("=== Testing Activation Functions ===") + print("=== Testing Activation Functions from Activations Module ===") # Test data: mix of positive, negative, and zero x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) print(f"Input: {x.data}") - # Test ReLU + # Test ReLU from activations module relu = ReLU() y_relu = relu(x) print(f"ReLU output: {y_relu.data}") - # Test Sigmoid + # Test Sigmoid from activations module sigmoid = Sigmoid() y_sigmoid = sigmoid(x) print(f"Sigmoid output: {y_sigmoid.data}") - # Test Tanh + # Test Tanh from activations module tanh = Tanh() y_tanh = tanh(x) print(f"Tanh output: {y_tanh.data}") - print("βœ… Activation functions working!") + print("βœ… Activation functions from activations module working!") + print("πŸŽ‰ Clean architecture: layers module uses activations module!") except Exception as e: print(f"❌ Error: {e}") - print("Make sure to implement the activation functions above!") + print("Make sure the activations module is properly exported!") # %% [markdown] """ @@ -418,6 +296,11 
@@ Input β†’ Dense β†’ ReLU β†’ Dense β†’ Sigmoid β†’ Output ``` This is a 2-layer neural network that can learn complex nonlinear patterns! + +**Notice the clean architecture**: +- Dense layers handle linear transformations +- Activation functions (from activations module) handle nonlinearity +- Composition creates complex behaviors from simple building blocks """ # %% @@ -431,9 +314,9 @@ try: # Output: 2 neurons with Sigmoid layer1 = Dense(input_size=3, output_size=4) - activation1 = ReLU() + activation1 = ReLU() # From activations module layer2 = Dense(input_size=4, output_size=2) - activation2 = Sigmoid() + activation2 = Sigmoid() # From activations module print("Network architecture:") print(f" Input: 3 features") @@ -458,28 +341,36 @@ try: print(f"Output values: {output.data}") print("\nπŸŽ‰ Neural network working! You just built your first neural network!") + print("πŸ—οΈ Clean architecture: Dense layers + Activations module = Neural Network") print("Notice how the network transforms 3D input into 2D output through learned transformations.") except Exception as e: print(f"❌ Error: {e}") - print("Make sure to implement the layers and activations above!") + print("Make sure to implement the layers and check activations module!") # %% [markdown] """ ## Step 4: Understanding What We Built -Congratulations! You just implemented the fundamental building blocks of neural networks: +Congratulations! You just implemented a clean, modular neural network architecture: ### 🧱 **What You Built** 1. **Dense Layer**: Linear transformation `y = Wx + b` -2. **Activation Functions**: Nonlinear transformations (ReLU, Sigmoid, Tanh) +2. **Activation Functions**: Imported from activations module (ReLU, Sigmoid, Tanh) 3. **Layer Composition**: Chaining layers to build networks +### πŸ—οΈ **Clean Architecture Benefits** +- **Separation of concerns**: Math functions vs. 
layer building blocks +- **Reusability**: Activations can be used across different modules +- **Maintainability**: One place to update activation implementations +- **Composability**: Clean imports make complex networks easier to build + ### 🎯 **Key Insights** - **Layers are functions**: They transform tensors from one space to another - **Composition creates complexity**: Simple layers β†’ complex networks - **Nonlinearity is crucial**: Without it, deep networks are just linear transformations - **Neural networks are function approximators**: They learn to map inputs to outputs +- **Modular design**: Building blocks can be combined in many ways ### πŸš€ **What's Next** In the next modules, you'll learn: @@ -498,7 +389,7 @@ Then test your implementation: python bin/tito.py test --module layers ``` -**Great job! You've built the foundation of neural networks!** πŸŽ‰ +**Great job! You've built a clean, modular foundation for neural networks!** πŸŽ‰ """ # %% @@ -514,9 +405,9 @@ try: # Build a 3-layer network for digit classification # 784 β†’ 128 β†’ 64 β†’ 10 layer1 = Dense(input_size=image_size, output_size=128) - relu1 = ReLU() + relu1 = ReLU() # From activations module layer2 = Dense(input_size=128, output_size=64) - relu2 = ReLU() + relu2 = ReLU() # From activations module layer3 = Dense(input_size=64, output_size=num_classes) softmax = Sigmoid() # Using Sigmoid as a simple "probability-like" output @@ -541,8 +432,38 @@ try: print(f" Sample predictions: {predictions.data[0]}") # First image predictions print("\nπŸŽ‰ You built a neural network that could classify images!") + print("πŸ—οΈ Clean architecture: Dense layers + Activations module = Image Classifier") print("With training, this network could learn to recognize handwritten digits!") except Exception as e: print(f"❌ Error: {e}") - print("Check your layer implementations!") \ No newline at end of file + print("Check your layer implementations and activations module!") + +# %% [markdown] +""" +## πŸŽ“ 
Module Summary + +### What You Learned +1. **Layer Architecture**: Dense layers as linear transformations +2. **Clean Dependencies**: Layers module uses activations module +3. **Function Composition**: Simple building blocks → complex networks +4. **Modular Design**: Separation of concerns for maintainable code + +### Key Architectural Insight +``` +activations (math functions) → layers (building blocks) → networks (applications) +``` + +This clean dependency graph makes the system: +- **Understandable**: Each module has a clear purpose +- **Testable**: Each module can be tested independently +- **Reusable**: Components can be used across different contexts +- **Maintainable**: Changes are localized to appropriate modules + +### Next Steps +- **Training**: Learn how networks learn from data +- **Advanced Architectures**: CNNs, RNNs, Transformers +- **Applications**: Real-world machine learning problems + +**Congratulations on building a clean, modular neural network foundation!** 🚀 +""" \ No newline at end of file diff --git a/modules/layers/tests/test_layers.py b/modules/layers/tests/test_layers.py index c85e208a..4f0af00b 100644 --- a/modules/layers/tests/test_layers.py +++ b/modules/layers/tests/test_layers.py @@ -18,7 +18,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) # Import from the module's development file # Note: This imports the instructor version with full implementation -from layers_dev import Dense, ReLU, Sigmoid, Tanh, Tensor +from layers_dev import Dense, Tensor + +# Import activation functions from the activations module +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), '..', 'activations')) +from activations_dev import ReLU, Sigmoid, Tanh def safe_numpy(tensor): """Get numpy array from tensor, using .numpy() if available, otherwise .data""" diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py index fe281e09..6c56fd46 100644 --- a/tinytorch/_modidx.py +++ b/tinytorch/_modidx.py @@ -5,7 
+5,30 @@ d = { 'settings': { 'branch': 'main', 'doc_host': 'https://tinytorch.github.io', 'git_url': 'https://github.com/tinytorch/TinyTorch/', 'lib_path': 'tinytorch'}, - 'syms': { 'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'), + 'syms': { 'tinytorch.core.activations': {}, + 'tinytorch.core.layers': { 'tinytorch.core.layers.Dense': ('layers/layers_dev.html#dense', 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dense.__call__': ( 'layers/layers_dev.html#dense.__call__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dense.__init__': ( 'layers/layers_dev.html#dense.__init__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Dense.forward': ( 'layers/layers_dev.html#dense.forward', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.ReLU': ('layers/layers_dev.html#relu', 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.ReLU.__call__': ( 'layers/layers_dev.html#relu.__call__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.ReLU.forward': ( 'layers/layers_dev.html#relu.forward', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Sigmoid': ('layers/layers_dev.html#sigmoid', 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Sigmoid.__call__': ( 'layers/layers_dev.html#sigmoid.__call__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Sigmoid.forward': ( 'layers/layers_dev.html#sigmoid.forward', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Tanh': ('layers/layers_dev.html#tanh', 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Tanh.__call__': ( 'layers/layers_dev.html#tanh.__call__', + 'tinytorch/core/layers.py'), + 'tinytorch.core.layers.Tanh.forward': ( 'layers/layers_dev.html#tanh.forward', + 'tinytorch/core/layers.py')}, + 'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'), 'tinytorch.core.tensor.Tensor.__init__': ( 
'tensor/tensor_dev.html#tensor.__init__', 'tinytorch/core/tensor.py'), 'tinytorch.core.tensor.Tensor.__repr__': ( 'tensor/tensor_dev.html#tensor.__repr__', @@ -22,7 +45,21 @@ d = { 'settings': { 'branch': 'main', 'tinytorch/core/tensor.py'), 'tinytorch.core.tensor._add_utility_methods': ( 'tensor/tensor_dev.html#_add_utility_methods', 'tinytorch/core/tensor.py')}, - 'tinytorch.core.utils': { 'tinytorch.core.utils.SystemInfo': ('setup/setup_dev.html#systeminfo', 'tinytorch/core/utils.py'), + 'tinytorch.core.utils': { 'tinytorch.core.utils.DeveloperProfile': ( 'setup/setup_dev.html#developerprofile', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.__init__': ( 'setup/setup_dev.html#developerprofile.__init__', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.__str__': ( 'setup/setup_dev.html#developerprofile.__str__', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile._load_default_flame': ( 'setup/setup_dev.html#developerprofile._load_default_flame', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.get_ascii_art': ( 'setup/setup_dev.html#developerprofile.get_ascii_art', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.get_full_profile': ( 'setup/setup_dev.html#developerprofile.get_full_profile', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.DeveloperProfile.get_signature': ( 'setup/setup_dev.html#developerprofile.get_signature', + 'tinytorch/core/utils.py'), + 'tinytorch.core.utils.SystemInfo': ('setup/setup_dev.html#systeminfo', 'tinytorch/core/utils.py'), 'tinytorch.core.utils.SystemInfo.__init__': ( 'setup/setup_dev.html#systeminfo.__init__', 'tinytorch/core/utils.py'), 'tinytorch.core.utils.SystemInfo.__str__': ( 'setup/setup_dev.html#systeminfo.__str__', diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py new file mode 100644 index 00000000..beec6336 --- /dev/null +++ b/tinytorch/core/activations.py @@ -0,0 +1,58 @@ +# 
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/activations/activations_dev.py.
# NOTE(review): this module is generated from activations_dev.py; mirror any
# change made here in that development file so the next export keeps it.

# %% auto 0
__all__ = ['ReLU', 'Sigmoid', 'Tanh']

# %% ../../modules/activations/activations_dev.py auto 1
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys

# TinyTorch imports
from tinytorch.core.tensor import Tensor

# %% ../../modules/activations/activations_dev.py auto 2
class ReLU:
    """ReLU activation: f(x) = max(0, x), applied element-wise."""

    def forward(self, x: Tensor) -> Tensor:
        """Return a new Tensor holding ``max(0, x)`` for every element of ``x.data``."""
        return Tensor(np.maximum(0, x.data))

    def __call__(self, x: Tensor) -> Tensor:
        """Make the activation callable: ``relu(x)`` is ``relu.forward(x)``."""
        return self.forward(x)

# %% ../../modules/activations/activations_dev.py auto 3
class Sigmoid:
    """Sigmoid activation: f(x) = 1 / (1 + e^(-x)), applied element-wise."""

    def forward(self, x: Tensor) -> Tensor:
        """Return a new Tensor holding the sigmoid of every element of ``x.data``.

        Uses the overflow-free formulation

            x >= 0:  1 / (1 + exp(-x))
            x <  0:  exp(x) / (1 + exp(x))

        Both branches only need ``exp(-|x|)``, which lies in (0, 1] and so can
        never overflow. The result dtype follows NumPy's promotion rules, so
        integer input yields floating-point output instead of being truncated
        into an integer buffer.
        """
        x_data = np.asarray(x.data)

        # exp(-|x|) equals exp(-x) for x >= 0 and exp(x) for x < 0.
        decay = np.exp(-np.abs(x_data))
        result = np.where(x_data >= 0,
                          1.0 / (1.0 + decay),
                          decay / (1.0 + decay))

        return Tensor(result)

    def __call__(self, x: Tensor) -> Tensor:
        """Make the activation callable: ``sigmoid(x)`` is ``sigmoid.forward(x)``."""
        return self.forward(x)

# %% ../../modules/activations/activations_dev.py auto 4
class Tanh:
    """Tanh activation: f(x) = tanh(x), applied element-wise."""

    def forward(self, x: Tensor) -> Tensor:
        """Return a new Tensor holding ``tanh`` of every element of ``x.data``."""
        return Tensor(np.tanh(x.data))

    def __call__(self, x: Tensor) -> Tensor:
        """Make the activation callable: ``tanh(x)`` is ``tanh.forward(x)``."""
        return self.forward(x)
# %% auto 0 -__all__ = ['Dense', 'ReLU', 'Sigmoid', 'Tanh'] +__all__ = ['Dense'] # %% ../../modules/layers/layers_dev.ipynb 2 import numpy as np @@ -10,6 +10,9 @@ import sys from typing import Union, Optional, Callable from .tensor import Tensor +# Import activation functions from the activations module +from .activations import ReLU, Sigmoid, Tanh + # Import our Tensor class # sys.path.append('../../') # from modules.tensor.tensor_dev import Tensor @@ -109,130 +112,3 @@ class Dense: def __call__(self, x: Tensor) -> Tensor: """Make layer callable: layer(x) same as layer.forward(x)""" return self.forward(x) - -# %% ../../modules/layers/layers_dev.ipynb 9 -class ReLU: - """ - ReLU Activation: f(x) = max(0, x) - - The most popular activation function in deep learning. - Simple, effective, and computationally efficient. - - TODO: Implement ReLU activation function. - """ - - def forward(self, x: Tensor) -> Tensor: - """ - Apply ReLU: f(x) = max(0, x) - - Args: - x: Input tensor - - Returns: - Output tensor with ReLU applied element-wise - - TODO: Implement element-wise max(0, x) operation - """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - """Make activation callable: relu(x) same as relu.forward(x)""" - return self.forward(x) - -# %% ../../modules/layers/layers_dev.ipynb 10 -class ReLU: - """ReLU Activation: f(x) = max(0, x)""" - - def forward(self, x: Tensor) -> Tensor: - """Apply ReLU: f(x) = max(0, x)""" - return Tensor(np.maximum(0, x.data)) - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% ../../modules/layers/layers_dev.ipynb 11 -class Sigmoid: - """ - Sigmoid Activation: f(x) = 1 / (1 + e^(-x)) - - Squashes input to range (0, 1). Often used for binary classification. - - TODO: Implement Sigmoid activation function. 
- """ - - def forward(self, x: Tensor) -> Tensor: - """ - Apply Sigmoid: f(x) = 1 / (1 + e^(-x)) - - Args: - x: Input tensor - - Returns: - Output tensor with Sigmoid applied element-wise - - TODO: Implement sigmoid function (be careful with numerical stability!) - """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% ../../modules/layers/layers_dev.ipynb 12 -class Sigmoid: - """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))""" - - def forward(self, x: Tensor) -> Tensor: - """Apply Sigmoid with numerical stability""" - # Use the numerically stable version to avoid overflow - # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) - # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) - x_data = x.data - result = np.zeros_like(x_data) - - # Stable computation - positive_mask = x_data >= 0 - result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) - result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) - - return Tensor(result) - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% ../../modules/layers/layers_dev.ipynb 13 -class Tanh: - """ - Tanh Activation: f(x) = tanh(x) - - Squashes input to range (-1, 1). Zero-centered output. - - TODO: Implement Tanh activation function. 
- """ - - def forward(self, x: Tensor) -> Tensor: - """ - Apply Tanh: f(x) = tanh(x) - - Args: - x: Input tensor - - Returns: - Output tensor with Tanh applied element-wise - - TODO: Implement tanh function - """ - raise NotImplementedError("Student implementation required") - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) - -# %% ../../modules/layers/layers_dev.ipynb 14 -class Tanh: - """Tanh Activation: f(x) = tanh(x)""" - - def forward(self, x: Tensor) -> Tensor: - """Apply Tanh""" - return Tensor(np.tanh(x.data)) - - def __call__(self, x: Tensor) -> Tensor: - return self.forward(x) diff --git a/tinytorch/core/utils.py b/tinytorch/core/utils.py index df63d59a..ef2bdf91 100644 --- a/tinytorch/core/utils.py +++ b/tinytorch/core/utils.py @@ -1,22 +1,98 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/setup/setup_dev.ipynb. # %% auto 0 -__all__ = ['hello_tinytorch', 'add_numbers', 'SystemInfo'] +__all__ = ['hello_tinytorch', 'add_numbers', 'SystemInfo', 'DeveloperProfile'] # %% ../../modules/setup/setup_dev.ipynb 3 def hello_tinytorch(): - """A simple hello world function for TinyTorch.""" - return "Hello from TinyTorch! πŸ”₯" + """ + A simple hello world function for TinyTorch. + + TODO: Implement this function to display TinyTorch ASCII art and welcome message. + Load the flame art from tinytorch_flame.txt file with graceful fallback. + """ + raise NotImplementedError("Student implementation required") + +def add_numbers(a, b): + """ + Add two numbers together. + + TODO: Implement addition of two numbers. + This is the foundation of all mathematical operations in ML. 
+ """ + raise NotImplementedError("Student implementation required") + +# %% ../../modules/setup/setup_dev.ipynb 4 +def hello_tinytorch(): + """Display the TinyTorch ASCII art and welcome message.""" + try: + # Get the directory containing this file + current_dir = Path(__file__).parent + art_file = current_dir / "tinytorch_flame.txt" + + if art_file.exists(): + with open(art_file, 'r') as f: + ascii_art = f.read() + print(ascii_art) + print("TinyπŸ”₯Torch") + print("Build ML Systems from Scratch!") + else: + print("πŸ”₯ TinyTorch πŸ”₯") + print("Build ML Systems from Scratch!") + except NameError: + # Handle case when running in notebook where __file__ is not defined + try: + art_file = Path(os.getcwd()) / "tinytorch_flame.txt" + if art_file.exists(): + with open(art_file, 'r') as f: + ascii_art = f.read() + print(ascii_art) + print("TinyπŸ”₯Torch") + print("Build ML Systems from Scratch!") + else: + print("πŸ”₯ TinyTorch πŸ”₯") + print("Build ML Systems from Scratch!") + except: + print("πŸ”₯ TinyTorch πŸ”₯") + print("Build ML Systems from Scratch!") def add_numbers(a, b): """Add two numbers together.""" return a + b +# %% ../../modules/setup/setup_dev.ipynb 8 +class SystemInfo: + """ + Simple system information class. + + TODO: Implement this class to collect and display system information. + """ + + def __init__(self): + """ + Initialize system information collection. + + TODO: Collect Python version, platform, and machine information. + """ + raise NotImplementedError("Student implementation required") + + def __str__(self): + """ + Return human-readable system information. + + TODO: Format system info as a readable string. + """ + raise NotImplementedError("Student implementation required") + + def is_compatible(self): + """ + Check if system meets minimum requirements. 
+ + TODO: Check if Python version is >= 3.8 + """ + raise NotImplementedError("Student implementation required") -# %% ../../modules/setup/setup_dev.ipynb 6 -import sys -import platform - +# %% ../../modules/setup/setup_dev.ipynb 9 class SystemInfo: """Simple system information class.""" @@ -32,3 +108,145 @@ class SystemInfo: """Check if system meets minimum requirements.""" return self.python_version >= (3, 8) +# %% ../../modules/setup/setup_dev.ipynb 13 +class DeveloperProfile: + """ + Developer profile for personalizing TinyTorch experience. + + TODO: Implement this class to store and display developer information. + Default to course instructor but allow students to personalize. + """ + + @staticmethod + def _load_default_flame(): + """ + Load the default TinyTorch flame ASCII art from file. + + TODO: Implement file loading for tinytorch_flame.txt with fallback. + """ + raise NotImplementedError("Student implementation required") + + def __init__(self, name="Vijay Janapa Reddi", affiliation="Harvard University", + email="vj@eecs.harvard.edu", github_username="profvjreddi", ascii_art=None): + """ + Initialize developer profile. + + TODO: Store developer information with sensible defaults. + Students should be able to customize this with their own info and ASCII art. + """ + raise NotImplementedError("Student implementation required") + + def __str__(self): + """ + Return formatted developer information. + + TODO: Format developer info as a professional signature with optional ASCII art. + """ + raise NotImplementedError("Student implementation required") + + def get_signature(self): + """ + Get a short signature for code headers. + + TODO: Return a concise signature like "Built by Name (@github)" + """ + raise NotImplementedError("Student implementation required") + + def get_ascii_art(self): + """ + Get ASCII art for the profile. + + TODO: Return custom ASCII art or default flame loaded from file. 
# %% ../../modules/setup/setup_dev.ipynb 14
class DeveloperProfile:
    """Developer profile for personalizing the TinyTorch experience.

    Holds a developer's name, affiliation, e-mail address and GitHub
    username together with a piece of ASCII art, and renders them as short
    or long text signatures.
    """

    @staticmethod
    def _load_default_flame():
        """Return the default TinyTorch flame banner as a string.

        Reads ``tinytorch_flame.txt`` from the directory containing this
        module (or from the current working directory when ``__file__`` is
        not defined, e.g. inside a notebook) and appends the TinyTorch
        tagline. If the file cannot be read, a built-in ASCII flame is
        returned instead, so this method never raises for a missing file.
        """
        # Imported here so the loader does not depend on which names the
        # enclosing module imports at top level.
        import os

        try:
            try:
                base_dir = os.path.dirname(__file__)
            except NameError:
                # __file__ is undefined in interactive/notebook sessions.
                base_dir = os.getcwd()

            flame_path = os.path.join(base_dir, 'tinytorch_flame.txt')
            with open(flame_path, 'r', encoding='utf-8') as f:
                flame_art = f.read()
        except OSError:
            # Missing or unreadable file: fall back to the built-in banner.
            fallback_rows = (
                "",
                "    🔥 TinyTorch Developer 🔥",
                "        . . . . . .",
                "        . . . . . .",
                "       . . . . . . .",
                "      . . . . . . . .",
                "     . . . . . . . . .",
                "    . . . . . . . . . .",
                "   . . . . . . . . . . .",
                "  . . . . . . . . . . . .",
                " . . . . . . . . . . . . .",
                ". . . . . . . . . . . . . .",
                " \\ \\ \\ \\ \\ \\ \\ \\ \\ / / / / / /",
                "  \\ \\ \\ \\ \\ \\ \\ \\ / / / / / /",
                "   \\ \\ \\ \\ \\ \\ \\ / / / / / /",
                "    \\ \\ \\ \\ \\ \\ / / / / / /",
                "     \\ \\ \\ \\ \\ / / / / / /",
                "      \\ \\ \\ \\ / / / / / /",
                "       \\ \\ \\ / / / / / /",
                "        \\ \\ / / / / / /",
                "         \\ / / / / / /",
                "          \\/ / / / / /",
                "           \\/ / / / /",
                "            \\/ / / /",
                "             \\/ / /",
                "              \\/ /",
                "               \\/",
                "",
                "    Tiny🔥Torch",
                "    Build ML Systems from Scratch!",
                "",
            )
            return "\n".join(fallback_rows)

        # Add the Tiny🔥Torch text below the flame read from disk.
        return "\n".join((
            flame_art,
            "",
            "    Tiny🔥Torch",
            "    Build ML Systems from Scratch!",
            "",
        ))

    def __init__(self, name="Vijay Janapa Reddi", affiliation="Harvard University",
                 email="vj@eecs.harvard.edu", github_username="profvjreddi", ascii_art=None):
        """Store the profile fields.

        Args:
            name: Developer's display name.
            affiliation: Institution or organisation.
            email: Contact e-mail address.
            github_username: GitHub handle, without the leading ``@``.
            ascii_art: Custom banner; any falsy value (``None`` or ``""``)
                selects the default flame from ``_load_default_flame``.
        """
        self.name = name
        self.affiliation = affiliation
        self.email = email
        self.github_username = github_username
        self.ascii_art = ascii_art or self._load_default_flame()

    def __str__(self):
        """Return a one-line ``name | affiliation | @github`` summary."""
        return f"👨‍💻 {self.name} | {self.affiliation} | @{self.github_username}"

    def get_signature(self):
        """Get a short signature for code headers."""
        return f"Built by {self.name} (@{self.github_username})"

    def get_ascii_art(self):
        """Get ASCII art for the profile."""
        return self.ascii_art

    def get_full_profile(self):
        """Get complete profile with ASCII art."""
        return f"""{self.ascii_art}

👨‍💻 Developer: {self.name}
🏛️ Affiliation: {self.affiliation}
📧 Email: {self.email}
🐙 GitHub: @{self.github_username}
🔥 Ready to build ML systems from scratch!
"""