diff --git a/_proc/_quarto.yml b/_proc/_quarto.yml new file mode 100644 index 00000000..59a1e49c --- /dev/null +++ b/_proc/_quarto.yml @@ -0,0 +1,22 @@ +project: + type: website + +format: + html: + theme: cosmo + css: styles.css + toc: true + keep-md: true + commonmark: default + +website: + twitter-card: true + open-graph: true + repo-actions: [issue] + navbar: + background: primary + search: true + sidebar: + style: floating + +metadata-files: [nbdev.yml, sidebar.yml] \ No newline at end of file diff --git a/_proc/activations/README.md b/_proc/activations/README.md new file mode 100644 index 00000000..9f37c496 --- /dev/null +++ b/_proc/activations/README.md @@ -0,0 +1,237 @@ +# πŸ”₯ TinyTorch Activations Module + +Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power to learn complex patterns. + +## 🎯 Learning Objectives + +By the end of this module, you will: +1. **Understand** why activation functions are essential for neural networks +2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh +3. **Test** your functions with various inputs to understand their behavior +4. **Grasp** the mathematical properties that make each function useful + +## 🧠 Why This Module Matters + +**Without activation functions, neural networks are just linear transformations!** + +``` +Linear β†’ Linear β†’ Linear = Still just Linear +Linear β†’ Activation β†’ Linear = Can learn complex patterns! +``` + +This module teaches you the mathematical foundations that make deep learning possible. + +## πŸ“š What You'll Build + +### 1. **ReLU** (Rectified Linear Unit) +- **Formula**: `f(x) = max(0, x)` +- **Properties**: Simple, sparse, unbounded +- **Use case**: Hidden layers (most common) + +### 2. **Sigmoid** +- **Formula**: `f(x) = 1 / (1 + e^(-x))` +- **Properties**: Bounded to (0,1), smooth, probabilistic +- **Use case**: Binary classification, gates + +### 3. **Tanh** (Hyperbolic Tangent) +- **Formula**: `f(x) = tanh(x)` +- **Properties**: Bounded to (-1,1), zero-centered, smooth +- **Use case**: Hidden layers, RNNs + +## πŸš€ Getting Started + +### Development Workflow + +1. **Open the development file**: + ```bash + python bin/tito.py jupyter + # Then open modules/activations/activations_dev.py + ``` + +2. **Implement the functions**: + - Start with ReLU (simplest) + - Move to Sigmoid (numerical stability challenge) + - Finish with Tanh (symmetry properties) + +3. **Visualize your functions**: + - Each function has plotting sections + - See how your implementation transforms inputs + - Compare all functions side-by-side + +4. **Test as you go**: + ```bash + python bin/tito.py test --module activations + ``` + +5. 
**Export to package**: + ```bash + python bin/tito.py sync + ``` + +### πŸ“Š Visual Learning Features + +This module includes comprehensive plotting sections to help you understand: + +- **Individual Function Plots**: See each activation function's curve +- **Implementation Comparison**: Your implementation vs ideal side-by-side +- **Mathematical Explanations**: Visual breakdown of function properties +- **Error Analysis**: Quantitative feedback on implementation accuracy +- **Comprehensive Comparison**: All functions analyzed together + +**Enhanced Features**: +- **4-Panel Plots**: Implementation vs ideal, mathematical definition, properties, error analysis +- **Real-time Feedback**: Immediate accuracy scores with color-coded status +- **Mathematical Insights**: Detailed explanations of function properties +- **Numerical Stability Testing**: Verification with extreme values +- **Property Verification**: Symmetry, monotonicity, and zero-centering tests + +**Why enhanced plots matter**: +- **Visual Debugging**: See exactly where your implementation differs +- **Quantitative Feedback**: Get precise error measurements +- **Mathematical Understanding**: Connect formulas to visual behavior +- **Implementation Confidence**: Know immediately if your code is correct +- **Learning Reinforcement**: Multiple visual perspectives of the same concept + +### Implementation Tips + +#### ReLU Implementation +```python +def forward(self, x: Tensor) -> Tensor: + return Tensor(np.maximum(0, x.data)) +``` + +#### Sigmoid Implementation (Numerical Stability) +```python +def forward(self, x: Tensor) -> Tensor: + # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) + # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) + x_data = x.data + result = np.zeros_like(x_data) + + positive_mask = x_data >= 0 + result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) + result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) + + return Tensor(result) +``` + +#### Tanh Implementation +```python +def forward(self, x: Tensor) -> Tensor: + return Tensor(np.tanh(x.data)) +``` + +## πŸ§ͺ Testing Your Implementation + +### Unit Tests +```bash +python bin/tito.py test --module activations +``` + +**Test Coverage**: +- βœ… Mathematical correctness +- βœ… Numerical stability +- βœ… Shape preservation +- βœ… Edge cases +- βœ… Function properties + +### Manual Testing +```python +# Test all activations +from tinytorch.core.tensor import Tensor +from modules.activations.activations_dev import ReLU, Sigmoid, Tanh + +x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + +relu = ReLU() +sigmoid = Sigmoid() +tanh = Tanh() + +print("Input:", x.data) +print("ReLU:", relu(x).data) +print("Sigmoid:", sigmoid(x).data) +print("Tanh:", tanh(x).data) +``` + +## πŸ“Š Understanding Function Properties + +### Range Comparison +| Function | Input Range | Output Range | Zero Point | +|----------|-------------|--------------|------------| +| ReLU | (-∞, ∞) | [0, ∞) | f(0) = 0 | +| Sigmoid | (-∞, ∞) | (0, 1) | f(0) = 0.5 | +| Tanh | (-∞, ∞) | (-1, 1) | f(0) = 0 | + +### Key Properties +- **ReLU**: Sparse (zeros out negatives), unbounded, simple +- **Sigmoid**: Probabilistic (0-1 range), smooth, saturating +- **Tanh**: Zero-centered, symmetric, stronger gradients than sigmoid + +## πŸ”§ Integration with TinyTorch + +After implementation, your activations will be available as: + +```python +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + +# Use in neural networks +relu = ReLU() +output = relu(input_tensor) +``` + 
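+
+As a quick sanity check on the stability trick used in the Sigmoid implementation above (a sketch; it assumes you have already run `python bin/tito.py sync` so the package import works), compare the naive formula against the branch-based version on extreme inputs:
+
+```python
+import numpy as np
+from tinytorch.core.tensor import Tensor
+from tinytorch.core.activations import Sigmoid
+
+x = np.array([-1000.0, 1000.0])
+
+# Naive formula: exp(1000) overflows to inf (NumPy warns), though the
+# final division still collapses to [0., 1.].
+with np.errstate(over='ignore'):
+    naive = 1.0 / (1.0 + np.exp(-x))
+
+# Branch-based implementation: finite intermediates, no overflow warning.
+stable = Sigmoid()(Tensor([x]))
+print(naive, stable.data)
+```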
+## 🎯 Common Issues & Solutions + +### Issue 1: Sigmoid Overflow +**Problem**: `exp()` overflow with large inputs +**Solution**: Use numerically stable implementation (see code above) + +### Issue 2: Wrong Output Range +**Problem**: Sigmoid/Tanh outputs outside expected range +**Solution**: Check your mathematical implementation + +### Issue 3: Shape Mismatch +**Problem**: Output shape differs from input shape +**Solution**: Ensure element-wise operations preserve shape + +### Issue 4: Import Errors +**Problem**: Cannot import after implementation +**Solution**: Run `python bin/tito.py sync` to export to package + +## πŸ“ˆ Performance Considerations + +- **ReLU**: Fastest (simple max operation) +- **Sigmoid**: Moderate (exponential computation) +- **Tanh**: Moderate (hyperbolic function) + +All implementations use NumPy for vectorized operations. + +## πŸš€ What's Next + +After mastering activations, you'll use them in: +1. **Layers Module**: Building neural network layers +2. **Loss Functions**: Computing training objectives +3. **Advanced Architectures**: CNNs, RNNs, and more + +These functions are the mathematical foundation for everything that follows! + +## πŸ“š Further Reading + +**Mathematical Background**: +- [Activation Functions in Neural Networks](https://en.wikipedia.org/wiki/Activation_function) +- [Deep Learning Book - Chapter 6](http://www.deeplearningbook.org/) + +**Advanced Topics**: +- ReLU variants (Leaky ReLU, ELU, Swish) +- Activation function choice and impact +- Gradient flow and vanishing gradients + +## πŸŽ‰ Success Criteria + +You've mastered this module when: +- [ ] All tests pass (`python bin/tito.py test --module activations`) +- [ ] You understand why each function is useful +- [ ] You can explain the mathematical properties +- [ ] You can use activations in neural networks +- [ ] You appreciate the importance of nonlinearity + +**Great work! You've built the mathematical foundation of neural networks!** πŸŽ‰ \ No newline at end of file diff --git a/_proc/activations/activations_dev b/_proc/activations/activations_dev new file mode 100644 index 00000000..582933ff --- /dev/null +++ b/_proc/activations/activations_dev @@ -0,0 +1,1162 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# πŸ”₯ TinyTorch Activations Module + +Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power. + +## 🎯 Learning Objectives + +By the end of this module, you will: +1. **Understand** why activation functions are essential for neural networks +2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh +3. **Test** your functions with various inputs to understand their behavior +4. **Use** these functions as building blocks for neural networks + +## 🧠 Why Activation Functions Matter + +**Without activation functions, neural networks are just linear transformations!** + +``` +Linear β†’ Linear β†’ Linear = Still just Linear +Linear β†’ Activation β†’ Linear = Can learn complex patterns! +``` + +**Key insight**: Activation functions add **nonlinearity**, allowing networks to learn complex patterns that linear functions cannot capture. 
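+
+To make that insight concrete, here is a tiny NumPy check (a sketch with arbitrary example matrices): two stacked linear layers collapse into a single linear layer, so without a nonlinearity in between, extra depth buys nothing.
+
+```python
+import numpy as np
+
+W1 = np.array([[1.0, 2.0], [0.5, -1.0]])  # weights of "layer 1"
+W2 = np.array([[0.0, 1.0], [3.0, 1.0]])   # weights of "layer 2"
+x = np.array([2.0, -1.0])
+
+# Two linear layers applied in sequence...
+two_layers = W2 @ (W1 @ x)
+# ...equal one linear layer with the combined weight matrix W2 @ W1.
+one_layer = (W2 @ W1) @ x
+assert np.allclose(two_layers, one_layer)
+```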
+ +## πŸ“š What You'll Build + +- **ReLU**: `f(x) = max(0, x)` - The workhorse of deep learning +- **Sigmoid**: `f(x) = 1 / (1 + e^(-x))` - Squashes to (0, 1) +- **Tanh**: `f(x) = tanh(x)` - Squashes to (-1, 1) + +Each function serves different purposes and has different mathematical properties. + +--- + +Let's start building! πŸš€ +""" + +# %% +#| default_exp core.activations + +# Standard library imports +import math +import numpy as np +import matplotlib.pyplot as plt +import os +import sys + +# TinyTorch imports +from tinytorch.core.tensor import Tensor + +# %% +# Helper function to detect if we're in a testing environment +def _should_show_plots(): + """ + Determine if we should show plots based on the execution context. + + Returns False if: + - Running in pytest (detected by 'pytest' in sys.modules) + - Running in test environment (detected by environment variables) + - Running from command line test runner + + Returns True if: + - Running in Jupyter notebook + - Running interactively in Python + """ + # Check if we're running in pytest + if 'pytest' in sys.modules: + return False + + # Check if we're in a test environment + if os.environ.get('PYTEST_CURRENT_TEST'): + return False + + # Check if we're running from a test file (more specific check) + if any(arg.endswith('.py') and 'test_' in os.path.basename(arg) and 'tests/' in arg for arg in sys.argv): + return False + + # Check if we're running from the tito CLI test command + if len(sys.argv) > 0 and 'tito.py' in sys.argv[0] and 'test' in sys.argv: + return False + + # Default to showing plots (notebook/interactive environment) + return True + +# %% [markdown] +""" +## Step 1: ReLU Activation Function + +**ReLU** (Rectified Linear Unit) is the most popular activation function in deep learning. + +**Formula**: `f(x) = max(0, x)` + +**Properties**: +- **Simple**: Easy to compute and understand +- **Sparse**: Outputs exactly zero for negative inputs +- **Unbounded**: No upper limit on positive outputs +- **Non-saturating**: Doesn't suffer from vanishing gradients + +**When to use**: Almost everywhere! It's the default choice for hidden layers. +""" + +# %% +#| export +class ReLU: + """ + ReLU Activation: f(x) = max(0, x) + + The most popular activation function in deep learning. + Simple, effective, and computationally efficient. + + TODO: Implement ReLU activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply ReLU: f(x) = max(0, x) + + Args: + x: Input tensor + + Returns: + Output tensor with ReLU applied element-wise + + TODO: Implement element-wise max(0, x) operation + Hint: Use np.maximum(0, x.data) + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + """Make activation callable: relu(x) same as relu.forward(x)""" + return self.forward(x) + +# %% +#| hide +#| export +class ReLU: + """ReLU Activation: f(x) = max(0, x)""" + + def forward(self, x: Tensor) -> Tensor: + """Apply ReLU: f(x) = max(0, x)""" + return Tensor(np.maximum(0, x.data)) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% [markdown] +""" +### πŸ§ͺ Test Your ReLU Function + +Once you implement ReLU above, run this cell to test it: +""" + +# %% +# Test ReLU function +try: + print("=== Testing ReLU Function ===") + + # Test data: mix of positive, negative, and zero + x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) + print(f"Input: {x.data}") + + # Test ReLU + relu = ReLU() + y = relu(x) + print(f"ReLU output: {y.data}") + print(f"Expected: [[0. 0. 0. 1. 
3.]]") + + # Test with different shapes + x_2d = Tensor([[-2.0, 1.0], [0.5, -0.5]]) + y_2d = relu(x_2d) + print(f"\n2D Input: {x_2d.data}") + print(f"2D ReLU output: {y_2d.data}") + + print("βœ… ReLU working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the ReLU function above!") + +# %% [markdown] +""" +### πŸ“Š Visualize ReLU Function + +Let's plot the ReLU function to see how it transforms inputs: +""" + +# %% +# Plot ReLU function +try: + print("=== Plotting ReLU Function ===") + + # Create a range of input values + x_range = np.linspace(-5, 5, 100) + x_tensor = Tensor([x_range]) + + # Apply ReLU (student implementation) + relu = ReLU() + y_tensor = relu(x_tensor) + y_range = y_tensor.data[0] + + # Create ideal ReLU for comparison + y_ideal = np.maximum(0, x_range) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the plot + plt.figure(figsize=(12, 8)) + + # Plot both student implementation and ideal + plt.subplot(2, 2, 1) + plt.plot(x_range, y_range, 'b-', linewidth=3, label='Your ReLU Implementation') + plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal ReLU') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('ReLU: Your Implementation vs Ideal') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-5, 5) + plt.ylim(-1, 5) + + # Mathematical explanation plot + plt.subplot(2, 2, 2) + # Show the mathematical definition + x_math = np.array([-3, -2, -1, 0, 1, 2, 3]) + y_math = np.maximum(0, x_math) + plt.stem(x_math, y_math, basefmt=' ', linefmt='g-', markerfmt='go') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('max(0, x)') + plt.title('Mathematical Definition: max(0, x)') + plt.grid(True, alpha=0.3) + plt.xlim(-4, 4) + plt.ylim(-0.5, 3.5) + + # Show the piecewise nature + plt.subplot(2, 2, 3) + x_left = np.linspace(-5, 0, 50) + x_right = np.linspace(0, 5, 50) + plt.plot(x_left, np.zeros_like(x_left), 'r-', linewidth=3, label='f(x) = 0 for x < 0') + plt.plot(x_right, x_right, 'b-', linewidth=3, label='f(x) = x for x β‰₯ 0') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Piecewise Function Definition') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-5, 5) + plt.ylim(-1, 5) + + # Error analysis + plt.subplot(2, 2, 4) + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + plt.plot(x_range, difference, 'purple', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('|Your Output - Ideal Output|') + plt.title(f'Implementation Error (Max: {max_error:.6f})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + plt.tight_layout() + plt.show() + + # Print analysis + print(f"\nπŸ“Š Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + + print(f"πŸ“ˆ Function properties:") + print(f" β€’ Range: [0, ∞)") + print(f" β€’ Piecewise: f(x) = 0 for x < 0, f(x) = x for x β‰₯ 0") + print(f" β€’ Monotonic: Always increasing for x 
β‰₯ 0") + print(f" β€’ Sparse: Exactly zero for negative inputs") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + + # Always show the mathematical analysis + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + print(f"\nπŸ“Š Mathematical Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure to implement the ReLU function above!") + +# %% [markdown] +""" +## Step 2: Sigmoid Activation Function + +**Sigmoid** squashes any input to the range (0, 1), making it useful for probabilities. + +**Formula**: `f(x) = 1 / (1 + e^(-x))` + +**Properties**: +- **Bounded**: Always outputs between 0 and 1 +- **Smooth**: Differentiable everywhere +- **S-shaped**: Smooth transition from 0 to 1 +- **Saturating**: Can suffer from vanishing gradients + +**When to use**: Binary classification (final layer), gates in RNNs/LSTMs. + +**⚠️ Numerical Stability**: Be careful with large inputs to avoid overflow! +""" + +# %% +#| export +class Sigmoid: + """ + Sigmoid Activation: f(x) = 1 / (1 + e^(-x)) + + Squashes input to range (0, 1). Often used for binary classification. + + TODO: Implement Sigmoid activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Sigmoid: f(x) = 1 / (1 + e^(-x)) + + Args: + x: Input tensor + + Returns: + Output tensor with Sigmoid applied element-wise + + TODO: Implement sigmoid function (be careful with numerical stability!) + + Hint: For numerical stability, use: + - For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) + - For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% +#| hide +#| export +class Sigmoid: + """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))""" + + def forward(self, x: Tensor) -> Tensor: + """Apply Sigmoid with numerical stability""" + # Use the numerically stable version to avoid overflow + # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) + # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) + x_data = x.data + result = np.zeros_like(x_data) + + # Stable computation + positive_mask = x_data >= 0 + result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) + result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) + + return Tensor(result) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% [markdown] +""" +### πŸ§ͺ Test Your Sigmoid Function + +Once you implement Sigmoid above, run this cell to test it: +""" + +# %% +# Test Sigmoid function +try: + print("=== Testing Sigmoid Function ===") + + # Test data: mix of positive, negative, and zero + x = Tensor([[-5.0, -1.0, 0.0, 1.0, 5.0]]) + print(f"Input: {x.data}") + + # Test Sigmoid + sigmoid = Sigmoid() + y = sigmoid(x) + print(f"Sigmoid output: {y.data}") + print("Expected: values between 0 and 1") + print(f"All values in (0,1)? 
{np.all((y.data > 0) & (y.data < 1))}") + + # Test specific values + x_zero = Tensor([[0.0]]) + y_zero = sigmoid(x_zero) + print(f"\nSigmoid(0) = {y_zero.data[0, 0]:.4f} (should be 0.5)") + + # Test extreme values (numerical stability) + x_extreme = Tensor([[-100.0, 100.0]]) + y_extreme = sigmoid(x_extreme) + print(f"Sigmoid([-100, 100]) = {y_extreme.data}") + print("Should be close to [0, 1] without overflow errors") + + print("βœ… Sigmoid working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the Sigmoid function above!") + +# %% [markdown] +""" +### πŸ“Š Visualize Sigmoid Function + +Let's plot the Sigmoid function to see its S-shaped curve: +""" + +# %% +# Plot Sigmoid function +try: + print("=== Plotting Sigmoid Function ===") + + # Create a range of input values + x_range = np.linspace(-10, 10, 100) + x_tensor = Tensor([x_range]) + + # Apply Sigmoid (student implementation) + sigmoid = Sigmoid() + y_tensor = sigmoid(x_tensor) + y_range = y_tensor.data[0] + + # Create ideal Sigmoid for comparison + y_ideal = 1.0 / (1.0 + np.exp(-x_range)) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the plot + plt.figure(figsize=(12, 8)) + + # Plot both student implementation and ideal + plt.subplot(2, 2, 1) + plt.plot(x_range, y_range, 'g-', linewidth=3, label='Your Sigmoid Implementation') + plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Sigmoid') + plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5, label='y = 0.5') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Sigmoid: Your Implementation vs Ideal') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-10, 10) + plt.ylim(-0.1, 1.1) + + # Mathematical explanation plot + plt.subplot(2, 2, 2) + # Show key points + x_key = np.array([-5, -2, -1, 0, 1, 2, 5]) + y_key = 1.0 / (1.0 + np.exp(-x_key)) + plt.stem(x_key, y_key, basefmt=' ', linefmt='orange', markerfmt='o') + plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='-', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('1/(1+e^(-x))') + plt.title('Mathematical Definition: 1/(1+e^(-x))') + plt.grid(True, alpha=0.3) + plt.xlim(-6, 6) + plt.ylim(-0.1, 1.1) + + # Show the S-curve properties + plt.subplot(2, 2, 3) + x_detailed = np.linspace(-8, 8, 200) + y_detailed = 1.0 / (1.0 + np.exp(-x_detailed)) + plt.plot(x_detailed, y_detailed, 'g-', linewidth=3) + # Add asymptotes + plt.axhline(y=0, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = 0') + plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1') + plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.7, label='Midpoint: y = 0.5') + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('S-Curve Properties') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-8, 8) + plt.ylim(-0.1, 1.1) + + # Error analysis + plt.subplot(2, 2, 4) + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + plt.plot(x_range, difference, 'purple', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('|Your Output - Ideal 
Output|') + plt.title(f'Implementation Error (Max: {max_error:.6f})') + plt.grid(True, alpha=0.3) + plt.xlim(-10, 10) + + plt.tight_layout() + plt.show() + + # Print analysis + print(f"\nπŸ“Š Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + + print(f"πŸ“ˆ Function properties:") + print(f" β€’ Range: (0, 1)") + print(f" β€’ Symmetric around (0, 0.5)") + print(f" β€’ Smooth and differentiable everywhere") + print(f" β€’ Saturates for large |x| (vanishing gradient problem)") + print(f" β€’ Useful for binary classification (outputs probabilities)") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + + # Always show the mathematical analysis + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + print(f"\nπŸ“Š Mathematical Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure to implement the Sigmoid function above!") + +# %% [markdown] +""" +## Step 3: Tanh Activation Function + +**Tanh** (Hyperbolic Tangent) squashes inputs to the range (-1, 1). + +**Formula**: `f(x) = tanh(x) = (e^x - e^(-x)) / (e^x + e^(-x))` + +**Properties**: +- **Bounded**: Always outputs between -1 and 1 +- **Zero-centered**: Output is centered around 0 +- **Smooth**: Differentiable everywhere +- **Stronger gradients**: Than sigmoid around zero + +**When to use**: Hidden layers when you want zero-centered outputs, RNNs. + +**Advantage over Sigmoid**: Zero-centered outputs help with gradient flow. +""" + +# %% +#| export +class Tanh: + """ + Tanh Activation: f(x) = tanh(x) + + Squashes input to range (-1, 1). Zero-centered output. + + TODO: Implement Tanh activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Tanh: f(x) = tanh(x) + + Args: + x: Input tensor + + Returns: + Output tensor with Tanh applied element-wise + + TODO: Implement tanh function + Hint: Use np.tanh(x.data) + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% +#| hide +#| export +class Tanh: + """Tanh Activation: f(x) = tanh(x)""" + + def forward(self, x: Tensor) -> Tensor: + """Apply Tanh""" + return Tensor(np.tanh(x.data)) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% [markdown] +""" +### πŸ§ͺ Test Your Tanh Function + +Once you implement Tanh above, run this cell to test it: +""" + +# %% +# Test Tanh function +try: + print("=== Testing Tanh Function ===") + + # Test data: mix of positive, negative, and zero + x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) + print(f"Input: {x.data}") + + # Test Tanh + tanh = Tanh() + y = tanh(x) + print(f"Tanh output: {y.data}") + print("Expected: values between -1 and 1") + print(f"All values in (-1,1)? 
{np.all((y.data > -1) & (y.data < 1))}") + + # Test specific values + x_zero = Tensor([[0.0]]) + y_zero = tanh(x_zero) + print(f"\nTanh(0) = {y_zero.data[0, 0]:.4f} (should be 0.0)") + + # Test extreme values + x_extreme = Tensor([[-10.0, 10.0]]) + y_extreme = tanh(x_extreme) + print(f"Tanh([-10, 10]) = {y_extreme.data}") + print("Should be close to [-1, 1]") + + print("βœ… Tanh working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the Tanh function above!") + +# %% [markdown] +""" +### πŸ“Š Visualize Tanh Function + +Let's plot the Tanh function to see its zero-centered S-shaped curve: +""" + +# %% +# Plot Tanh function +try: + print("=== Plotting Tanh Function ===") + + # Create a range of input values + x_range = np.linspace(-5, 5, 100) + x_tensor = Tensor([x_range]) + + # Apply Tanh (student implementation) + tanh = Tanh() + y_tensor = tanh(x_tensor) + y_range = y_tensor.data[0] + + # Create ideal Tanh for comparison + y_ideal = np.tanh(x_range) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the plot + plt.figure(figsize=(12, 8)) + + # Plot both student implementation and ideal + plt.subplot(2, 2, 1) + plt.plot(x_range, y_range, 'orange', linewidth=3, label='Your Tanh Implementation') + plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Tanh') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='--', alpha=0.3) + plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Tanh: Your Implementation vs Ideal') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-5, 5) + plt.ylim(-1.2, 1.2) + + # Mathematical explanation plot + plt.subplot(2, 2, 2) + # Show key points + x_key = np.array([-3, -2, -1, 0, 1, 2, 3]) + y_key = np.tanh(x_key) + plt.stem(x_key, y_key, basefmt=' ', linefmt='purple', markerfmt='o') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='--', alpha=0.3) + plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('tanh(x)') + plt.title('Mathematical Definition: tanh(x)') + plt.grid(True, alpha=0.3) + plt.xlim(-4, 4) + plt.ylim(-1.2, 1.2) + + # Show symmetry property + plt.subplot(2, 2, 3) + x_sym = np.linspace(-4, 4, 100) + y_sym = np.tanh(x_sym) + plt.plot(x_sym, y_sym, 'orange', linewidth=3, label='tanh(x)') + plt.plot(-x_sym, -y_sym, 'b--', linewidth=2, alpha=0.7, label='-tanh(-x)') + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1') + plt.axhline(y=-1, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = -1') + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Output') + plt.title('Symmetry: tanh(-x) = -tanh(x)') + plt.grid(True, alpha=0.3) + plt.legend() + plt.xlim(-4, 4) + plt.ylim(-1.2, 1.2) + + # Error analysis + plt.subplot(2, 2, 4) + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + plt.plot(x_range, difference, 'purple', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('|Your Output - Ideal Output|') + plt.title(f'Implementation Error (Max: {max_error:.6f})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + plt.tight_layout() + 
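+        # Resolve subplot spacing so the four panels' labels don't overlap,
+        # then render the figure.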
plt.show() + + # Print analysis + print(f"\nπŸ“Š Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + + print(f"πŸ“ˆ Function properties:") + print(f" β€’ Range: (-1, 1)") + print(f" β€’ Odd function: tanh(-x) = -tanh(x)") + print(f" β€’ Symmetric around origin (0, 0)") + print(f" β€’ Smooth and differentiable everywhere") + print(f" β€’ Stronger gradients than sigmoid around zero") + print(f" β€’ Related to sigmoid: tanh(x) = 2*sigmoid(2x) - 1") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + + # Always show the mathematical analysis + difference = np.abs(y_range - y_ideal) + max_error = np.max(difference) + print(f"\nπŸ“Š Mathematical Analysis:") + print(f"βœ… Maximum error: {max_error:.10f}") + if max_error < 1e-10: + print("πŸŽ‰ Perfect implementation!") + elif max_error < 1e-6: + print("🌟 Excellent implementation!") + elif max_error < 1e-3: + print("πŸ‘ Good implementation!") + else: + print("πŸ”§ Implementation needs work.") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure to implement the Tanh function above!") + +# %% [markdown] +""" +## Step 4: Compare All Activation Functions + +Let's see how all three functions behave on the same input: +""" + +# %% +# Compare all activation functions +try: + print("=== Comparing All Activation Functions ===") + + # Test data: range from -5 to 5 + x = Tensor([[-5.0, -2.0, -1.0, 0.0, 1.0, 2.0, 5.0]]) + print(f"Input: {x.data}") + + # Apply all activations + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + print(f"\nReLU: {y_relu.data}") + print(f"Sigmoid: {y_sigmoid.data}") + print(f"Tanh: {y_tanh.data}") + + print("\nπŸ“Š Key Differences:") + print("- ReLU: Zeros out negative values, unbounded positive") + print("- Sigmoid: Squashes to (0, 1), always positive") + print("- Tanh: Squashes to (-1, 1), zero-centered") + + print("\nβœ… All activation functions working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement all activation functions above!") + +# %% [markdown] +""" +### πŸ“Š Comprehensive Activation Function Comparison + +Let's plot all three functions together to see their differences: +""" + +# %% +# Plot all activation functions together +try: + print("=== Plotting All Activation Functions Together ===") + + # Create a range of input values + x_range = np.linspace(-5, 5, 100) + x_tensor = Tensor([x_range]) + + # Apply all activations (student implementations) + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + y_relu = relu(x_tensor).data[0] + y_sigmoid = sigmoid(x_tensor).data[0] + y_tanh = tanh(x_tensor).data[0] + + # Create ideal functions for comparison + y_relu_ideal = np.maximum(0, x_range) + y_sigmoid_ideal = 1.0 / (1.0 + np.exp(-x_range)) + y_tanh_ideal = np.tanh(x_range) + + # Only show plots if we're not in a testing environment + if _should_show_plots(): + # Create the comprehensive plot + plt.figure(figsize=(15, 10)) + + # Main comparison plot + plt.subplot(2, 3, (1, 2)) + plt.plot(x_range, y_relu, 'b-', linewidth=3, label='Your ReLU') + plt.plot(x_range, y_sigmoid, 'g-', linewidth=3, label='Your Sigmoid') + plt.plot(x_range, y_tanh, 'orange', linewidth=3, label='Your Tanh') + + # Add ideal functions as dashed 
lines + plt.plot(x_range, y_relu_ideal, 'b--', linewidth=1, alpha=0.7, label='Ideal ReLU') + plt.plot(x_range, y_sigmoid_ideal, 'g--', linewidth=1, alpha=0.7, label='Ideal Sigmoid') + plt.plot(x_range, y_tanh_ideal, '--', color='orange', linewidth=1, alpha=0.7, label='Ideal Tanh') + + # Add reference lines + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.axhline(y=1, color='k', linestyle='--', alpha=0.3) + plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3) + plt.axvline(x=0, color='k', linestyle='-', alpha=0.3) + + # Formatting + plt.xlabel('Input (x)', fontsize=12) + plt.ylabel('Output f(x)', fontsize=12) + plt.title('Activation Functions: Your Implementation vs Ideal', fontsize=14, fontweight='bold') + plt.grid(True, alpha=0.3) + plt.legend(fontsize=10, loc='upper left') + plt.xlim(-5, 5) + plt.ylim(-1.5, 5) + + # Mathematical definitions + plt.subplot(2, 3, 3) + plt.text(0.05, 0.95, 'Mathematical Definitions:', fontsize=12, fontweight='bold', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.85, 'ReLU:', fontsize=11, fontweight='bold', color='blue', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.80, 'f(x) = max(0, x)', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.70, 'Sigmoid:', fontsize=11, fontweight='bold', color='green', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.65, 'f(x) = 1/(1+e^(-x))', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.55, 'Tanh:', fontsize=11, fontweight='bold', color='orange', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.50, 'f(x) = tanh(x)', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.45, ' = (e^x-e^(-x))/(e^x+e^(-x))', fontsize=10, fontfamily='monospace', + transform=plt.gca().transAxes, verticalalignment='top') + + plt.text(0.05, 0.30, 'Key Properties:', fontsize=12, fontweight='bold', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.25, 'β€’ ReLU: Sparse, unbounded', fontsize=10, color='blue', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.20, 'β€’ Sigmoid: Bounded (0,1)', fontsize=10, color='green', + transform=plt.gca().transAxes, verticalalignment='top') + plt.text(0.05, 0.15, 'β€’ Tanh: Zero-centered (-1,1)', fontsize=10, color='orange', + transform=plt.gca().transAxes, verticalalignment='top') + plt.axis('off') + + # Error analysis for ReLU + plt.subplot(2, 3, 4) + error_relu = np.abs(y_relu - y_relu_ideal) + plt.plot(x_range, error_relu, 'b-', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Error') + plt.title(f'ReLU Error (Max: {np.max(error_relu):.2e})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + # Error analysis for Sigmoid + plt.subplot(2, 3, 5) + error_sigmoid = np.abs(y_sigmoid - y_sigmoid_ideal) + plt.plot(x_range, error_sigmoid, 'g-', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input (x)') + plt.ylabel('Error') + plt.title(f'Sigmoid Error (Max: {np.max(error_sigmoid):.2e})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + # Error analysis for Tanh + plt.subplot(2, 3, 6) + error_tanh = np.abs(y_tanh - y_tanh_ideal) + plt.plot(x_range, error_tanh, 'orange', linewidth=2) + plt.axhline(y=0, color='k', linestyle='-', alpha=0.3) + plt.xlabel('Input 
(x)') + plt.ylabel('Error') + plt.title(f'Tanh Error (Max: {np.max(error_tanh):.2e})') + plt.grid(True, alpha=0.3) + plt.xlim(-5, 5) + + plt.tight_layout() + plt.show() + + # Comprehensive analysis + print("\nπŸ“Š Comprehensive Analysis:") + print("=" * 60) + + # Function ranges + print("πŸ“ˆ Output Ranges:") + print(f" ReLU: [{np.min(y_relu):.3f}, {np.max(y_relu):.3f}]") + print(f" Sigmoid: [{np.min(y_sigmoid):.3f}, {np.max(y_sigmoid):.3f}]") + print(f" Tanh: [{np.min(y_tanh):.3f}, {np.max(y_tanh):.3f}]") + + # Implementation accuracy + print("\n🎯 Implementation Accuracy:") + max_errors = [np.max(error_relu), np.max(error_sigmoid), np.max(error_tanh)] + functions = ['ReLU', 'Sigmoid', 'Tanh'] + + for func, error in zip(functions, max_errors): + if error < 1e-10: + status = "βœ… PERFECT" + elif error < 1e-6: + status = "βœ… EXCELLENT" + elif error < 1e-3: + status = "⚠️ GOOD" + else: + status = "❌ NEEDS WORK" + print(f" {func:8s}: {status:12s} (error: {error:.2e})") + + # Mathematical properties verification + print("\nπŸ” Mathematical Properties:") + + # Zero-centered test + x_zero = Tensor([[0.0]]) + print(" Zero-centered test (f(0) should be 0):") + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + output = func(x_zero).data[0, 0] + is_zero = abs(output) < 1e-6 + expected = 0.0 if name != "Sigmoid" else 0.5 + print(f" {name:8s}: f(0) = {output:.4f} {'βœ…' if abs(output - expected) < 1e-6 else '❌'}") + + # Monotonicity test + print(" Monotonicity test (should be increasing):") + test_vals = np.array([-2, -1, 0, 1, 2]) + x_test = Tensor([test_vals]) + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + outputs = func(x_test).data[0] + is_monotonic = np.all(outputs[1:] >= outputs[:-1]) + print(f" {name:8s}: {'βœ… Monotonic' if is_monotonic else '❌ Not monotonic'}") + + print("\nπŸŽ‰ Comparison complete! 
Use these insights to understand each function's role in neural networks.") + else: + print("πŸ“Š Plots disabled during testing - this is normal!") + +except Exception as e: + print(f"❌ Error in plotting: {e}") + print("Make sure matplotlib is installed and all functions are implemented!") + +# %% [markdown] +""" +## Step 5: Understanding Activation Function Properties + +Let's explore the mathematical properties of each function: +""" + +# %% +# Explore activation function properties +try: + print("=== Activation Function Properties ===") + + # Create test functions + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + # Test with a range of values + test_values = np.linspace(-5, 5, 11) + x = Tensor([test_values]) + + print(f"Input range: {test_values}") + print(f"ReLU range: [{np.min(relu(x).data):.2f}, {np.max(relu(x).data):.2f}]") + print(f"Sigmoid range: [{np.min(sigmoid(x).data):.2f}, {np.max(sigmoid(x).data):.2f}]") + print(f"Tanh range: [{np.min(tanh(x).data):.2f}, {np.max(tanh(x).data):.2f}]") + + # Test monotonicity (should all be increasing functions) + print(f"\nπŸ“ˆ Monotonicity Test:") + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + outputs = func(x).data[0] + is_monotonic = np.all(outputs[1:] >= outputs[:-1]) + print(f"{name}: {'βœ… Monotonic' if is_monotonic else '❌ Not monotonic'}") + + # Test zero-centered property + print(f"\n🎯 Zero-Centered Test (f(0) = 0):") + x_zero = Tensor([[0.0]]) + for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]: + output = func(x_zero).data[0, 0] + is_zero_centered = abs(output) < 1e-6 + print(f"{name}: f(0) = {output:.4f} {'βœ… Zero-centered' if is_zero_centered else '❌ Not zero-centered'}") + + print("\nπŸŽ‰ Property analysis complete!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Check your activation function implementations!") + +# %% [markdown] +""" +## Step 6: Practical Usage Examples + +Let's see how these functions would be used in practice: +""" + +# %% +# Practical usage examples +try: + print("=== Practical Usage Examples ===") + + # Example 1: Binary classification with sigmoid + print("1. Binary Classification (Sigmoid):") + logits = Tensor([[2.5, -1.2, 0.8, -0.3]]) # Raw network outputs + sigmoid = Sigmoid() + probabilities = sigmoid(logits) + print(f" Logits: {logits.data}") + print(f" Probabilities: {probabilities.data}") + print(f" Predictions: {(probabilities.data > 0.5).astype(int)}") + + # Example 2: Feature processing with ReLU + print("\n2. Feature Processing (ReLU):") + features = Tensor([[-0.5, 1.2, -2.1, 0.8, -0.1]]) # Mixed positive/negative + relu = ReLU() + processed = relu(features) + print(f" Raw features: {features.data}") + print(f" After ReLU: {processed.data}") + print(f" Sparsity: {np.mean(processed.data == 0):.1%} zeros") + + # Example 3: Normalized features with Tanh + print("\n3. Normalized Features (Tanh):") + raw_features = Tensor([[3.2, -1.8, 0.5, -2.4, 1.1]]) + tanh = Tanh() + normalized = tanh(raw_features) + print(f" Raw features: {raw_features.data}") + print(f" Normalized: {normalized.data}") + print(f" Mean: {np.mean(normalized.data):.3f} (close to 0)") + + print("\nβœ… Practical examples complete!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Check your activation function implementations!") + +# %% [markdown] +""" +## πŸŽ‰ Congratulations! + +You've successfully implemented the three most important activation functions in deep learning! + +### 🧱 What You Built +1. 
**ReLU**: The workhorse activation that enables deep networks +2. **Sigmoid**: The probability activation for binary classification +3. **Tanh**: The zero-centered activation for better gradient flow + +### 🎯 Key Insights +- **Nonlinearity is essential**: Without activations, neural networks are just linear transformations +- **Different functions serve different purposes**: ReLU for hidden layers, Sigmoid for probabilities, Tanh for zero-centered outputs +- **Mathematical properties matter**: Monotonicity, boundedness, and zero-centering affect learning + +### πŸš€ What's Next +These activation functions will be used in: +- **Layers Module**: Building neural network layers +- **Loss Functions**: Computing training objectives +- **Advanced Architectures**: CNNs, RNNs, and more + +### πŸ”§ Export to Package +Run this to export your activations to the TinyTorch package: +```bash +python bin/tito.py sync +``` + +Then test your implementation: +```bash +python bin/tito.py test --module activations +``` + +**Excellent work! You've mastered the mathematical foundations of neural networks!** πŸŽ‰ + +--- + +## πŸ“š Further Reading + +**Want to learn more about activation functions?** +- **ReLU variants**: Leaky ReLU, ELU, Swish +- **Advanced activations**: GELU, Mish, SiLU +- **Activation choice**: When to use which function +- **Gradient flow**: How activations affect training + +**Next modules**: Layers, Loss Functions, Optimization +""" \ No newline at end of file diff --git a/_proc/activations/tests/test_activations b/_proc/activations/tests/test_activations new file mode 100644 index 00000000..0a9f7967 --- /dev/null +++ b/_proc/activations/tests/test_activations @@ -0,0 +1,345 @@ +""" +Test suite for the TinyTorch Activations module. + +This test suite validates the mathematical correctness of activation functions: +- ReLU: f(x) = max(0, x) +- Sigmoid: f(x) = 1 / (1 + e^(-x)) +- Tanh: f(x) = tanh(x) + +Tests focus on: +1. Mathematical correctness +2. Numerical stability +3. Edge cases +4. Shape preservation +5. 
Type consistency +""" + +import pytest +import numpy as np +import math +from tinytorch.core.tensor import Tensor + +# Import the activation functions +import sys +import os +sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +from activations_dev import ReLU, Sigmoid, Tanh + + +class TestReLU: + """Test the ReLU activation function.""" + + def test_relu_basic_functionality(self): + """Test basic ReLU behavior: max(0, x)""" + relu = ReLU() + + # Test mixed positive/negative values + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + y = relu(x) + expected = np.array([[0.0, 0.0, 0.0, 1.0, 2.0]]) + + assert np.allclose(y.data, expected), f"Expected {expected}, got {y.data}" + + def test_relu_all_positive(self): + """Test ReLU with all positive values (should be unchanged)""" + relu = ReLU() + + x = Tensor([[1.0, 2.5, 3.7, 10.0]]) + y = relu(x) + + assert np.allclose(y.data, x.data), "ReLU should preserve positive values" + + def test_relu_all_negative(self): + """Test ReLU with all negative values (should be zeros)""" + relu = ReLU() + + x = Tensor([[-1.0, -2.5, -3.7, -10.0]]) + y = relu(x) + expected = np.zeros_like(x.data) + + assert np.allclose(y.data, expected), "ReLU should zero out negative values" + + def test_relu_zero_input(self): + """Test ReLU with zero input""" + relu = ReLU() + + x = Tensor([[0.0]]) + y = relu(x) + + assert y.data[0, 0] == 0.0, "ReLU(0) should be 0" + + def test_relu_shape_preservation(self): + """Test that ReLU preserves tensor shape""" + relu = ReLU() + + # Test different shapes + shapes = [(1, 5), (2, 3), (4, 1), (3, 3)] + for shape in shapes: + x = Tensor(np.random.randn(*shape)) + y = relu(x) + assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}" + + def test_relu_callable(self): + """Test that ReLU can be called directly""" + relu = ReLU() + x = Tensor([[1.0, -1.0]]) + + y1 = relu(x) + y2 = relu.forward(x) + + assert np.allclose(y1.data, y2.data), "Direct call should match forward method" + + +class TestSigmoid: + """Test the Sigmoid activation function.""" + + def test_sigmoid_basic_functionality(self): + """Test basic Sigmoid behavior""" + sigmoid = Sigmoid() + + # Test known values + x = Tensor([[0.0]]) + y = sigmoid(x) + assert abs(y.data[0, 0] - 0.5) < 1e-6, "Sigmoid(0) should be 0.5" + + def test_sigmoid_range(self): + """Test that Sigmoid outputs are in (0, 1)""" + sigmoid = Sigmoid() + + # Test wide range of inputs + x = Tensor([[-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]]) + y = sigmoid(x) + + assert np.all(y.data > 0), "Sigmoid outputs should be > 0" + assert np.all(y.data < 1), "Sigmoid outputs should be < 1" + + def test_sigmoid_numerical_stability(self): + """Test Sigmoid with extreme values (numerical stability)""" + sigmoid = Sigmoid() + + # Test extreme values that could cause overflow + x = Tensor([[-100.0, -50.0, 50.0, 100.0]]) + y = sigmoid(x) + + # Should not contain NaN or inf + assert not np.any(np.isnan(y.data)), "Sigmoid should not produce NaN" + assert not np.any(np.isinf(y.data)), "Sigmoid should not produce inf" + + # Should be close to 0 for very negative, close to 1 for very positive + assert y.data[0, 0] < 1e-10, "Sigmoid(-100) should be very close to 0" + assert y.data[0, 1] < 1e-10, "Sigmoid(-50) should be very close to 0" + assert y.data[0, 2] > 1 - 1e-10, "Sigmoid(50) should be very close to 1" + assert y.data[0, 3] > 1 - 1e-10, "Sigmoid(100) should be very close to 1" + + def test_sigmoid_monotonicity(self): + """Test that Sigmoid is monotonically increasing""" + sigmoid = Sigmoid() + + x = 
Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) + y = sigmoid(x) + + # Check that outputs are increasing + for i in range(len(y.data[0]) - 1): + assert y.data[0, i] < y.data[0, i + 1], "Sigmoid should be monotonically increasing" + + def test_sigmoid_shape_preservation(self): + """Test that Sigmoid preserves tensor shape""" + sigmoid = Sigmoid() + + shapes = [(1, 5), (2, 3), (4, 1)] + for shape in shapes: + x = Tensor(np.random.randn(*shape)) + y = sigmoid(x) + assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}" + + def test_sigmoid_callable(self): + """Test that Sigmoid can be called directly""" + sigmoid = Sigmoid() + x = Tensor([[1.0, -1.0]]) + + y1 = sigmoid(x) + y2 = sigmoid.forward(x) + + assert np.allclose(y1.data, y2.data), "Direct call should match forward method" + + +class TestTanh: + """Test the Tanh activation function.""" + + def test_tanh_basic_functionality(self): + """Test basic Tanh behavior""" + tanh = Tanh() + + # Test known values + x = Tensor([[0.0]]) + y = tanh(x) + assert abs(y.data[0, 0] - 0.0) < 1e-6, "Tanh(0) should be 0" + + def test_tanh_range(self): + """Test that Tanh outputs are in [-1, 1]""" + tanh = Tanh() + + # Test wide range of inputs + x = Tensor([[-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]]) + y = tanh(x) + + assert np.all(y.data >= -1), "Tanh outputs should be >= -1" + assert np.all(y.data <= 1), "Tanh outputs should be <= 1" + + def test_tanh_symmetry(self): + """Test that Tanh is symmetric: tanh(-x) = -tanh(x)""" + tanh = Tanh() + + x = Tensor([[1.0, 2.0, 3.0]]) + x_neg = Tensor([[-1.0, -2.0, -3.0]]) + + y_pos = tanh(x) + y_neg = tanh(x_neg) + + assert np.allclose(y_neg.data, -y_pos.data), "Tanh should be symmetric" + + def test_tanh_monotonicity(self): + """Test that Tanh is monotonically increasing""" + tanh = Tanh() + + x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]]) + y = tanh(x) + + # Check that outputs are increasing + for i in range(len(y.data[0]) - 1): + assert y.data[0, i] < y.data[0, i + 1], "Tanh should be monotonically increasing" + + def test_tanh_extreme_values(self): + """Test Tanh with extreme values""" + tanh = Tanh() + + x = Tensor([[-100.0, 100.0]]) + y = tanh(x) + + # Should be close to -1 and 1 respectively + assert abs(y.data[0, 0] - (-1.0)) < 1e-10, "Tanh(-100) should be very close to -1" + assert abs(y.data[0, 1] - 1.0) < 1e-10, "Tanh(100) should be very close to 1" + + def test_tanh_shape_preservation(self): + """Test that Tanh preserves tensor shape""" + tanh = Tanh() + + shapes = [(1, 5), (2, 3), (4, 1)] + for shape in shapes: + x = Tensor(np.random.randn(*shape)) + y = tanh(x) + assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}" + + def test_tanh_callable(self): + """Test that Tanh can be called directly""" + tanh = Tanh() + x = Tensor([[1.0, -1.0]]) + + y1 = tanh(x) + y2 = tanh.forward(x) + + assert np.allclose(y1.data, y2.data), "Direct call should match forward method" + + +class TestActivationComparison: + """Test interactions and comparisons between activation functions.""" + + def test_activation_consistency(self): + """Test that all activations work with the same input""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + + # All should process without error + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + # All should preserve shape + assert y_relu.shape == x.shape + assert y_sigmoid.shape == x.shape + assert y_tanh.shape == x.shape + + def test_activation_ranges(self): + """Test that activations have 
expected output ranges""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[-5.0, -2.0, 0.0, 2.0, 5.0]]) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + # ReLU: [0, inf) + assert np.all(y_relu.data >= 0), "ReLU should be non-negative" + + # Sigmoid: (0, 1) + assert np.all(y_sigmoid.data > 0), "Sigmoid should be positive" + assert np.all(y_sigmoid.data < 1), "Sigmoid should be less than 1" + + # Tanh: (-1, 1) + assert np.all(y_tanh.data > -1), "Tanh should be greater than -1" + assert np.all(y_tanh.data < 1), "Tanh should be less than 1" + + +# Integration tests with edge cases +class TestActivationEdgeCases: + """Test edge cases and boundary conditions.""" + + def test_zero_tensor(self): + """Test all activations with zero tensor""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[0.0, 0.0, 0.0]]) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + assert np.allclose(y_relu.data, [0.0, 0.0, 0.0]), "ReLU(0) should be 0" + assert np.allclose(y_sigmoid.data, [0.5, 0.5, 0.5]), "Sigmoid(0) should be 0.5" + assert np.allclose(y_tanh.data, [0.0, 0.0, 0.0]), "Tanh(0) should be 0" + + def test_single_element_tensor(self): + """Test all activations with single element tensor""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + x = Tensor([[1.0]]) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + assert y_relu.shape == (1, 1) + assert y_sigmoid.shape == (1, 1) + assert y_tanh.shape == (1, 1) + + def test_large_tensor(self): + """Test activations with larger tensors""" + relu = ReLU() + sigmoid = Sigmoid() + tanh = Tanh() + + # Create a 10x10 tensor + x = Tensor(np.random.randn(10, 10)) + + y_relu = relu(x) + y_sigmoid = sigmoid(x) + y_tanh = tanh(x) + + assert y_relu.shape == (10, 10) + assert y_sigmoid.shape == (10, 10) + assert y_tanh.shape == (10, 10) + + +if __name__ == "__main__": + # Run tests with pytest + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/_proc/layers/README.md b/_proc/layers/README.md new file mode 100644 index 00000000..6d62b701 --- /dev/null +++ b/_proc/layers/README.md @@ -0,0 +1,206 @@ +# 🧱 Module 2: Layers - Neural Network Building Blocks + +**Build the fundamental transformations that compose into neural networks** + +## 🎯 Learning Objectives + +After completing this module, you will: +- Understand layers as functions that transform tensors: `y = f(x)` +- Implement Dense layers with linear transformations: `y = Wx + b` +- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh) +- See how neural networks are just function composition +- Build intuition for neural network architecture before diving into training + +## 🧱 Build β†’ Use β†’ Understand + +This module follows the TinyTorch pedagogical framework: + +1. **Build**: Dense layers and activation functions from scratch +2. **Use**: Transform tensors and see immediate results +3. 
**Understand**: How neural networks transform information + +## πŸ“š What You'll Build + +### **Dense Layer** +```python +layer = Dense(input_size=3, output_size=2) +x = Tensor([[1.0, 2.0, 3.0]]) +y = layer(x) # Shape: (1, 2) +``` + +### **Activation Functions** +```python +relu = ReLU() +sigmoid = Sigmoid() +tanh = Tanh() + +x = Tensor([[-1.0, 0.0, 1.0]]) +y_relu = relu(x) # [0.0, 0.0, 1.0] +y_sigmoid = sigmoid(x) # [0.27, 0.5, 0.73] +y_tanh = tanh(x) # [-0.76, 0.0, 0.76] +``` + +### **Neural Networks** +```python +# 3 β†’ 4 β†’ 2 network +layer1 = Dense(input_size=3, output_size=4) +activation1 = ReLU() +layer2 = Dense(input_size=4, output_size=2) +activation2 = Sigmoid() + +# Forward pass +x = Tensor([[1.0, 2.0, 3.0]]) +h1 = layer1(x) +h1_activated = activation1(h1) +h2 = layer2(h1_activated) +output = activation2(h2) +``` + +## πŸš€ Getting Started + +### Prerequisites +- Complete Module 1: Tensor βœ… +- Understand basic linear algebra (matrix multiplication) +- Familiar with Python classes and methods + +### Quick Start +```bash +# Navigate to the layers module +cd modules/layers + +# Work in the development notebook +jupyter notebook layers_dev.ipynb + +# Or work in the Python file +code layers_dev.py +``` + +## πŸ“– Module Structure + +``` +modules/layers/ +β”œβ”€β”€ layers_dev.py # Main development file (work here!) +β”œβ”€β”€ layers_dev.ipynb # Jupyter notebook version +β”œβ”€β”€ tests/ +β”‚ └── test_layers.py # Comprehensive tests +β”œβ”€β”€ README.md # This file +└── solutions/ # Reference implementations (if stuck) +``` + +## πŸŽ“ Learning Path + +### Step 1: Dense Layer (Linear Transformation) +- Understand `y = Wx + b` +- Implement weight initialization +- Handle matrix multiplication and bias addition +- Test with single examples and batches + +### Step 2: Activation Functions +- Implement ReLU: `max(0, x)` +- Implement Sigmoid: `1 / (1 + e^(-x))` +- Implement Tanh: `tanh(x)` +- Understand why nonlinearity is crucial + +### Step 3: Layer Composition +- Chain layers together +- Build complete neural networks +- See how simple layers create complex functions + +### Step 4: Real-World Application +- Build an image classification network +- Understand how architecture affects capability + +## πŸ§ͺ Testing Your Implementation + +### Module-Level Tests +```bash +# Run comprehensive tests +python -m pytest tests/test_layers.py -v + +# Quick test +python -c "from layers_dev import Dense, ReLU; print('βœ… Layers working!')" +``` + +### Package-Level Tests +```bash +# Export to package +python ../../bin/tito.py sync + +# Test integration +python ../../bin/tito.py test --module layers +``` + +## 🎯 Key Concepts + +### **Layers as Functions** +- Input: Tensor with some shape +- Transformation: Mathematical operation +- Output: Tensor with possibly different shape + +### **Linear vs Nonlinear** +- Dense layers: Linear transformations +- Activation functions: Nonlinear transformations +- Composition: Linear + Nonlinear = Complex functions + +### **Neural Networks = Function Composition** +``` +Input β†’ Dense β†’ ReLU β†’ Dense β†’ Sigmoid β†’ Output +``` + +### **Why This Matters** +- **Modularity**: Build complex networks from simple parts +- **Reusability**: Same layers work for different problems +- **Understanding**: Know how each part contributes to the whole + +## πŸ” Common Issues + +### **Import Errors** +```python +# Make sure you're in the right directory +import sys +sys.path.append('../../') +from modules.tensor.tensor_dev import Tensor +``` + +### **Shape Mismatches** 
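+Layers chain only when their inner dimensions agree: each layer's `input_size` must equal the previous layer's `output_size`, exactly as in matrix multiplication. A mismatched pair (hypothetical sizes) fails as soon as a tensor flows through both layers:
+
+```python
+bad1 = Dense(input_size=3, output_size=4)
+bad2 = Dense(input_size=5, output_size=2)  # 5 != 4 -> shape error when chained
+```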
+```python +# Check input/output sizes match +layer1 = Dense(input_size=3, output_size=4) +layer2 = Dense(input_size=4, output_size=2) # 4 matches output of layer1 +``` + +### **Gradient Issues (Later)** +```python +# Use proper weight initialization +limit = math.sqrt(6.0 / (input_size + output_size)) +weights = np.random.uniform(-limit, limit, (input_size, output_size)) +``` + +## πŸŽ‰ Success Criteria + +You've successfully completed this module when: +- βœ… All tests pass (`pytest tests/test_layers.py`) +- βœ… You can build a 2-layer neural network +- βœ… You understand how layers transform tensors +- βœ… You see the connection between layers and neural networks +- βœ… Package export works (`tito test --module layers`) + +## πŸš€ What's Next + +After completing this module, you're ready for: +- **Module 3: Networks** - Compose layers into common architectures +- **Module 4: Training** - Learn how networks improve through experience +- **Module 5: Applications** - Use networks for real problems + +## 🀝 Getting Help + +- Check the tests for examples of expected behavior +- Look at the solutions/ directory if you're stuck +- Review the pedagogical principles in `docs/pedagogy/` +- Remember: Build β†’ Use β†’ Understand! + +--- + +**Great job building the foundation of neural networks!** πŸŽ‰ + +*This module implements the core insight: neural networks are just function composition of simple building blocks.* \ No newline at end of file diff --git a/_proc/layers/layers_dev b/_proc/layers/layers_dev new file mode 100644 index 00000000..74b6890a --- /dev/null +++ b/_proc/layers/layers_dev @@ -0,0 +1,469 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# Module 2: Layers - Neural Network Building Blocks + +Welcome to the Layers module! This is where neural networks begin. You'll implement the fundamental building blocks that transform tensors. + +## Learning Goals +- Understand layers as functions that transform tensors: `y = f(x)` +- Implement Dense layers with linear transformations: `y = Wx + b` +- Use activation functions from the activations module for nonlinearity +- See how neural networks are just function composition +- Build intuition before diving into training + +## Build β†’ Use β†’ Understand +1. **Build**: Dense layers using activation functions as building blocks +2. **Use**: Transform tensors and see immediate results +3. **Understand**: How neural networks transform information + +## Module Dependencies +This module builds on the **activations** module: +- **activations** β†’ **layers** β†’ **networks** +- Clean separation of concerns: math functions β†’ layer building blocks β†’ full networks + +## Module β†’ Package Structure +**πŸŽ“ Teaching vs. πŸ”§ Building**: +- **Learning side**: Work in `modules/layers/layers_dev.py` +- **Building side**: Exports to `tinytorch/core/layers.py` + +This module builds the fundamental transformations that compose into neural networks. 
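+
+As a preview of where this lands, here is a minimal usage sketch of the exported
+API (assuming the sync step has already run, so `tinytorch/core/layers.py` exists):
+
+```python
+from tinytorch.core.tensor import Tensor
+from tinytorch.core.layers import Dense
+
+layer = Dense(input_size=3, output_size=2)   # linear transform: y = Wx + b
+y = layer(Tensor([[1.0, 2.0, 3.0]]))         # output shape: (1, 2)
+```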
+""" + +# %% +#| default_exp core.layers + +# Setup and imports +import numpy as np +import sys +from typing import Union, Optional, Callable +import math + +# %% +#| export +import numpy as np +import math +import sys +from typing import Union, Optional, Callable +from tinytorch.core.tensor import Tensor + +# Import activation functions from the activations module +from tinytorch.core.activations import ReLU, Sigmoid, Tanh + +# Import our Tensor class +# sys.path.append('../../') +# from modules.tensor.tensor_dev import Tensor + +# print("πŸ”₯ TinyTorch Layers Module") +# print(f"NumPy version: {np.__version__}") +# print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") +# print("Ready to build neural network layers!") + +# %% [markdown] +""" +## Step 1: What is a Layer? + +A **layer** is a function that transforms tensors. Think of it as: +- **Input**: Tensor with some shape +- **Transformation**: Mathematical operation (linear, nonlinear, etc.) +- **Output**: Tensor with possibly different shape + +**The fundamental insight**: Neural networks are just function composition! +``` +x β†’ Layer1 β†’ Layer2 β†’ Layer3 β†’ y +``` + +**Why layers matter**: +- They're the building blocks of all neural networks +- Each layer learns a different transformation +- Composing layers creates complex functions +- Understanding layers = understanding neural networks + +Let's start with the most important layer: **Dense** (also called Linear or Fully Connected). +""" + +# %% +#| export +class Dense: + """ + Dense (Linear) Layer: y = Wx + b + + The fundamental building block of neural networks. + Performs linear transformation: matrix multiplication + bias addition. + + Args: + input_size: Number of input features + output_size: Number of output features + use_bias: Whether to include bias term (default: True) + + TODO: Implement the Dense layer with weight initialization and forward pass. + """ + + def __init__(self, input_size: int, output_size: int, use_bias: bool = True): + """ + Initialize Dense layer with random weights. + + TODO: + 1. Store layer parameters (input_size, output_size, use_bias) + 2. Initialize weights with small random values + 3. Initialize bias to zeros (if use_bias=True) + """ + raise NotImplementedError("Student implementation required") + + def forward(self, x: Tensor) -> Tensor: + """ + Forward pass: y = Wx + b + + Args: + x: Input tensor of shape (batch_size, input_size) + + Returns: + Output tensor of shape (batch_size, output_size) + + TODO: Implement matrix multiplication and bias addition + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + """Make layer callable: layer(x) same as layer.forward(x)""" + return self.forward(x) + +# %% +#| hide +#| export +class Dense: + """ + Dense (Linear) Layer: y = Wx + b + + The fundamental building block of neural networks. + Performs linear transformation: matrix multiplication + bias addition. 
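+
+    Example (illustrative; mirrors the test cell below):
+        layer = Dense(input_size=3, output_size=2)
+        y = layer(Tensor([[1.0, 2.0, 3.0]]))  # shape: (1, 2)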
+ """ + + def __init__(self, input_size: int, output_size: int, use_bias: bool = True): + """Initialize Dense layer with random weights.""" + self.input_size = input_size + self.output_size = output_size + self.use_bias = use_bias + + # Initialize weights with Xavier/Glorot initialization + # This helps with gradient flow during training + limit = math.sqrt(6.0 / (input_size + output_size)) + self.weights = Tensor( + np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32) + ) + + # Initialize bias to zeros + if use_bias: + self.bias = Tensor(np.zeros(output_size, dtype=np.float32)) + else: + self.bias = None + + def forward(self, x: Tensor) -> Tensor: + """Forward pass: y = Wx + b""" + # Matrix multiplication: x @ weights + # x shape: (batch_size, input_size) + # weights shape: (input_size, output_size) + # result shape: (batch_size, output_size) + output = Tensor(x.data @ self.weights.data) + + # Add bias if present + if self.bias is not None: + output = Tensor(output.data + self.bias.data) + + return output + + def __call__(self, x: Tensor) -> Tensor: + """Make layer callable: layer(x) same as layer.forward(x)""" + return self.forward(x) + +# %% [markdown] +""" +### πŸ§ͺ Test Your Dense Layer + +Once you implement the Dense layer above, run this cell to test it: +""" + +# %% +# Test the Dense layer +try: + print("=== Testing Dense Layer ===") + + # Create a simple Dense layer: 3 inputs β†’ 2 outputs + layer = Dense(input_size=3, output_size=2) + print(f"Created Dense layer: {layer.input_size} β†’ {layer.output_size}") + print(f"Weights shape: {layer.weights.shape}") + print(f"Bias shape: {layer.bias.shape if layer.bias else 'No bias'}") + + # Test with a single example + x = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3) + y = layer(x) + print(f"Input shape: {x.shape}") + print(f"Output shape: {y.shape}") + print(f"Input: {x.data}") + print(f"Output: {y.data}") + + # Test with batch + x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3) + y_batch = layer(x_batch) + print(f"\nBatch input shape: {x_batch.shape}") + print(f"Batch output shape: {y_batch.shape}") + + print("βœ… Dense layer working!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the Dense layer above!") + +# %% [markdown] +""" +## Step 2: Activation Functions - Adding Nonlinearity + +Now we'll use the activation functions from the **activations** module! + +**Clean Architecture**: We import the activation functions rather than redefining them: +```python +from tinytorch.core.activations import ReLU, Sigmoid, Tanh +``` + +**Why this matters**: +- **Separation of concerns**: Math functions vs. layer building blocks +- **Reusability**: Activations can be used anywhere in the system +- **Maintainability**: One place to update activation implementations +- **Composability**: Clean imports make neural networks easier to build + +**Why nonlinearity matters**: Without it, stacking layers is pointless! 
+``` +Linear β†’ Linear β†’ Linear = Just one big Linear transformation +Linear β†’ NonLinear β†’ Linear = Can learn complex patterns +``` +""" + +# %% [markdown] +""" +### πŸ§ͺ Test Activation Functions from Activations Module + +Let's test that we can use the activation functions from the activations module: +""" + +# %% +# Test activation functions from activations module +try: + print("=== Testing Activation Functions from Activations Module ===") + + # Test data: mix of positive, negative, and zero + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + print(f"Input: {x.data}") + + # Test ReLU from activations module + relu = ReLU() + y_relu = relu(x) + print(f"ReLU output: {y_relu.data}") + + # Test Sigmoid from activations module + sigmoid = Sigmoid() + y_sigmoid = sigmoid(x) + print(f"Sigmoid output: {y_sigmoid.data}") + + # Test Tanh from activations module + tanh = Tanh() + y_tanh = tanh(x) + print(f"Tanh output: {y_tanh.data}") + + print("βœ… Activation functions from activations module working!") + print("πŸŽ‰ Clean architecture: layers module uses activations module!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure the activations module is properly exported!") + +# %% [markdown] +""" +## Step 3: Layer Composition - Building Neural Networks + +Now comes the magic! We can **compose** layers to build neural networks: + +``` +Input β†’ Dense β†’ ReLU β†’ Dense β†’ Sigmoid β†’ Output +``` + +This is a 2-layer neural network that can learn complex nonlinear patterns! + +**Notice the clean architecture**: +- Dense layers handle linear transformations +- Activation functions (from activations module) handle nonlinearity +- Composition creates complex behaviors from simple building blocks +""" + +# %% +# Build a simple 2-layer neural network +try: + print("=== Building a 2-Layer Neural Network ===") + + # Network architecture: 3 β†’ 4 β†’ 2 + # Input: 3 features + # Hidden: 4 neurons with ReLU + # Output: 2 neurons with Sigmoid + + layer1 = Dense(input_size=3, output_size=4) + activation1 = ReLU() # From activations module + layer2 = Dense(input_size=4, output_size=2) + activation2 = Sigmoid() # From activations module + + print("Network architecture:") + print(f" Input: 3 features") + print(f" Hidden: {layer1.input_size} β†’ {layer1.output_size} (Dense + ReLU)") + print(f" Output: {layer2.input_size} β†’ {layer2.output_size} (Dense + Sigmoid)") + + # Test with sample data + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # 2 examples, 3 features each + print(f"\nInput shape: {x.shape}") + print(f"Input data: {x.data}") + + # Forward pass through the network + h1 = layer1(x) # Dense layer 1 + h1_activated = activation1(h1) # ReLU activation + h2 = layer2(h1_activated) # Dense layer 2 + output = activation2(h2) # Sigmoid activation + + print(f"\nAfter layer 1: {h1.shape}") + print(f"After ReLU: {h1_activated.shape}") + print(f"After layer 2: {h2.shape}") + print(f"Final output: {output.shape}") + print(f"Output values: {output.data}") + + print("\nπŸŽ‰ Neural network working! You just built your first neural network!") + print("πŸ—οΈ Clean architecture: Dense layers + Activations module = Neural Network") + print("Notice how the network transforms 3D input into 2D output through learned transformations.") + +except Exception as e: + print(f"❌ Error: {e}") + print("Make sure to implement the layers and check activations module!") + +# %% [markdown] +""" +## Step 4: Understanding What We Built + +Congratulations! 
You just implemented a clean, modular neural network architecture: + +### 🧱 **What You Built** +1. **Dense Layer**: Linear transformation `y = Wx + b` +2. **Activation Functions**: Imported from activations module (ReLU, Sigmoid, Tanh) +3. **Layer Composition**: Chaining layers to build networks + +### πŸ—οΈ **Clean Architecture Benefits** +- **Separation of concerns**: Math functions vs. layer building blocks +- **Reusability**: Activations can be used across different modules +- **Maintainability**: One place to update activation implementations +- **Composability**: Clean imports make complex networks easier to build + +### 🎯 **Key Insights** +- **Layers are functions**: They transform tensors from one space to another +- **Composition creates complexity**: Simple layers β†’ complex networks +- **Nonlinearity is crucial**: Without it, deep networks are just linear transformations +- **Neural networks are function approximators**: They learn to map inputs to outputs +- **Modular design**: Building blocks can be combined in many ways + +### πŸš€ **What's Next** +In the next modules, you'll learn: +- **Training**: How networks learn from data (backpropagation, optimizers) +- **Architectures**: Specialized layers for different problems (CNNs, RNNs) +- **Applications**: Using networks for real problems + +### πŸ”§ **Export to Package** +Run this to export your layers to the TinyTorch package: +```bash +python bin/tito.py sync +``` + +Then test your implementation: +```bash +python bin/tito.py test --module layers +``` + +**Great job! You've built a clean, modular foundation for neural networks!** πŸŽ‰ +""" + +# %% +# Final demonstration: A more complex example +try: + print("=== Final Demo: Image Classification Network ===") + + # Simulate a small image: 28x28 pixels flattened to 784 features + # This is like a tiny MNIST digit + image_size = 28 * 28 # 784 pixels + num_classes = 10 # 10 digits (0-9) + + # Build a 3-layer network for digit classification + # 784 β†’ 128 β†’ 64 β†’ 10 + layer1 = Dense(input_size=image_size, output_size=128) + relu1 = ReLU() # From activations module + layer2 = Dense(input_size=128, output_size=64) + relu2 = ReLU() # From activations module + layer3 = Dense(input_size=64, output_size=num_classes) + softmax = Sigmoid() # Using Sigmoid as a simple "probability-like" output + + print(f"Image classification network:") + print(f" Input: {image_size} pixels (28x28 image)") + print(f" Hidden 1: {layer1.input_size} β†’ {layer1.output_size} (Dense + ReLU)") + print(f" Hidden 2: {layer2.input_size} β†’ {layer2.output_size} (Dense + ReLU)") + print(f" Output: {layer3.input_size} β†’ {layer3.output_size} (Dense + Sigmoid)") + + # Simulate a batch of 5 images + batch_size = 5 + fake_images = Tensor(np.random.randn(batch_size, image_size).astype(np.float32)) + + # Forward pass + h1 = relu1(layer1(fake_images)) + h2 = relu2(layer2(h1)) + predictions = softmax(layer3(h2)) + + print(f"\nBatch processing:") + print(f" Input batch shape: {fake_images.shape}") + print(f" Predictions shape: {predictions.shape}") + print(f" Sample predictions: {predictions.data[0]}") # First image predictions + + print("\nπŸŽ‰ You built a neural network that could classify images!") + print("πŸ—οΈ Clean architecture: Dense layers + Activations module = Image Classifier") + print("With training, this network could learn to recognize handwritten digits!") + +except Exception as e: + print(f"❌ Error: {e}") + print("Check your layer implementations and activations module!") + +# %% [markdown] +""" 
+## πŸŽ“ Module Summary + +### What You Learned +1. **Layer Architecture**: Dense layers as linear transformations +2. **Clean Dependencies**: Layers module uses activations module +3. **Function Composition**: Simple building blocks β†’ complex networks +4. **Modular Design**: Separation of concerns for maintainable code + +### Key Architectural Insight +``` +activations (math functions) β†’ layers (building blocks) β†’ networks (applications) +``` + +This clean dependency graph makes the system: +- **Understandable**: Each module has a clear purpose +- **Testable**: Each module can be tested independently +- **Reusable**: Components can be used across different contexts +- **Maintainable**: Changes are localized to appropriate modules + +### Next Steps +- **Training**: Learn how networks learn from data +- **Advanced Architectures**: CNNs, RNNs, Transformers +- **Applications**: Real-world machine learning problems + +**Congratulations on building a clean, modular neural network foundation!** πŸš€ +""" \ No newline at end of file diff --git a/_proc/layers/tests/test_layers b/_proc/layers/tests/test_layers new file mode 100644 index 00000000..4f0af00b --- /dev/null +++ b/_proc/layers/tests/test_layers @@ -0,0 +1,347 @@ +""" +Tests for TinyTorch Layers module. + +Tests the core layer functionality including Dense layers, activation functions, +and layer composition. + +These tests work with the current implementation and provide stretch goals +for students to implement additional features. +""" + +import sys +import os +import pytest +import numpy as np + +# Add the parent directory to path to import layers_dev +sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) + +# Import from the module's development file +# Note: This imports the instructor version with full implementation +from layers_dev import Dense, Tensor + +# Import activation functions from the activations module +sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), '..', 'activations')) +from activations_dev import ReLU, Sigmoid, Tanh + +def safe_numpy(tensor): + """Get numpy array from tensor, using .numpy() if available, otherwise .data""" + if hasattr(tensor, 'numpy'): + return tensor.numpy() + else: + return tensor.data + +class TestDenseLayer: + """Test Dense (Linear) layer functionality.""" + + def test_dense_creation(self): + """Test creating Dense layers with different configurations.""" + # Basic dense layer + layer = Dense(input_size=3, output_size=2) + assert layer.input_size == 3 + assert layer.output_size == 2 + assert layer.use_bias == True + assert layer.weights.shape == (3, 2) + assert layer.bias.shape == (2,) + + # Dense layer without bias + layer_no_bias = Dense(input_size=4, output_size=3, use_bias=False) + assert layer_no_bias.use_bias == False + assert layer_no_bias.bias is None + + def test_dense_forward_single(self): + """Test Dense layer forward pass with single input.""" + layer = Dense(input_size=3, output_size=2) + + # Single input + x = Tensor([[1.0, 2.0, 3.0]]) + y = layer(x) + + assert y.shape == (1, 2) + assert isinstance(y, Tensor) + + def test_dense_forward_batch(self): + """Test Dense layer forward pass with batch input.""" + layer = Dense(input_size=3, output_size=2) + + # Batch input + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + y = layer(x) + + assert y.shape == (2, 2) + assert isinstance(y, Tensor) + + def test_dense_no_bias(self): + """Test Dense layer without bias.""" + layer = Dense(input_size=2, output_size=1, use_bias=False) + + x = Tensor([[1.0, 
2.0]]) + y = layer(x) + + assert y.shape == (1, 1) + # Should be just matrix multiplication without bias + expected = safe_numpy(x) @ safe_numpy(layer.weights) + np.testing.assert_array_almost_equal(safe_numpy(y), expected) + + def test_dense_callable(self): + """Test that Dense layer is callable.""" + layer = Dense(input_size=2, output_size=1) + x = Tensor([[1.0, 2.0]]) + + # Both should work + y1 = layer.forward(x) + y2 = layer(x) + + np.testing.assert_array_equal(safe_numpy(y1), safe_numpy(y2)) + +class TestActivationFunctions: + """Test activation function implementations.""" + + def test_relu_basic(self): + """Test ReLU activation function.""" + relu = ReLU() + x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]]) + y = relu(x) + + expected = [[0.0, 0.0, 0.0, 1.0, 2.0]] + np.testing.assert_array_equal(safe_numpy(y), expected) + + def test_relu_callable(self): + """Test that ReLU is callable.""" + relu = ReLU() + x = Tensor([[1.0, -1.0]]) + + y1 = relu.forward(x) + y2 = relu(x) + + np.testing.assert_array_equal(safe_numpy(y1), safe_numpy(y2)) + + def test_sigmoid_basic(self): + """Test Sigmoid activation function.""" + sigmoid = Sigmoid() + x = Tensor([[0.0]]) # sigmoid(0) = 0.5 + y = sigmoid(x) + + np.testing.assert_array_almost_equal(safe_numpy(y), [[0.5]]) + + def test_sigmoid_range(self): + """Test Sigmoid output range.""" + sigmoid = Sigmoid() + x = Tensor([[-10.0, 0.0, 10.0]]) + y = sigmoid(x) + + # Should be in range [0, 1] - use reasonable bounds + assert np.all(safe_numpy(y) >= 0) + assert np.all(safe_numpy(y) <= 1) + # Check that extreme values are close to bounds + assert safe_numpy(y)[0][0] < 0.01 # Very small for -10 + assert safe_numpy(y)[0][2] > 0.99 # Very large for 10 + + def test_tanh_basic(self): + """Test Tanh activation function.""" + tanh = Tanh() + x = Tensor([[0.0]]) # tanh(0) = 0 + y = tanh(x) + + np.testing.assert_array_almost_equal(safe_numpy(y), [[0.0]]) + + def test_tanh_range(self): + """Test Tanh output range.""" + tanh = Tanh() + x = Tensor([[-10.0, 0.0, 10.0]]) + y = tanh(x) + + # Should be in range [-1, 1] - use reasonable bounds + assert np.all(safe_numpy(y) >= -1) + assert np.all(safe_numpy(y) <= 1) + # Check that extreme values are close to bounds + assert safe_numpy(y)[0][0] < -0.99 # Very negative for -10 + assert safe_numpy(y)[0][2] > 0.99 # Very positive for 10 + +class TestLayerComposition: + """Test composing layers into neural networks.""" + + def test_simple_network(self): + """Test a simple 2-layer network.""" + # 3 β†’ 4 β†’ 2 network + layer1 = Dense(input_size=3, output_size=4) + relu = ReLU() + layer2 = Dense(input_size=4, output_size=2) + sigmoid = Sigmoid() + + # Forward pass + x = Tensor([[1.0, 2.0, 3.0]]) + h1 = layer1(x) + h1_activated = relu(h1) + h2 = layer2(h1_activated) + output = sigmoid(h2) + + assert h1.shape == (1, 4) + assert h1_activated.shape == (1, 4) + assert h2.shape == (1, 2) + assert output.shape == (1, 2) + + # Output should be in sigmoid range + assert np.all(safe_numpy(output) >= 0) + assert np.all(safe_numpy(output) <= 1) + + def test_batch_network(self): + """Test network with batch processing.""" + layer1 = Dense(input_size=2, output_size=3) + relu = ReLU() + layer2 = Dense(input_size=3, output_size=1) + + # Batch of 4 examples + x = Tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]) + + h1 = layer1(x) + h1_activated = relu(h1) + output = layer2(h1_activated) + + assert output.shape == (4, 1) + + def test_deep_network(self): + """Test deeper network composition.""" + # 5-layer network + layers = [ + 
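+            # 10 β†’ 8 β†’ 6 β†’ 4 β†’ 2: Dense layers interleaved with ReLU, ending in Sigmoid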
Dense(input_size=10, output_size=8), + ReLU(), + Dense(input_size=8, output_size=6), + ReLU(), + Dense(input_size=6, output_size=4), + ReLU(), + Dense(input_size=4, output_size=2), + Sigmoid() + ] + + x = Tensor([[1.0] * 10]) # 10 features + + # Forward pass through all layers + current = x + for layer in layers: + current = layer(current) + + assert current.shape == (1, 2) + # Final output should be in sigmoid range + assert np.all(safe_numpy(current) >= 0) + assert np.all(safe_numpy(current) <= 1) + +class TestEdgeCases: + """Test edge cases and error conditions.""" + + def test_zero_input(self): + """Test layers with zero input.""" + layer = Dense(input_size=3, output_size=2) + relu = ReLU() + + x = Tensor([[0.0, 0.0, 0.0]]) + y = layer(x) + y_relu = relu(y) + + assert y.shape == (1, 2) + assert y_relu.shape == (1, 2) + + def test_large_input(self): + """Test layers with large input values.""" + layer = Dense(input_size=2, output_size=1) + sigmoid = Sigmoid() + + x = Tensor([[1000.0, -1000.0]]) + y = layer(x) + y_sigmoid = sigmoid(y) + + # Should not overflow + assert not np.any(np.isnan(safe_numpy(y_sigmoid))) + assert not np.any(np.isinf(safe_numpy(y_sigmoid))) + + def test_single_neuron(self): + """Test single neuron layers.""" + layer = Dense(input_size=1, output_size=1) + x = Tensor([[5.0]]) + y = layer(x) + + assert y.shape == (1, 1) + +# Stretch goal tests (these will be skipped if methods don't exist) +class TestStretchGoals: + """Stretch goal tests for advanced features.""" + + @pytest.mark.skip(reason="Stretch goal: Weight initialization methods") + def test_weight_initialization_methods(self): + """Test different weight initialization strategies.""" + # Xavier initialization + layer_xavier = Dense(input_size=100, output_size=50, init_method='xavier') + weights_xavier = safe_numpy(layer_xavier.weights) + + # He initialization + layer_he = Dense(input_size=100, output_size=50, init_method='he') + weights_he = safe_numpy(layer_he.weights) + + # Check initialization ranges + xavier_limit = np.sqrt(6.0 / (100 + 50)) + assert np.all(np.abs(weights_xavier) <= xavier_limit) + + he_limit = np.sqrt(2.0 / 100) + assert np.std(weights_he) <= he_limit * 1.5 # Some tolerance + + @pytest.mark.skip(reason="Stretch goal: Layer parameter access") + def test_layer_parameters(self): + """Test accessing and modifying layer parameters.""" + layer = Dense(input_size=3, output_size=2) + + # Should be able to access parameters + assert hasattr(layer, 'parameters') + params = layer.parameters() + assert len(params) == 2 # weights and bias + + # Should be able to set parameters + new_weights = Tensor(np.ones((3, 2))) + layer.set_weights(new_weights) + np.testing.assert_array_equal(safe_numpy(layer.weights), safe_numpy(new_weights)) + + @pytest.mark.skip(reason="Stretch goal: Additional activation functions") + def test_additional_activations(self): + """Test additional activation functions.""" + # Leaky ReLU + leaky_relu = LeakyReLU(alpha=0.1) + x = Tensor([[-1.0, 0.0, 1.0]]) + y = leaky_relu(x) + expected = [[-0.1, 0.0, 1.0]] + np.testing.assert_array_almost_equal(safe_numpy(y), expected) + + # Softmax + softmax = Softmax() + x = Tensor([[1.0, 2.0, 3.0]]) + y = softmax(x) + # Should sum to 1 + assert np.allclose(np.sum(safe_numpy(y)), 1.0) + + @pytest.mark.skip(reason="Stretch goal: Dropout layer") + def test_dropout_layer(self): + """Test dropout layer implementation.""" + dropout = Dropout(p=0.5) + x = Tensor([[1.0, 2.0, 3.0, 4.0]]) + + # Training mode + dropout.train() + y_train = dropout(x) + + # 
Inference mode + dropout.eval() + y_eval = dropout(x) + + # In eval mode, should be same as input + np.testing.assert_array_equal(safe_numpy(y_eval), safe_numpy(x)) + + @pytest.mark.skip(reason="Stretch goal: Batch normalization") + def test_batch_normalization(self): + """Test batch normalization layer.""" + bn = BatchNorm1d(num_features=3) + x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) + y = bn(x) + + # Should normalize across batch dimension + assert y.shape == x.shape + # Mean should be close to 0, std close to 1 + assert np.allclose(np.mean(safe_numpy(y), axis=0), 0.0, atol=1e-6) + assert np.allclose(np.std(safe_numpy(y), axis=0), 1.0, atol=1e-6) \ No newline at end of file diff --git a/_proc/nbdev.yml b/_proc/nbdev.yml new file mode 100644 index 00000000..9e72b3f1 --- /dev/null +++ b/_proc/nbdev.yml @@ -0,0 +1,9 @@ +project: + output-dir: _docs + +website: + title: "TinyTorch" + site-url: "https://tinytorch.github.io/TinyTorch/" + description: "Build ML Systems from Scratch - A hands-on systems course" + repo-branch: main + repo-url: "https://github.com/tinytorch/TinyTorch/" diff --git a/_proc/setup/README.md b/_proc/setup/README.md new file mode 100644 index 00000000..aee767dd --- /dev/null +++ b/_proc/setup/README.md @@ -0,0 +1,141 @@ +# Setup Module + +Welcome to TinyTorch! This is your first module in the Machine Learning Systems course. + +## Overview + +The setup module teaches you the complete TinyTorch development workflow while introducing fundamental programming concepts. You'll learn to write code with NBDev directives, implement classes and functions, and understand the module-to-package export system. + +## Learning Goals + +- Understand the nbdev notebook-to-Python workflow +- Write your first TinyTorch code with `#| export` directives +- Implement system information collection and developer profiles +- Run tests and use the CLI tools +- Get comfortable with the development rhythm + +## Files + +- `setup_dev.py` - Main development file (Jupytext format with full educational content) +- `setup_dev.ipynb` - Jupyter notebook version (auto-generated and executed) +- `tinytorch_flame.txt` - ASCII art file containing the TinyTorch flame design +- `tests/test_setup.py` - Comprehensive pytest test suite +- `README.md` - This file + +## What You'll Implement + +### 1. Basic Functions +- `hello_tinytorch()` - Display ASCII art and welcome message +- `add_numbers()` - Basic arithmetic (foundation of ML operations) + +### 2. System Information Class +- `SystemInfo` - Collect and display Python version, platform, and machine info +- Compatibility checking for minimum requirements + +### 3. Developer Profile Class +- `DeveloperProfile` - Personalized developer information and signatures +- ASCII art customization and file loading +- Professional code attribution system + +## Usage + +### Python Script +```python +from setup_dev import hello_tinytorch, add_numbers, SystemInfo, DeveloperProfile + +# Display welcome message +hello_tinytorch() + +# Basic arithmetic +result = add_numbers(2, 3) + +# System information +info = SystemInfo() +print(f"System: {info}") +print(f"Compatible: {info.is_compatible()}") + +# Developer profile +profile = DeveloperProfile() +print(profile.get_full_profile()) +``` + +### Jupyter Notebook +Open `setup_dev.ipynb` and work through the educational content step by step. 
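+
+For reference, a typical run of the snippet above prints output along these
+lines (ASCII art omitted; exact values depend on your system):
+
+```
+System: Python 3.13 on Darwin (arm64)
+Compatible: True
+πŸ‘¨β€πŸ’» Developer: Vijay Janapa Reddi
+πŸ›οΈ Affiliation: Harvard University
+```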
+ +## Testing + +Run the comprehensive test suite using pytest: + +```bash +# Using the TinyTorch CLI (recommended) +python bin/tito.py test --module setup + +# Or directly with pytest +python -m pytest modules/setup/tests/test_setup.py -v +``` + +### Test Coverage + +The test suite includes **20 comprehensive tests** covering: +- βœ… **Function execution** - All functions run without errors +- βœ… **Output validation** - Correct content and formatting +- βœ… **Arithmetic operations** - Basic, negative, and floating-point math +- βœ… **System information** - Platform detection and compatibility +- βœ… **Developer profiles** - Default and custom configurations +- βœ… **ASCII art handling** - File loading and fallback behavior +- βœ… **Error recovery** - Graceful handling of missing files +- βœ… **Integration testing** - All components work together + +## Development Workflow + +This module teaches the core TinyTorch development cycle: + +1. **Write code** in the notebook using `#| export` directives +2. **Export code** with `python bin/tito.py sync --module setup` +3. **Run tests** with `python bin/tito.py test --module setup` +4. **Check progress** with `python bin/tito.py info` + +## Key Concepts + +- **NBDev workflow** - Write in notebooks, export to Python packages +- **Export directives** - Use `#| export` to mark code for export +- **Module β†’ Package mapping** - This module exports to `tinytorch/core/utils.py` +- **Teaching vs. Building** - Learn by modules, build by function +- **Student implementation** - TODO sections with instructor solutions hidden + +## Personalization Features + +### ASCII Art Customization +The ASCII art is loaded from `tinytorch_flame.txt`. You can customize it by: + +1. **Edit the file directly** - Modify `tinytorch_flame.txt` with your own ASCII art +2. **Custom parameter** - Pass your own ASCII art to `DeveloperProfile` +3. **Create your own design** - Your initials, logo, or motivational art + +### Developer Profile Customization +```python +my_profile = DeveloperProfile( + name="Your Name", + affiliation="Your University", + email="your.email@example.com", + github_username="yourgithub", + ascii_art="Your custom ASCII art here!" +) +``` + +## What You'll Learn + +This comprehensive module introduces: +- **NBDev educational patterns** - `#| export`, `#| hide` directives +- **File I/O operations** - Loading ASCII art with error handling +- **Object-oriented programming** - Classes, methods, and properties +- **System programming** - Platform detection and compatibility +- **Testing with pytest** - Professional test structure and assertions +- **Code organization** - Module structure and package exports +- **The TinyTorch development workflow** - Complete cycle from code to tests + +## Next Steps + +Once you've completed this module and all tests pass, you're ready to move on to the **tensor module** where you'll build the core data structures that power TinyTorch neural networks! + +The skills you learn here - the development workflow, testing patterns, and code organization - will be used throughout every module in TinyTorch. 
\ No newline at end of file diff --git a/_proc/setup/setup_dev b/_proc/setup/setup_dev new file mode 100644 index 00000000..a7670175 --- /dev/null +++ b/_proc/setup/setup_dev @@ -0,0 +1,535 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# Module 0: Setup - TinyπŸ”₯Torch Development Workflow + +Welcome to TinyTorch! This module teaches you the development workflow you'll use throughout the course. + +## Learning Goals +- Understand the nbdev notebook-to-Python workflow +- Write your first TinyTorch code +- Run tests and use the CLI tools +- Get comfortable with the development rhythm + +## The TinyTorch Development Cycle + +1. **Write code** in this notebook using `#| export` +2. **Export code** with `python bin/tito.py sync --module setup` +3. **Run tests** with `python bin/tito.py test --module setup` +4. **Check progress** with `python bin/tito.py info` + +Let's get started! +""" + +# %% +#| default_exp core.utils + +# Setup imports and environment +import sys +import platform +from datetime import datetime +import os +from pathlib import Path + +print("πŸ”₯ TinyTorch Development Environment") +print(f"Python {sys.version}") +print(f"Platform: {platform.system()} {platform.release()}") +print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + +# %% [markdown] +""" +## Step 1: Understanding the Module β†’ Package Structure + +**πŸŽ“ Teaching vs. πŸ”§ Building**: This course has two sides: +- **Teaching side**: You work in `modules/setup/setup_dev.ipynb` (learning-focused) +- **Building side**: Your code exports to `tinytorch/core/utils.py` (production package) + +**Key Concept**: The `#| default_exp core.utils` directive at the top tells nbdev to export all `#| export` cells to `tinytorch/core/utils.py`. + +This separation allows us to: +- Organize learning by **concepts** (modules) +- Organize code by **function** (package structure) +- Build a real ML framework while learning systematically + +Let's write a simple "Hello World" function with the `#| export` directive: +""" + +# %% +#| export +def hello_tinytorch(): + """ + A simple hello world function for TinyTorch. + + TODO: Implement this function to display TinyTorch ASCII art and welcome message. + Load the flame art from tinytorch_flame.txt file with graceful fallback. + """ + raise NotImplementedError("Student implementation required") + +def add_numbers(a, b): + """ + Add two numbers together. + + TODO: Implement addition of two numbers. + This is the foundation of all mathematical operations in ML. 
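+
+    Example:
+        add_numbers(2, 3)  # -> 5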
+ """ + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +def hello_tinytorch(): + """Display the TinyTorch ASCII art and welcome message.""" + try: + # Get the directory containing this file + current_dir = Path(__file__).parent + art_file = current_dir / "tinytorch_flame.txt" + + if art_file.exists(): + with open(art_file, 'r') as f: + ascii_art = f.read() + print(ascii_art) + print("TinyπŸ”₯Torch") + print("Build ML Systems from Scratch!") + else: + print("πŸ”₯ TinyTorch πŸ”₯") + print("Build ML Systems from Scratch!") + except NameError: + # Handle case when running in notebook where __file__ is not defined + try: + art_file = Path(os.getcwd()) / "tinytorch_flame.txt" + if art_file.exists(): + with open(art_file, 'r') as f: + ascii_art = f.read() + print(ascii_art) + print("TinyπŸ”₯Torch") + print("Build ML Systems from Scratch!") + else: + print("πŸ”₯ TinyTorch πŸ”₯") + print("Build ML Systems from Scratch!") + except: + print("πŸ”₯ TinyTorch πŸ”₯") + print("Build ML Systems from Scratch!") + +def add_numbers(a, b): + """Add two numbers together.""" + return a + b + +# %% [markdown] +""" +### πŸ§ͺ Test Your Implementation + +Once you implement the functions above, run this cell to test them: +""" + +# %% +# Test the functions in the notebook (will fail until implemented) +try: + print("Testing hello_tinytorch():") + hello_tinytorch() + print() + print("Testing add_numbers():") + print(f"2 + 3 = {add_numbers(2, 3)}") +except NotImplementedError as e: + print(f"⚠️ {e}") + print("Implement the functions above first!") + +# %% [markdown] +""" +## Step 2: A Simple Class + +Let's create a simple class that will help us understand system information. This is still basic, but shows how to structure classes in TinyTorch. +""" + +# %% +#| export +class SystemInfo: + """ + Simple system information class. + + TODO: Implement this class to collect and display system information. + """ + + def __init__(self): + """ + Initialize system information collection. + + TODO: Collect Python version, platform, and machine information. + """ + raise NotImplementedError("Student implementation required") + + def __str__(self): + """ + Return human-readable system information. + + TODO: Format system info as a readable string. + """ + raise NotImplementedError("Student implementation required") + + def is_compatible(self): + """ + Check if system meets minimum requirements. 
+ + TODO: Check if Python version is >= 3.8 + """ + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +class SystemInfo: + """Simple system information class.""" + + def __init__(self): + self.python_version = sys.version_info + self.platform = platform.system() + self.machine = platform.machine() + + def __str__(self): + return f"Python {self.python_version.major}.{self.python_version.minor} on {self.platform} ({self.machine})" + + def is_compatible(self): + """Check if system meets minimum requirements.""" + return self.python_version >= (3, 8) + +# %% [markdown] +""" +### πŸ§ͺ Test Your SystemInfo Class + +Once you implement the SystemInfo class above, run this cell to test it: +""" + +# %% +# Test the SystemInfo class (will fail until implemented) +try: + print("Testing SystemInfo class:") + info = SystemInfo() + print(f"System: {info}") + print(f"Compatible: {info.is_compatible()}") +except NotImplementedError as e: + print(f"⚠️ {e}") + print("Implement the SystemInfo class above first!") + +# %% [markdown] +""" +## Step 3: Developer Personalization + +Let's make TinyTorch yours! Create a developer profile that will identify you throughout your ML systems journey. +""" + +# %% +#| export +class DeveloperProfile: + """ + Developer profile for personalizing TinyTorch experience. + + TODO: Implement this class to store and display developer information. + Default to course instructor but allow students to personalize. + """ + + @staticmethod + def _load_default_flame(): + """ + Load the default TinyTorch flame ASCII art from file. + + TODO: Implement file loading for tinytorch_flame.txt with fallback. + """ + raise NotImplementedError("Student implementation required") + + def __init__(self, name="Vijay Janapa Reddi", affiliation="Harvard University", + email="vj@eecs.harvard.edu", github_username="profvjreddi", ascii_art=None): + """ + Initialize developer profile. + + TODO: Store developer information with sensible defaults. + Students should be able to customize this with their own info and ASCII art. + """ + raise NotImplementedError("Student implementation required") + + def __str__(self): + """ + Return formatted developer information. + + TODO: Format developer info as a professional signature with optional ASCII art. + """ + raise NotImplementedError("Student implementation required") + + def get_signature(self): + """ + Get a short signature for code headers. + + TODO: Return a concise signature like "Built by Name (@github)" + """ + raise NotImplementedError("Student implementation required") + + def get_ascii_art(self): + """ + Get ASCII art for the profile. + + TODO: Return custom ASCII art or default flame loaded from file. 
+ """ + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +class DeveloperProfile: + """Developer profile for personalizing TinyTorch experience.""" + + @staticmethod + def _load_default_flame(): + """Load the default TinyTorch flame ASCII art from file.""" + try: + # Try to load from the same directory as this module + try: + # Try to get the directory of the current file + current_dir = os.path.dirname(__file__) + except NameError: + # If __file__ is not defined (e.g., in notebook), use current directory + current_dir = os.getcwd() + + flame_path = os.path.join(current_dir, 'tinytorch_flame.txt') + + with open(flame_path, 'r', encoding='utf-8') as f: + flame_art = f.read() + + # Add the TinyπŸ”₯Torch text below the flame + return f"""{flame_art} + + TinyπŸ”₯Torch + Build ML Systems from Scratch! + """ + except (FileNotFoundError, IOError): + # Fallback to simple flame if file not found + return """ + πŸ”₯ TinyTorch Developer πŸ”₯ + . . . . . . + . . . . . . + . . . . . . . + . . . . . . . . + . . . . . . . . . + . . . . . . . . . . + . . . . . . . . . . . + . . . . . . . . . . . . + . . . . . . . . . . . . . +. . . . . . . . . . . . . . + \\ \\ \\ \\ \\ \\ \\ \\ \\ / / / / / / + \\ \\ \\ \\ \\ \\ \\ \\ / / / / / / + \\ \\ \\ \\ \\ \\ \\ / / / / / / + \\ \\ \\ \\ \\ \\ / / / / / / + \\ \\ \\ \\ \\ / / / / / / + \\ \\ \\ \\ / / / / / / + \\ \\ \\ / / / / / / + \\ \\ / / / / / / + \\ / / / / / / + \\/ / / / / / + \\/ / / / / + \\/ / / / + \\/ / / + \\/ / + \\/ + + TinyπŸ”₯Torch + Build ML Systems from Scratch! + """ + + def __init__(self, name="Vijay Janapa Reddi", affiliation="Harvard University", + email="vj@eecs.harvard.edu", github_username="profvjreddi", ascii_art=None): + self.name = name + self.affiliation = affiliation + self.email = email + self.github_username = github_username + self.ascii_art = ascii_art or self._load_default_flame() + + def __str__(self): + return f"πŸ‘¨β€πŸ’» {self.name} | {self.affiliation} | @{self.github_username}" + + def get_signature(self): + """Get a short signature for code headers.""" + return f"Built by {self.name} (@{self.github_username})" + + def get_ascii_art(self): + """Get ASCII art for the profile.""" + return self.ascii_art + + def get_full_profile(self): + """Get complete profile with ASCII art.""" + return f"""{self.ascii_art} + +πŸ‘¨β€πŸ’» Developer: {self.name} +πŸ›οΈ Affiliation: {self.affiliation} +πŸ“§ Email: {self.email} +πŸ™ GitHub: @{self.github_username} +πŸ”₯ Ready to build ML systems from scratch! +""" + +# %% [markdown] +""" +### πŸ§ͺ Test Your Developer Profile + +Customize your developer profile! Replace the default information with your own: +""" + +# %% +# Test the DeveloperProfile class +try: + print("Testing DeveloperProfile (with defaults):") + # Default profile (instructor) + default_profile = DeveloperProfile() + print(f"Profile: {default_profile}") + print(f"Signature: {default_profile.get_signature()}") + print() + + print("🎨 ASCII Art Preview:") + print(default_profile.get_ascii_art()) + print() + + print("πŸ”₯ Full Profile Display:") + print(default_profile.get_full_profile()) + print() + + # TODO: Students should customize this with their own information! 
+ print("🎯 YOUR TURN: Create your own profile!") + print("Uncomment and modify the lines below:") + print("# my_profile = DeveloperProfile(") + print("# name='Your Name',") + print("# affiliation='Your University/Company',") + print("# email='your.email@example.com',") + print("# github_username='yourgithub',") + print("# ascii_art='''") + print("# Your Custom ASCII Art Here!") + print("# Maybe your initials, a logo, or something fun!") + print("# '''") + print("# )") + print("# print(f'My Profile: {my_profile}')") + print("# print(f'My Signature: {my_profile.get_signature()}')") + print("# print(my_profile.get_full_profile())") + +except NotImplementedError as e: + print(f"⚠️ {e}") + print("Implement the DeveloperProfile class above first!") + +# %% [markdown] +""" +### 🎨 Personalization Challenge + +**For Students**: Make TinyTorch truly yours by: + +1. **Update your profile** in the cell above with your real information +2. **Create custom ASCII art** - your initials, a simple logo, or something that represents you +3. **Customize the flame file** - edit `tinytorch_flame.txt` to create your own default art +4. **Add your signature** to code you write throughout the course +5. **Show off your full profile** with the `get_full_profile()` method + +This isn't just about customization - it's about taking ownership of your learning journey in ML systems! + +**ASCII Art Customization Options:** + +**Option 1: Custom ASCII Art Parameter** +```python +my_profile = DeveloperProfile( + name="Your Name", + ascii_art=''' + Your Custom ASCII Art Here! + Maybe your initials, a logo, or something fun! + ''' +) +``` + +**Option 2: Edit the Default Flame File** +- Edit `tinytorch_flame.txt` in this directory +- Replace with your own ASCII art design +- All students using defaults will see your custom art! + +**ASCII Art Ideas:** +- Your initials in block letters +- A simple logo or symbol that represents you +- Your university mascot in ASCII +- A coding-themed design +- Something that motivates you! + +**Pro Tip**: The `tinytorch_flame.txt` file contains the beautiful default flame art. You can: +- Edit it directly for a personalized default +- Create your own `.txt` file and modify the code to load it +- Use online ASCII art generators for inspiration +""" + +# %% [markdown] +""" +## Step 4: Try the Export Process + +Now let's export our code! In your terminal, run: + +```bash +python bin/tito.py sync --module setup +``` + +This will export the code marked with `#| export` to `tinytorch/core/utils.py`. + +**What happens during export:** +1. nbdev scans this notebook for `#| export` cells +2. Extracts the Python code +3. Writes it to `tinytorch/core/utils.py` (because of `#| default_exp core.utils`) +4. Handles imports and dependencies automatically + +**πŸ” Verification**: After export, check `tinytorch/core/utils.py` - you'll see your functions there with auto-generated headers pointing back to this notebook! + +**Note**: The export process will use the instructor solutions (from `#|hide` cells) so the package will have working implementations even if you haven't completed the exercises yet. +""" + +# %% [markdown] +""" +## Step 5: Run Tests + +After exporting, run the tests: + +```bash +python bin/tito.py test --module setup +``` + +This will run all tests for the setup module and verify your implementation works correctly. + +## Step 6: Check Your Progress + +See your overall progress: + +```bash +python bin/tito.py info +``` + +This shows which modules are complete and which are pending. 
+""" + +# %% [markdown] +""" +## πŸŽ‰ Congratulations! + +You've learned the TinyTorch development workflow: + +1. βœ… Write code in notebooks with `#| export` +2. βœ… Export with `tito sync --module setup` +3. βœ… Test with `tito test --module setup` +4. βœ… Check progress with `tito info` + +**This is the rhythm you'll use for every module in TinyTorch.** + +### Next Steps + +Ready for the real work? Head to **Module 1: Tensor** where you'll build the core data structures that power everything else in TinyTorch. + +**Development Tips:** +- Always test your code in the notebook first +- Export frequently to catch issues early +- Read error messages carefully - they're designed to help +- When stuck, check if your code exports cleanly first + +Happy building! πŸ”₯ +""" diff --git a/_proc/setup/setup_dev.ipynb b/_proc/setup/setup_dev.ipynb new file mode 100644 index 00000000..2e862a19 --- /dev/null +++ b/_proc/setup/setup_dev.ipynb @@ -0,0 +1,703 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "output-file: setup_dev.html\n", + "title: \"Module 0: Setup - Tiny\\U0001F525Torch Development Workflow\"\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "a1a9c143", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 1: Understanding the Module β†’ Package Structure\n", + "\n", + "**πŸŽ“ Teaching vs. πŸ”§ Building**: This course has two sides:\n", + "- **Teaching side**: You work in `modules/setup/setup_dev.ipynb` (learning-focused)\n", + "- **Building side**: Your code exports to `tinytorch/core/utils.py` (production package)\n", + "\n", + "**Key Concept**: The `#| default_exp core.utils` directive at the top tells nbdev to export all `#| export` cells to `tinytorch/core/utils.py`.\n", + "\n", + "This separation allows us to:\n", + "- Organize learning by **concepts** (modules) \n", + "- Organize code by **function** (package structure)\n", + "- Build a real ML framework while learning systematically\n", + "\n", + "Let's write a simple \"Hello World\" function with the `#| export` directive:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "has_sd": true, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### add_numbers\n", + "\n", + "> add_numbers (a, b)\n", + "\n", + "*Add two numbers together.\n", + "\n", + "TODO: Implement addition of two numbers.\n", + "This is the foundation of all mathematical operations in ML.*" + ], + "text/plain": [ + "---\n", + "\n", + "### add_numbers\n", + "\n", + "> add_numbers (a, b)\n", + "\n", + "*Add two numbers together.\n", + "\n", + "TODO: Implement addition of two numbers.\n", + "This is the foundation of all mathematical operations in ML.*" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "#| output: asis\n", + "show_doc(add_numbers)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "has_sd": true, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### hello_tinytorch\n", + "\n", + "> hello_tinytorch ()\n", + "\n", + "*A simple hello world function for TinyTorch.\n", + "\n", + "TODO: Implement this function to display TinyTorch ASCII art and welcome message.\n", + "Load the flame art from tinytorch_flame.txt file with graceful fallback.*" + ], + "text/plain": [ + "---\n", + 
"\n", + "### hello_tinytorch\n", + "\n", + "> hello_tinytorch ()\n", + "\n", + "*A simple hello world function for TinyTorch.\n", + "\n", + "TODO: Implement this function to display TinyTorch ASCII art and welcome message.\n", + "Load the flame art from tinytorch_flame.txt file with graceful fallback.*" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "#| output: asis\n", + "show_doc(hello_tinytorch)" + ] + }, + { + "cell_type": "markdown", + "id": "b28103af", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### πŸ§ͺ Test Your Implementation\n", + "\n", + "Once you implement the functions above, run this cell to test them:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a1beca72", + "metadata": { + "execution": { + "iopub.execute_input": "2025-07-10T23:28:59.088616Z", + "iopub.status.busy": "2025-07-10T23:28:59.088506Z", + "iopub.status.idle": "2025-07-10T23:28:59.091981Z", + "shell.execute_reply": "2025-07-10T23:28:59.091554Z" + }, + "language": "python" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing hello_tinytorch():\n", + ". . ... ....... .... ... . . .. .... . .. . . . . . .... \n", + ". . .. .++. .. . . . .. ... . . . .. ... .. \n", + " . . . .=++=.. . . . .. . . .. . ... .. . . \n", + ". .. ... .++++=. . . . . .. . .. .\n", + ". . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . \n", + " . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . \n", + " .. .. ..++++++++-.. . . ..##... -%#. . . . . . \n", + ". .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . \n", + " ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... \n", + " . ..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ...\n", + " ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... .\n", + ". .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . .\n", + "..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. \n", + ".:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . \n", + ".++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... ....\n", + ":+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=.\n", + ":++++++++======++++++++++++=.. ...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%.\n", + ".+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%.\n", + ".=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%.\n", + "..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%.\n", + "...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%.\n", + ". ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%.\n", + " . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===.\n", + " ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... .\n", + " .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . 
\n", + "\n", + "TinyπŸ”₯Torch\n", + "Build ML Systems from Scratch!\n", + "\n", + "Testing add_numbers():\n", + "2 + 3 = 5\n" + ] + } + ], + "source": [ + "# Test the functions in the notebook (will fail until implemented)\n", + "try:\n", + " print(\"Testing hello_tinytorch():\")\n", + " hello_tinytorch()\n", + " print()\n", + " print(\"Testing add_numbers():\")\n", + " print(f\"2 + 3 = {add_numbers(2, 3)}\")\n", + "except NotImplementedError as e:\n", + " print(f\"⚠️ {e}\")\n", + " print(\"Implement the functions above first!\")" + ] + }, + { + "cell_type": "markdown", + "id": "887b9723", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 2: A Simple Class\n", + "\n", + "Let's create a simple class that will help us understand system information. This is still basic, but shows how to structure classes in TinyTorch." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "has_sd": true, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### SystemInfo\n", + "\n", + "> SystemInfo ()\n", + "\n", + "*Simple system information class.\n", + "\n", + "TODO: Implement this class to collect and display system information.*" + ], + "text/plain": [ + "---\n", + "\n", + "### SystemInfo\n", + "\n", + "> SystemInfo ()\n", + "\n", + "*Simple system information class.\n", + "\n", + "TODO: Implement this class to collect and display system information.*" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "#| output: asis\n", + "show_doc(SystemInfo)" + ] + }, + { + "cell_type": "markdown", + "id": "a182b8ad", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### πŸ§ͺ Test Your SystemInfo Class\n", + "\n", + "Once you implement the SystemInfo class above, run this cell to test it:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bfd7d3c4", + "metadata": { + "execution": { + "iopub.execute_input": "2025-07-10T23:28:59.101366Z", + "iopub.status.busy": "2025-07-10T23:28:59.101221Z", + "iopub.status.idle": "2025-07-10T23:28:59.103476Z", + "shell.execute_reply": "2025-07-10T23:28:59.103228Z" + }, + "language": "python" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing SystemInfo class:\n", + "System: Python 3.13 on Darwin (arm64)\n", + "Compatible: True\n" + ] + } + ], + "source": [ + "# Test the SystemInfo class (will fail until implemented)\n", + "try:\n", + " print(\"Testing SystemInfo class:\")\n", + " info = SystemInfo()\n", + " print(f\"System: {info}\")\n", + " print(f\"Compatible: {info.is_compatible()}\")\n", + "except NotImplementedError as e:\n", + " print(f\"⚠️ {e}\")\n", + " print(\"Implement the SystemInfo class above first!\")" + ] + }, + { + "cell_type": "markdown", + "id": "9a14de41", + "metadata": { + "cell_marker": "\"\"\"", + "lines_to_next_cell": 1 + }, + "source": [ + "## Step 3: Developer Personalization\n", + "\n", + "Let's make TinyTorch yours! Create a developer profile that will identify you throughout your ML systems journey." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "has_sd": true, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "---\n", + "\n", + "### DeveloperProfile\n", + "\n", + "> DeveloperProfile (name='Vijay Janapa Reddi', affiliation='Harvard\n", + "> University', email='vj@eecs.harvard.edu',\n", + "> github_username='profvjreddi', ascii_art=None)\n", + "\n", + "*Developer profile for personalizing TinyTorch experience.\n", + "\n", + "TODO: Implement this class to store and display developer information.\n", + "Default to course instructor but allow students to personalize.*" + ], + "text/plain": [ + "---\n", + "\n", + "### DeveloperProfile\n", + "\n", + "> DeveloperProfile (name='Vijay Janapa Reddi', affiliation='Harvard\n", + "> University', email='vj@eecs.harvard.edu',\n", + "> github_username='profvjreddi', ascii_art=None)\n", + "\n", + "*Developer profile for personalizing TinyTorch experience.\n", + "\n", + "TODO: Implement this class to store and display developer information.\n", + "Default to course instructor but allow students to personalize.*" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| echo: false\n", + "#| output: asis\n", + "show_doc(DeveloperProfile)" + ] + }, + { + "cell_type": "markdown", + "id": "b848981d", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### πŸ§ͺ Test Your Developer Profile\n", + "\n", + "Customize your developer profile! Replace the default information with your own:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5d80e79c", + "metadata": { + "execution": { + "iopub.execute_input": "2025-07-10T23:28:59.114628Z", + "iopub.status.busy": "2025-07-10T23:28:59.114540Z", + "iopub.status.idle": "2025-07-10T23:28:59.118055Z", + "shell.execute_reply": "2025-07-10T23:28:59.117792Z" + }, + "language": "python" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Testing DeveloperProfile (with defaults):\n", + "Profile: πŸ‘¨β€πŸ’» Vijay Janapa Reddi | Harvard University | @profvjreddi\n", + "Signature: Built by Vijay Janapa Reddi (@profvjreddi)\n", + "\n", + "🎨 ASCII Art Preview:\n", + ". . ... ....... .... ... . . .. .... . .. . . . . . .... \n", + ". . .. .++. .. . . . .. ... . . . .. ... .. \n", + " . . . .=++=.. . . . .. . . .. . ... .. . . \n", + ". .. ... .++++=. . . . . .. . .. .\n", + ". . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . \n", + " . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . \n", + " .. .. ..++++++++-.. . . ..##... -%#. . . . . . \n", + ". .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . \n", + " ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... \n", + " . ..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ...\n", + " ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... .\n", + ". .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . .\n", + "..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. \n", + ".:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . \n", + ".++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... ....\n", + ":+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=.\n", + ":++++++++======++++++++++++=.. 
...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%.\n", + ".+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%.\n", + ".=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%.\n", + "..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%.\n", + "...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%.\n", + ". ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%.\n", + " . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===.\n", + " ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... .\n", + " .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . \n", + "\n", + "\n", + " TinyπŸ”₯Torch\n", + " Build ML Systems from Scratch!\n", + " \n", + "\n", + "πŸ”₯ Full Profile Display:\n", + ". . ... ....... .... ... . . .. .... . .. . . . . . .... \n", + ". . .. .++. .. . . . .. ... . . . .. ... .. \n", + " . . . .=++=.. . . . .. . . .. . ... .. . . \n", + ". .. ... .++++=. . . . . .. . .. .\n", + ". . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . \n", + " . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . \n", + " .. .. ..++++++++-.. . . ..##... -%#. . . . . . \n", + ". .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . \n", + " ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... \n", + " . ..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ...\n", + " ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... .\n", + ". .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . .\n", + "..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. \n", + ".:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . \n", + ".++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... ....\n", + ":+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=.\n", + ":++++++++======++++++++++++=.. ...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%.\n", + ".+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%.\n", + ".=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%.\n", + "..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%.\n", + "...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%.\n", + ". ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%.\n", + " . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===.\n", + " ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... .\n", + " .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . 
\n", + "\n", + "\n", + " TinyπŸ”₯Torch\n", + " Build ML Systems from Scratch!\n", + " \n", + "\n", + "πŸ‘¨β€πŸ’» Developer: Vijay Janapa Reddi\n", + "πŸ›οΈ Affiliation: Harvard University\n", + "πŸ“§ Email: vj@eecs.harvard.edu\n", + "πŸ™ GitHub: @profvjreddi\n", + "πŸ”₯ Ready to build ML systems from scratch!\n", + "\n", + "\n", + "🎯 YOUR TURN: Create your own profile!\n", + "Uncomment and modify the lines below:\n", + "# my_profile = DeveloperProfile(\n", + "# name='Your Name',\n", + "# affiliation='Your University/Company',\n", + "# email='your.email@example.com',\n", + "# github_username='yourgithub',\n", + "# ascii_art='''\n", + "# Your Custom ASCII Art Here!\n", + "# Maybe your initials, a logo, or something fun!\n", + "# '''\n", + "# )\n", + "# print(f'My Profile: {my_profile}')\n", + "# print(f'My Signature: {my_profile.get_signature()}')\n", + "# print(my_profile.get_full_profile())\n" + ] + } + ], + "source": [ + "# Test the DeveloperProfile class\n", + "try:\n", + " print(\"Testing DeveloperProfile (with defaults):\")\n", + " # Default profile (instructor)\n", + " default_profile = DeveloperProfile()\n", + " print(f\"Profile: {default_profile}\")\n", + " print(f\"Signature: {default_profile.get_signature()}\")\n", + " print()\n", + " \n", + " print(\"🎨 ASCII Art Preview:\")\n", + " print(default_profile.get_ascii_art())\n", + " print()\n", + " \n", + " print(\"πŸ”₯ Full Profile Display:\")\n", + " print(default_profile.get_full_profile())\n", + " print()\n", + " \n", + " # TODO: Students should customize this with their own information!\n", + " print(\"🎯 YOUR TURN: Create your own profile!\")\n", + " print(\"Uncomment and modify the lines below:\")\n", + " print(\"# my_profile = DeveloperProfile(\")\n", + " print(\"# name='Your Name',\")\n", + " print(\"# affiliation='Your University/Company',\")\n", + " print(\"# email='your.email@example.com',\")\n", + " print(\"# github_username='yourgithub',\")\n", + " print(\"# ascii_art='''\")\n", + " print(\"# Your Custom ASCII Art Here!\")\n", + " print(\"# Maybe your initials, a logo, or something fun!\")\n", + " print(\"# '''\")\n", + " print(\"# )\")\n", + " print(\"# print(f'My Profile: {my_profile}')\")\n", + " print(\"# print(f'My Signature: {my_profile.get_signature()}')\")\n", + " print(\"# print(my_profile.get_full_profile())\")\n", + " \n", + "except NotImplementedError as e:\n", + " print(f\"⚠️ {e}\")\n", + " print(\"Implement the DeveloperProfile class above first!\")" + ] + }, + { + "cell_type": "markdown", + "id": "4f117574", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "### 🎨 Personalization Challenge\n", + "\n", + "**For Students**: Make TinyTorch truly yours by:\n", + "\n", + "1. **Update your profile** in the cell above with your real information\n", + "2. **Create custom ASCII art** - your initials, a simple logo, or something that represents you\n", + "3. **Customize the flame file** - edit `tinytorch_flame.txt` to create your own default art\n", + "4. **Add your signature** to code you write throughout the course\n", + "5. 
**Show off your full profile** with the `get_full_profile()` method\n", + "\n", + "This isn't just about customization - it's about taking ownership of your learning journey in ML systems!\n", + "\n", + "**ASCII Art Customization Options:**\n", + "\n", + "**Option 1: Custom ASCII Art Parameter**\n", + "```python\n", + "my_profile = DeveloperProfile(\n", + " name=\"Your Name\",\n", + " ascii_art='''\n", + " Your Custom ASCII Art Here!\n", + " Maybe your initials, a logo, or something fun!\n", + " '''\n", + ")\n", + "```\n", + "\n", + "**Option 2: Edit the Default Flame File**\n", + "- Edit `tinytorch_flame.txt` in this directory\n", + "- Replace with your own ASCII art design\n", + "- All students using defaults will see your custom art!\n", + "\n", + "**ASCII Art Ideas:**\n", + "- Your initials in block letters\n", + "- A simple logo or symbol that represents you\n", + "- Your university mascot in ASCII\n", + "- A coding-themed design\n", + "- Something that motivates you!\n", + "\n", + "**Pro Tip**: The `tinytorch_flame.txt` file contains the beautiful default flame art. You can:\n", + "- Edit it directly for a personalized default\n", + "- Create your own `.txt` file and modify the code to load it\n", + "- Use online ASCII art generators for inspiration" + ] + }, + { + "cell_type": "markdown", + "id": "3e34c7fe", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 4: Try the Export Process\n", + "\n", + "Now let's export our code! In your terminal, run:\n", + "\n", + "```bash\n", + "python bin/tito.py sync --module setup\n", + "```\n", + "\n", + "This will export the code marked with `#| export` to `tinytorch/core/utils.py`.\n", + "\n", + "**What happens during export:**\n", + "1. nbdev scans this notebook for `#| export` cells\n", + "2. Extracts the Python code \n", + "3. Writes it to `tinytorch/core/utils.py` (because of `#| default_exp core.utils`)\n", + "4. Handles imports and dependencies automatically\n", + "\n", + "**πŸ” Verification**: After export, check `tinytorch/core/utils.py` - you'll see your functions there with auto-generated headers pointing back to this notebook!\n", + "\n", + "**Note**: The export process will use the instructor solutions (from `#|hide` cells) so the package will have working implementations even if you haven't completed the exercises yet." + ] + }, + { + "cell_type": "markdown", + "id": "641ad5d7", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## Step 5: Run Tests\n", + "\n", + "After exporting, run the tests:\n", + "\n", + "```bash\n", + "python bin/tito.py test --module setup\n", + "```\n", + "\n", + "This will run all tests for the setup module and verify your implementation works correctly.\n", + "\n", + "## Step 6: Check Your Progress\n", + "\n", + "See your overall progress:\n", + "\n", + "```bash\n", + "python bin/tito.py info\n", + "```\n", + "\n", + "This shows which modules are complete and which are pending." + ] + }, + { + "cell_type": "markdown", + "id": "7a09b00d", + "metadata": { + "cell_marker": "\"\"\"" + }, + "source": [ + "## πŸŽ‰ Congratulations!\n", + "\n", + "You've learned the TinyTorch development workflow:\n", + "\n", + "1. βœ… Write code in notebooks with `#| export`\n", + "2. βœ… Export with `tito sync --module setup` \n", + "3. βœ… Test with `tito test --module setup`\n", + "4. βœ… Check progress with `tito info`\n", + "\n", + "**This is the rhythm you'll use for every module in TinyTorch.**\n", + "\n", + "### Next Steps\n", + "\n", + "Ready for the real work? 
Head to **Module 1: Tensor** where you'll build the core data structures that power everything else in TinyTorch.\n", + "\n", + "**Development Tips:**\n", + "- Always test your code in the notebook first\n", + "- Export frequently to catch issues early \n", + "- Read error messages carefully - they're designed to help\n", + "- When stuck, check if your code exports cleanly first\n", + "\n", + "Happy building! πŸ”₯" + ] + } + ], + "metadata": { + "jupytext": { + "main_language": "python" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/_proc/setup/tests/test_setup b/_proc/setup/tests/test_setup new file mode 100644 index 00000000..00e91a40 --- /dev/null +++ b/_proc/setup/tests/test_setup @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +""" +Tests for the setup module using pytest. +""" + +import pytest +import sys +import os +from pathlib import Path +from io import StringIO + +# Add the parent directory to the path so we can import setup_dev +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from setup_dev import hello_tinytorch, add_numbers, SystemInfo, DeveloperProfile + + +class TestSetupFunctions: + """Test setup module functions.""" + + def test_hello_tinytorch_executes(self): + """Test that hello_tinytorch runs without error.""" + # Should not raise any exceptions + hello_tinytorch() + + def test_hello_tinytorch_prints_content(self, capsys): + """Test that hello_tinytorch prints the expected content.""" + hello_tinytorch() + captured = capsys.readouterr() + + # Should print the branding text + assert "TinyπŸ”₯Torch" in captured.out + assert "Build ML Systems from Scratch!" 
in captured.out + + def test_add_numbers_basic(self): + """Test basic addition functionality.""" + assert add_numbers(2, 3) == 5 + assert add_numbers(10, 15) == 25 + assert add_numbers(0, 0) == 0 + + def test_add_numbers_negative(self): + """Test addition with negative numbers.""" + assert add_numbers(-5, 3) == -2 + assert add_numbers(-10, -15) == -25 + assert add_numbers(10, -5) == 5 + + def test_add_numbers_floats(self): + """Test addition with floating point numbers.""" + assert abs(add_numbers(2.5, 3.7) - 6.2) < 1e-9 + assert abs(add_numbers(1.1, 2.2) - 3.3) < 1e-9 + + +class TestSystemInfo: + """Test SystemInfo class.""" + + def test_system_info_creation(self): + """Test SystemInfo class instantiation.""" + info = SystemInfo() + assert hasattr(info, 'python_version') + assert hasattr(info, 'platform') + assert hasattr(info, 'machine') + + def test_system_info_properties(self): + """Test SystemInfo properties.""" + info = SystemInfo() + + # Check python_version is a version tuple + assert hasattr(info.python_version, 'major') + assert hasattr(info.python_version, 'minor') + assert isinstance(info.python_version.major, int) + assert isinstance(info.python_version.minor, int) + + # Check platform is a string + assert isinstance(info.platform, str) + assert len(info.platform) > 0 + + # Check machine is a string + assert isinstance(info.machine, str) + assert len(info.machine) > 0 + + def test_system_info_str(self): + """Test SystemInfo string representation.""" + info = SystemInfo() + str_repr = str(info) + + assert isinstance(str_repr, str) + assert "Python" in str_repr + assert str(info.python_version.major) in str_repr + assert str(info.python_version.minor) in str_repr + assert info.platform in str_repr + assert info.machine in str_repr + + def test_is_compatible(self): + """Test SystemInfo compatibility check.""" + info = SystemInfo() + compatible = info.is_compatible() + + # Should return a boolean + assert isinstance(compatible, bool) + + # Since we're running this test, Python should be >= 3.8 + assert compatible is True + + +class TestDeveloperProfile: + """Test DeveloperProfile class.""" + + def test_developer_profile_creation_defaults(self): + """Test DeveloperProfile with default values.""" + profile = DeveloperProfile() + + # Check default values + assert profile.name == "Vijay Janapa Reddi" + assert profile.affiliation == "Harvard University" + assert profile.email == "vj@eecs.harvard.edu" + assert profile.github_username == "profvjreddi" + assert profile.ascii_art is not None # Should have default flame + + def test_developer_profile_creation_custom(self): + """Test DeveloperProfile with custom values.""" + custom_art = """ + Custom ASCII Art + **************** + """ + profile = DeveloperProfile( + name="Test Student", + affiliation="Test University", + email="test@example.com", + github_username="teststudent", + ascii_art=custom_art + ) + + assert profile.name == "Test Student" + assert profile.affiliation == "Test University" + assert profile.email == "test@example.com" + assert profile.github_username == "teststudent" + assert profile.ascii_art == custom_art + + def test_developer_profile_str(self): + """Test DeveloperProfile string representation.""" + profile = DeveloperProfile() + str_repr = str(profile) + + assert isinstance(str_repr, str) + assert "πŸ‘¨β€πŸ’»" in str_repr + assert "Vijay Janapa Reddi" in str_repr + assert "Harvard University" in str_repr + assert "@profvjreddi" in str_repr + + def test_developer_profile_signature(self): + """Test DeveloperProfile 
signature method.""" + profile = DeveloperProfile() + signature = profile.get_signature() + + assert isinstance(signature, str) + assert "Built by" in signature + assert "Vijay Janapa Reddi" in signature + assert "@profvjreddi" in signature + + def test_developer_profile_ascii_art(self): + """Test DeveloperProfile ASCII art functionality.""" + # Test default ASCII art + profile = DeveloperProfile() + ascii_art = profile.get_ascii_art() + + assert isinstance(ascii_art, str) + assert "TinyπŸ”₯Torch" in ascii_art + assert "Build ML Systems from Scratch!" in ascii_art + assert len(ascii_art) > 100 # Should be substantial ASCII art + + # Test custom ASCII art + custom_art = "My Custom Art!" + custom_profile = DeveloperProfile(ascii_art=custom_art) + assert custom_profile.get_ascii_art() == custom_art + + def test_developer_profile_full_profile(self): + """Test DeveloperProfile full profile display.""" + profile = DeveloperProfile() + full_profile = profile.get_full_profile() + + assert isinstance(full_profile, str) + assert "TinyπŸ”₯Torch" in full_profile + assert "Build ML Systems from Scratch!" in full_profile + assert "πŸ‘¨β€πŸ’» Developer: Vijay Janapa Reddi" in full_profile + assert "πŸ›οΈ Affiliation: Harvard University" in full_profile + assert "πŸ“§ Email: vj@eecs.harvard.edu" in full_profile + assert "πŸ™ GitHub: @profvjreddi" in full_profile + assert "πŸ”₯ Ready to build ML systems from scratch!" in full_profile + + +class TestFileOperations: + """Test file-related operations.""" + + def test_ascii_art_file_exists(self): + """Test that the ASCII art file exists.""" + art_file = Path(__file__).parent.parent / "tinytorch_flame.txt" + assert art_file.exists(), "ASCII art file should exist" + assert art_file.is_file(), "ASCII art should be a file" + + def test_ascii_art_file_has_content(self): + """Test that the ASCII art file has content.""" + art_file = Path(__file__).parent.parent / "tinytorch_flame.txt" + content = art_file.read_text() + + assert len(content) > 0, "ASCII art file should not be empty" + assert len(content.splitlines()) > 10, "ASCII art should have multiple lines" + + def test_hello_tinytorch_handles_missing_file(self, monkeypatch, capsys): + """Test that hello_tinytorch handles missing ASCII art file gracefully.""" + # Mock Path.exists to return False + def mock_exists(self): + return False + + monkeypatch.setattr(Path, "exists", mock_exists) + + # Should still work without the file + hello_tinytorch() + captured = capsys.readouterr() + + # Should still print the branding text + assert "πŸ”₯ TinyTorch πŸ”₯" in captured.out + assert "Build ML Systems from Scratch!" 
in captured.out + + +class TestModuleIntegration: + """Test integration between different parts of the setup module.""" + + def test_all_functions_work_together(self): + """Test that all setup functions work without conflicts.""" + # Test functions + hello_tinytorch() # Should not raise + sum_result = add_numbers(5, 10) + + # Test classes + info = SystemInfo() + profile = DeveloperProfile() + + # All should work without errors + assert sum_result == 15 + assert str(info) # Should not be empty + assert str(profile) # Should not be empty + assert profile.get_signature() # Should not be empty + assert profile.get_ascii_art() # Should not be empty + + def test_no_import_errors(self): + """Test that imports work correctly.""" + # If we got here, imports worked + assert callable(hello_tinytorch) + assert callable(add_numbers) + assert callable(SystemInfo) + assert callable(DeveloperProfile) \ No newline at end of file diff --git a/_proc/setup/tinytorch_flame.txt b/_proc/setup/tinytorch_flame.txt new file mode 100644 index 00000000..e200993d --- /dev/null +++ b/_proc/setup/tinytorch_flame.txt @@ -0,0 +1,25 @@ +. . ... ....... .... ... . . .. .... . .. . . . . . .... +. . .. .++. .. . . . .. ... . . . .. ... .. + . . . .=++=.. . . . .. . . .. . ... .. . . +. .. ... .++++=. . . . . .. . .. . +. . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . + . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . + .. .. ..++++++++-.. . . ..##... -%#. . . . . . +. .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . + ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... + . ..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ... + ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... . +. .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . . +..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. +.:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . +.++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... .... +:+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=. +:++++++++======++++++++++++=.. ...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%. +.+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%. +.=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%. +..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%. +...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%. +. ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%. + . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===. + ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... . + .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . 
diff --git a/_proc/sidebar.yml b/_proc/sidebar.yml
new file mode 100644
index 00000000..ffbd6897
--- /dev/null
+++ b/_proc/sidebar.yml
@@ -0,0 +1,9 @@
+website:
+  sidebar:
+    contents:
+      - section: layers
+        contents:
+          - layers/layers_dev.ipynb
+      - section: setup
+        contents:
+          - setup/setup_dev.ipynb
diff --git a/_proc/tensor/README.md b/_proc/tensor/README.md
new file mode 100644
index 00000000..dd424495
--- /dev/null
+++ b/_proc/tensor/README.md
@@ -0,0 +1,202 @@
+# πŸ”₯ Module: Tensor
+
+Build the foundation of TinyTorch! This module implements the core Tensor class - the fundamental data structure that powers all neural networks and machine learning operations.
+
+## 🎯 Learning Objectives
+
+By the end of this module, you will:
+- βœ… Understand what tensors are and why they're essential for ML
+- βœ… Implement a complete Tensor class with core operations
+- βœ… Handle tensor shapes, data types, and memory management
+- βœ… Implement element-wise operations and reductions
+- βœ… Have a solid foundation for building neural networks
+
+## πŸ“‹ Module Structure
+
+```
+modules/tensor/
+β”œβ”€β”€ README.md           # πŸ“– This file - Module overview
+β”œβ”€β”€ tensor_dev.ipynb    # πŸ““ Main development notebook
+β”œβ”€β”€ test_tensor.py      # πŸ§ͺ Automated tests
+└── check_tensor.py     # βœ… Manual verification (coming soon)
+```
+
+## πŸš€ Getting Started
+
+### Step 1: Complete Prerequisites
+Make sure you've completed the setup module:
+```bash
+python bin/tito.py test --module setup  # Should pass
+```
+
+### Step 2: Open the Tensor Notebook
+```bash
+# Start from the tensor module directory
+cd modules/tensor/
+
+# Open the development notebook
+jupyter lab tensor_dev.ipynb
+```
+
+### Step 3: Work Through the Implementation
+The notebook guides you through building:
+1. **Basic Tensor class** - Constructor and properties
+2. **Shape management** - Understanding tensor dimensions
+3. **Arithmetic operations** - Addition, multiplication, etc.
+4. **Utility methods** - Reshape, transpose, sum, mean
+5. **Error handling** - Robust edge case management
+
+### Step 4: Export and Test
+```bash
+# Export your tensor implementation
+python bin/tito.py sync
+
+# Test your implementation
+python bin/tito.py test --module tensor
+```
+
+## πŸ“š What You'll Implement
+
+### Core Tensor Class
+You'll build a complete `Tensor` class that supports:
+
+#### 1. Construction and Properties
+```python
+# Creating tensors
+a = Tensor([1, 2, 3])           # 1D tensor
+b = Tensor([[1, 2], [3, 4]])    # 2D tensor
+c = Tensor(5.0)                 # Scalar tensor
+
+# Properties
+print(a.shape)    # (3,)
+print(b.size)     # 4
+print(c.dtype)    # float32
+```
+
+#### 2. Arithmetic Operations
+```python
+# Element-wise operations (operand shapes must be compatible)
+d = Tensor([4, 5, 6])
+result = a + d    # Addition
+result = a * 2    # Scalar multiplication
+result = b @ b    # Matrix multiplication (bonus)
+```
+
+#### 3. 
Utility Methods +```python +# Shape manipulation +reshaped = b.reshape(1, 4) # Change shape +transposed = b.transpose() # Swap dimensions + +# Reductions +total = a.sum() # Sum all elements +mean_val = a.mean() # Average value +max_val = a.max() # Maximum value +``` + +### Technical Requirements +Your Tensor class must: +- Handle multiple data types (int, float) +- Support N-dimensional arrays +- Implement proper error checking +- Work with NumPy arrays internally +- Export to `tinytorch.core.tensor` + +## πŸ§ͺ Testing Your Implementation + +### Automated Tests +```bash +python bin/tito.py test --module tensor +``` + +Tests verify: +- βœ… Tensor creation (scalars, vectors, matrices) +- βœ… Property access (shape, size, dtype) +- βœ… Arithmetic operations (all combinations) +- βœ… Utility methods (reshape, transpose, reductions) +- βœ… Error handling (invalid operations) + +### Interactive Testing +```python +# Test in the notebook or Python REPL +from tinytorch.core.tensor import Tensor + +# Create and test tensors +a = Tensor([1, 2, 3]) +b = Tensor([[1, 2], [3, 4]]) +print(a + 5) # Should work +print(a.sum()) # Should return scalar +``` + +## 🎯 Success Criteria + +Your tensor module is complete when: + +1. **All tests pass**: `python bin/tito.py test --module tensor` +2. **Tensor imports correctly**: `from tinytorch.core.tensor import Tensor` +3. **Basic operations work**: Can create tensors and do arithmetic +4. **Properties work**: Shape, size, dtype return correct values +5. **Utilities work**: Reshape, transpose, reductions function properly + +## πŸ’‘ Implementation Tips + +### Start with the Basics +1. **Simple constructor** - Handle lists and NumPy arrays +2. **Basic properties** - Shape, size, dtype +3. **One operation** - Start with addition +4. **Test frequently** - Verify each feature works + +### Design Patterns +```python +class Tensor: + def __init__(self, data, dtype=None): + # Convert input to numpy array + # Store shape, size, dtype + + def __add__(self, other): + # Handle tensor + tensor + # Handle tensor + scalar + # Return new Tensor + + def sum(self, axis=None): + # Reduce along specified axis + # Return scalar or tensor +``` + +### Common Challenges +- **Shape compatibility** - Check dimensions for operations +- **Data type handling** - Convert inputs consistently +- **Memory efficiency** - Don't create unnecessary copies +- **Error messages** - Provide helpful debugging info + +## πŸ”§ Advanced Features (Optional) + +If you finish early, try implementing: +- **Broadcasting** - Operations on different-shaped tensors +- **Slicing** - `tensor[1:3, :]` syntax +- **In-place operations** - `tensor += other` +- **Matrix multiplication** - `tensor @ other` + +## πŸš€ Next Steps + +Once you complete the tensor module: + +1. **Move to Autograd**: `cd modules/autograd/` +2. **Build automatic differentiation**: Enable gradient computation +3. **Combine with tensors**: Make tensors differentiable +4. **Prepare for neural networks**: Ready for the MLP module + +## πŸ”— Why Tensors Matter + +Tensors are the foundation of all ML systems: +- **Neural networks** store weights and activations as tensors +- **Training** computes gradients on tensors +- **Data processing** represents batches as tensors +- **GPU acceleration** operates on tensor primitives + +Your tensor implementation will power everything else in TinyTorch! + +## πŸŽ‰ Ready to Build? + +The tensor module is where TinyTorch really begins. 
You're about to create the fundamental building block that will power neural networks, training loops, and production ML systems. + +Take your time, test thoroughly, and enjoy building something that really works! πŸ”₯ \ No newline at end of file diff --git a/_proc/tensor/tensor_dev b/_proc/tensor/tensor_dev new file mode 100644 index 00000000..68494408 --- /dev/null +++ b/_proc/tensor/tensor_dev @@ -0,0 +1,390 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.17.1 +# --- + +# %% [markdown] +""" +# Module 1: Tensor - Core Data Structure + +Welcome to the Tensor module! This is where TinyTorch really begins. You'll implement the fundamental data structure that powers all ML systems. + +## Learning Goals +- Understand tensors as N-dimensional arrays with ML-specific operations +- Implement a complete Tensor class with arithmetic operations +- Handle shape management, data types, and memory layout +- Build the foundation for neural networks and automatic differentiation + +## Module β†’ Package Structure +**πŸŽ“ Teaching vs. πŸ”§ Building**: +- **Learning side**: Work in `modules/tensor/tensor_dev.py` +- **Building side**: Exports to `tinytorch/core/tensor.py` + +This module builds the core data structure that all other TinyTorch components will use. +""" + +# %% +#| default_exp core.tensor + +# Setup and imports +import numpy as np +import sys +from typing import Union, List, Tuple, Optional, Any + +print("πŸ”₯ TinyTorch Tensor Module") +print(f"NumPy version: {np.__version__}") +print(f"Python version: {sys.version_info.major}.{sys.version_info.minor}") +print("Ready to build tensors!") + +# %% [markdown] +""" +## Step 1: What is a Tensor? + +A **tensor** is an N-dimensional array with ML-specific operations. Think of it as: +- **Scalar** (0D): A single number - `5.0` +- **Vector** (1D): A list of numbers - `[1, 2, 3]` +- **Matrix** (2D): A 2D array - `[[1, 2], [3, 4]]` +- **Higher dimensions**: 3D, 4D, etc. for images, video, batches + +**Why not just use NumPy?** We will use NumPy internally, but our Tensor class will add: +- ML-specific operations (later: gradients, GPU support) +- Consistent API for neural networks +- Type safety and error checking +- Integration with the rest of TinyTorch + +Let's start building! +""" + +# %% +#| export +class Tensor: + """ + TinyTorch Tensor: N-dimensional array with ML operations. + + The fundamental data structure for all TinyTorch operations. + Wraps NumPy arrays with ML-specific functionality. + + TODO: Implement the core Tensor class with data handling and properties. + """ + + def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): + """ + Create a new tensor from data. + + Args: + data: Input data (scalar, list, or numpy array) + dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect. + + TODO: Implement tensor creation with proper type handling. 
+ """ + raise NotImplementedError("Student implementation required") + + @property + def data(self) -> np.ndarray: + """Access underlying numpy array.""" + raise NotImplementedError("Student implementation required") + + @property + def shape(self) -> Tuple[int, ...]: + """Get tensor shape.""" + raise NotImplementedError("Student implementation required") + + @property + def size(self) -> int: + """Get total number of elements.""" + raise NotImplementedError("Student implementation required") + + @property + def dtype(self) -> np.dtype: + """Get data type as numpy dtype.""" + raise NotImplementedError("Student implementation required") + + def __repr__(self) -> str: + """String representation.""" + raise NotImplementedError("Student implementation required") + +# %% +#| hide +#| export +class Tensor: + """ + TinyTorch Tensor: N-dimensional array with ML operations. + + The fundamental data structure for all TinyTorch operations. + Wraps NumPy arrays with ML-specific functionality. + """ + + def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None): + """ + Create a new tensor from data. + + Args: + data: Input data (scalar, list, or numpy array) + dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect. + """ + # Convert input to numpy array + if isinstance(data, (int, float, np.number)): + # Handle Python and NumPy scalars + if dtype is None: + # Auto-detect type: int for integers, float32 for floats + if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)): + dtype = 'int32' + else: + dtype = 'float32' + self._data = np.array(data, dtype=dtype) + elif isinstance(data, list): + # Let NumPy auto-detect type, then convert if needed + temp_array = np.array(data) + if dtype is None: + # Keep NumPy's auto-detected type, but prefer common ML types + if np.issubdtype(temp_array.dtype, np.integer): + dtype = 'int32' + elif np.issubdtype(temp_array.dtype, np.floating): + dtype = 'float32' + else: + dtype = temp_array.dtype + self._data = temp_array.astype(dtype) + elif isinstance(data, np.ndarray): + self._data = data.astype(dtype or data.dtype) + else: + raise TypeError(f"Cannot create tensor from {type(data)}") + + @property + def data(self) -> np.ndarray: + """Access underlying numpy array.""" + return self._data + + @property + def shape(self) -> Tuple[int, ...]: + """Get tensor shape.""" + return self._data.shape + + @property + def size(self) -> int: + """Get total number of elements.""" + return self._data.size + + @property + def dtype(self) -> np.dtype: + """Get data type as numpy dtype.""" + return self._data.dtype + + def __repr__(self) -> str: + """String representation.""" + return f"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})" + +# %% [markdown] +""" +### πŸ§ͺ Test Your Tensor Class + +Once you implement the Tensor class above, run this cell to test it: +""" + +# %% +# Test the basic Tensor class +try: + print("=== Testing Tensor Creation ===") + + # Scalar tensor + scalar = Tensor(5.0) + print(f"Scalar: {scalar}") + + # Vector tensor + vector = Tensor([1, 2, 3]) + print(f"Vector: {vector}") + + # Matrix tensor + matrix = Tensor([[1, 2], [3, 4]]) + print(f"Matrix: {matrix}") + + print(f"\nProperties:") + print(f"Matrix shape: {matrix.shape}") + print(f"Matrix size: {matrix.size}") + print(f"Matrix dtype: {matrix.dtype}") + +except NotImplementedError as e: + print(f"⚠️ {e}") + print("Implement the Tensor class above first!") + +# %% [markdown] +""" +## Step 2: 
Arithmetic Operations + +Now let's add the core arithmetic operations. These are essential for neural networks: +- **Addition**: `tensor + other` +- **Subtraction**: `tensor - other` +- **Multiplication**: `tensor * other` +- **Division**: `tensor / other` + +Each operation should handle both **tensor + tensor** and **tensor + scalar** cases. +""" + +# %% +#| export +def _add_arithmetic_methods(): + """ + Add arithmetic operations to Tensor class. + + TODO: Implement arithmetic methods (__add__, __sub__, __mul__, __truediv__) + and their reverse operations (__radd__, __rsub__, etc.) + """ + + def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Addition: tensor + other""" + raise NotImplementedError("Student implementation required") + + def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Subtraction: tensor - other""" + raise NotImplementedError("Student implementation required") + + def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Multiplication: tensor * other""" + raise NotImplementedError("Student implementation required") + + def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Division: tensor / other""" + raise NotImplementedError("Student implementation required") + + # Add methods to Tensor class + Tensor.__add__ = __add__ + Tensor.__sub__ = __sub__ + Tensor.__mul__ = __mul__ + Tensor.__truediv__ = __truediv__ + +# %% +#| hide +#| export +def _add_arithmetic_methods(): + """Add arithmetic operations to Tensor class.""" + + def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Addition: tensor + other""" + if isinstance(other, Tensor): + return Tensor(self._data + other._data) + else: # scalar + return Tensor(self._data + other) + + def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Subtraction: tensor - other""" + if isinstance(other, Tensor): + return Tensor(self._data - other._data) + else: # scalar + return Tensor(self._data - other) + + def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Multiplication: tensor * other""" + if isinstance(other, Tensor): + return Tensor(self._data * other._data) + else: # scalar + return Tensor(self._data * other) + + def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor': + """Division: tensor / other""" + if isinstance(other, Tensor): + return Tensor(self._data / other._data) + else: # scalar + return Tensor(self._data / other) + + def __radd__(self, other: Union[int, float]) -> 'Tensor': + """Reverse addition: scalar + tensor""" + return Tensor(other + self._data) + + def __rsub__(self, other: Union[int, float]) -> 'Tensor': + """Reverse subtraction: scalar - tensor""" + return Tensor(other - self._data) + + def __rmul__(self, other: Union[int, float]) -> 'Tensor': + """Reverse multiplication: scalar * tensor""" + return Tensor(other * self._data) + + def __rtruediv__(self, other: Union[int, float]) -> 'Tensor': + """Reverse division: scalar / tensor""" + return Tensor(other / self._data) + + # Add methods to Tensor class + Tensor.__add__ = __add__ + Tensor.__sub__ = __sub__ + Tensor.__mul__ = __mul__ + Tensor.__truediv__ = __truediv__ + Tensor.__radd__ = __radd__ + Tensor.__rsub__ = __rsub__ + Tensor.__rmul__ = __rmul__ + Tensor.__rtruediv__ = __rtruediv__ + +# Call the function to add arithmetic methods +_add_arithmetic_methods() + +# %% [markdown] +""" +### πŸ§ͺ Test Your Arithmetic Operations + +Once you implement the arithmetic methods above, run this cell to test them: +""" + +# 
%% +# Test arithmetic operations +try: + print("=== Testing Arithmetic Operations ===") + + a = Tensor([1, 2, 3]) + b = Tensor([4, 5, 6]) + + print(f"a = {a}") + print(f"b = {b}") + print() + + # Tensor + Tensor + print(f"a + b = {a + b}") + print(f"a - b = {a - b}") + print(f"a * b = {a * b}") + print(f"a / b = {a / b}") + print() + + # Tensor + Scalar + print(f"a + 10 = {a + 10}") + print(f"a * 2 = {a * 2}") + print() + + # Scalar + Tensor (reverse operations) + print(f"10 + a = {10 + a}") + print(f"2 * a = {2 * a}") + +except (NotImplementedError, AttributeError) as e: + print(f"⚠️ {e}") + print("Implement the arithmetic methods above first!") + +# %% [markdown] +""" +## Step 3: Try the Export Process + +Now let's export our tensor code! In your terminal, run: + +```bash +python bin/tito.py sync --module tensor +``` + +This will export the code marked with `#| export` to `tinytorch/core/tensor.py`. + +Then test it with: + +```bash +python bin/tito.py test --module tensor +``` + +## Next Steps + +πŸŽ‰ **Congratulations!** You've built the foundation of TinyTorch - the Tensor class. + +In the next modules, you'll add: +- **Automatic differentiation** (gradients) +- **Neural network layers** +- **Optimizers and training loops** +- **GPU acceleration** + +Each builds on this tensor foundation! +""" \ No newline at end of file diff --git a/_proc/tensor/tests/test_tensor b/_proc/tensor/tests/test_tensor new file mode 100644 index 00000000..7247af12 --- /dev/null +++ b/_proc/tensor/tests/test_tensor @@ -0,0 +1,346 @@ +""" +Tests for TinyTorch Tensor module. + +Tests the core tensor functionality including creation, arithmetic operations, +utility methods, and edge cases. + +These tests work with the current implementation and provide stretch goals +for students to implement additional methods. 
+"""
+
+import sys
+import os
+import pytest
+import numpy as np
+
+# Add the parent directory to path to import tensor_dev
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+
+# Import from the module's development file
+# Note: This imports the instructor version with full implementation
+from tensor_dev import Tensor
+
+def safe_numpy(tensor):
+    """Get numpy array from tensor, using .numpy() if available, otherwise .data"""
+    if hasattr(tensor, 'numpy'):
+        return tensor.numpy()
+    else:
+        return tensor.data
+
+def safe_item(tensor):
+    """Get scalar value from tensor, using .item() if available, otherwise .data"""
+    if hasattr(tensor, 'item'):
+        return tensor.item()
+    else:
+        return float(tensor.data)
+
+class TestTensorCreation:
+    """Test tensor creation from different data types."""
+
+    def test_scalar_creation(self):
+        """Test creating tensors from scalars."""
+        # Float scalar
+        t1 = Tensor(5.0)
+        assert t1.shape == ()
+        assert t1.size == 1
+        assert safe_item(t1) == 5.0
+
+        # Integer scalar
+        t2 = Tensor(42)
+        assert t2.shape == ()
+        assert t2.size == 1
+        assert safe_item(t2) == 42.0  # Integer scalars default to int32; the value still compares equal
+
+    def test_vector_creation(self):
+        """Test creating 1D tensors."""
+        t = Tensor([1, 2, 3, 4])
+        assert t.shape == (4,)
+        assert t.size == 4
+        assert t.dtype == np.int32  # Integer list defaults to int32
+        np.testing.assert_array_equal(safe_numpy(t), [1, 2, 3, 4])
+
+    def test_matrix_creation(self):
+        """Test creating 2D tensors."""
+        t = Tensor([[1, 2], [3, 4]])
+        assert t.shape == (2, 2)
+        assert t.size == 4
+        expected = np.array([[1.0, 2.0], [3.0, 4.0]], dtype='float32')
+        np.testing.assert_array_equal(safe_numpy(t), expected)
+
+    def test_numpy_array_creation(self):
+        """Test creating tensors from numpy arrays."""
+        arr = np.array([1, 2, 3], dtype='int32')
+        t = Tensor(arr)
+        assert t.shape == (3,)
+        assert t.dtype in ['int32', 'float32']  # May convert
+
+    def test_dtype_specification(self):
+        """Test explicit dtype specification."""
+        t = Tensor([1, 2, 3], dtype='int32')
+        assert t.dtype == np.int32
+
+    def test_invalid_data_type(self):
+        """Test error handling for invalid data types."""
+        with pytest.raises(TypeError):
+            Tensor("invalid")
+        with pytest.raises(TypeError):
+            Tensor({"dict": "invalid"})
+
+class TestTensorProperties:
+    """Test tensor properties and methods."""
+
+    def test_shape_property(self):
+        """Test shape property for different dimensions."""
+        assert Tensor(5).shape == ()
+        assert Tensor([1, 2, 3]).shape == (3,)
+        assert Tensor([[1, 2], [3, 4]]).shape == (2, 2)
+        assert Tensor([[[1]]]).shape == (1, 1, 1)
+
+    def test_size_property(self):
+        """Test size property."""
+        assert Tensor(5).size == 1
+        assert Tensor([1, 2, 3]).size == 3
+        assert Tensor([[1, 2], [3, 4]]).size == 4
+        assert Tensor([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).size == 8
+
+    def test_dtype_property(self):
+        """Test dtype property."""
+        t1 = Tensor(5.0)
+        assert t1.dtype == np.float32
+
+        t2 = Tensor([1, 2, 3], dtype='int32')
+        assert t2.dtype == np.int32
+
+    def test_repr(self):
+        """Test string representation."""
+        t = Tensor([1, 2, 3])
+        repr_str = repr(t)
+        assert 'Tensor' in repr_str
+        assert 'shape=' in repr_str
+        assert 'dtype=' in repr_str
+
+class TestArithmeticOperations:
+    """Test tensor arithmetic operations."""
+
+    def test_tensor_addition(self):
+        """Test tensor + tensor addition."""
+        a = Tensor([1, 2, 3])
+        b = Tensor([4, 5, 6])
+        result = a + b
+        expected = [5.0, 7.0, 9.0]
+        np.testing.assert_array_equal(safe_numpy(result), expected)
+
+    def 
test_scalar_addition(self): + """Test tensor + scalar addition.""" + a = Tensor([1, 2, 3]) + result = a + 10 + expected = [11.0, 12.0, 13.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_reverse_addition(self): + """Test scalar + tensor addition.""" + a = Tensor([1, 2, 3]) + result = 10 + a + expected = [11.0, 12.0, 13.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_tensor_subtraction(self): + """Test tensor - tensor subtraction.""" + a = Tensor([5, 7, 9]) + b = Tensor([1, 2, 3]) + result = a - b + expected = [4.0, 5.0, 6.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_scalar_subtraction(self): + """Test tensor - scalar subtraction.""" + a = Tensor([10, 20, 30]) + result = a - 5 + expected = [5.0, 15.0, 25.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_tensor_multiplication(self): + """Test tensor * tensor multiplication.""" + a = Tensor([2, 3, 4]) + b = Tensor([5, 6, 7]) + result = a * b + expected = [10.0, 18.0, 28.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_scalar_multiplication(self): + """Test tensor * scalar multiplication.""" + a = Tensor([1, 2, 3]) + result = a * 3 + expected = [3.0, 6.0, 9.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_reverse_multiplication(self): + """Test scalar * tensor multiplication.""" + a = Tensor([1, 2, 3]) + result = 3 * a + expected = [3.0, 6.0, 9.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_tensor_division(self): + """Test tensor / tensor division.""" + a = Tensor([6, 8, 10]) + b = Tensor([2, 4, 5]) + result = a / b + expected = [3.0, 2.0, 2.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_scalar_division(self): + """Test tensor / scalar division.""" + a = Tensor([6, 8, 10]) + result = a / 2 + expected = [3.0, 4.0, 5.0] + np.testing.assert_array_equal(safe_numpy(result), expected) + +class TestUtilityMethods: + """Test tensor utility methods (stretch goals for students).""" + + def test_reshape(self): + """Test tensor reshaping (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'reshape'): + reshaped = t.reshape(4) + assert reshaped.shape == (4,) + expected = [1.0, 2.0, 3.0, 4.0] + np.testing.assert_array_equal(safe_numpy(reshaped), expected) + + # Reshape to 2D + reshaped2 = t.reshape(1, 4) + assert reshaped2.shape == (1, 4) + else: + pytest.skip("reshape method not implemented - stretch goal for students") + + def test_transpose(self): + """Test tensor transpose (if implemented).""" + t = Tensor([[1, 2, 3], [4, 5, 6]]) + if hasattr(t, 'transpose'): + transposed = t.transpose() + assert transposed.shape == (3, 2) + expected = [[1.0, 4.0], [2.0, 5.0], [3.0, 6.0]] + np.testing.assert_array_equal(safe_numpy(transposed), expected) + else: + pytest.skip("transpose method not implemented - stretch goal for students") + + def test_sum_all(self): + """Test summing all elements (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'sum'): + result = t.sum() + expected = 10.0 + assert abs(safe_item(result) - expected) < 1e-6 + else: + pytest.skip("sum method not implemented - stretch goal for students") + + def test_sum_axis(self): + """Test summing along specific axes (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'sum'): + # Sum along axis 0 (columns) + sum0 = t.sum(axis=0) + expected0 = [4.0, 6.0] + np.testing.assert_array_equal(safe_numpy(sum0), expected0) + + # 
Sum along axis 1 (rows) + sum1 = t.sum(axis=1) + expected1 = [3.0, 7.0] + np.testing.assert_array_equal(safe_numpy(sum1), expected1) + else: + pytest.skip("sum method not implemented - stretch goal for students") + + def test_mean(self): + """Test mean calculation (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'mean'): + result = t.mean() + expected = 2.5 + assert abs(safe_item(result) - expected) < 1e-6 + else: + pytest.skip("mean method not implemented - stretch goal for students") + + def test_max(self): + """Test maximum value (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'max'): + result = t.max() + expected = 4.0 + assert abs(safe_item(result) - expected) < 1e-6 + else: + pytest.skip("max method not implemented - stretch goal for students") + + def test_min(self): + """Test minimum value (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'min'): + result = t.min() + expected = 1.0 + assert abs(safe_item(result) - expected) < 1e-6 + else: + pytest.skip("min method not implemented - stretch goal for students") + + def test_item_scalar(self): + """Test converting single-element tensor to scalar (if implemented).""" + t = Tensor(42.0) + if hasattr(t, 'item'): + assert t.item() == 42.0 + else: + pytest.skip("item method not implemented - stretch goal for students") + + def test_item_error(self): + """Test item() error for multi-element tensors (if implemented).""" + t = Tensor([1, 2, 3]) + if hasattr(t, 'item'): + with pytest.raises(ValueError): + t.item() + else: + pytest.skip("item method not implemented - stretch goal for students") + + def test_numpy_conversion(self): + """Test converting tensor to numpy array (if implemented).""" + t = Tensor([[1, 2], [3, 4]]) + if hasattr(t, 'numpy'): + arr = t.numpy() + assert isinstance(arr, np.ndarray) + expected = [[1.0, 2.0], [3.0, 4.0]] + np.testing.assert_array_equal(arr, expected) + else: + pytest.skip("numpy method not implemented - stretch goal for students") + +class TestEdgeCases: + """Test edge cases and error handling.""" + + def test_empty_list(self): + """Test creating tensor from empty list.""" + t = Tensor([]) + assert t.shape == (0,) + assert t.size == 0 + + def test_mixed_operations(self): + """Test combining different operations.""" + a = Tensor([[1, 2], [3, 4]]) + b = Tensor([[2, 2], [2, 2]]) + + # Complex expression + result = (a + b) * 2 - 1 + expected = [[5.0, 7.0], [9.0, 11.0]] + np.testing.assert_array_equal(safe_numpy(result), expected) + + def test_chained_operations(self): + """Test chaining multiple operations (if methods implemented).""" + t = Tensor([[1, 2, 3], [4, 5, 6]]) + if hasattr(t, 'sum') and hasattr(t, 'mean'): + result = t.sum(axis=1).mean() + expected = 10.5 # (6 + 15) / 2 + assert abs(safe_item(result) - expected) < 1e-6 + else: + pytest.skip("Advanced methods not implemented - stretch goal for students") + +def run_tensor_tests(): + """Run all tensor tests.""" + pytest.main([__file__, "-v"]) + +if __name__ == "__main__": + run_tensor_tests() \ No newline at end of file diff --git a/bin/tito.py b/bin/tito.py index 1c4226fa..b41ed294 100755 --- a/bin/tito.py +++ b/bin/tito.py @@ -816,25 +816,31 @@ def cmd_notebooks(args): # Build all modules console.print(f"πŸ”„ Building notebooks for {len(dev_files)} modules...") - # Convert each file + # Convert each file using the separate tool success_count = 0 error_count = 0 for dev_file in dev_files: try: - # Use the existing py_to_notebook.py tool + # Use the separate py_to_notebook.py tool result = 
subprocess.run([ sys.executable, "tools/py_to_notebook.py", str(dev_file) ], capture_output=True, text=True) + module_name = dev_file.parent.name + if result.returncode == 0: success_count += 1 - module_name = dev_file.parent.name - console.print(f" βœ… {module_name}: {dev_file.name} β†’ {dev_file.with_suffix('.ipynb').name}") + # Extract success message from the tool output + output_lines = result.stdout.strip().split('\n') + success_msg = output_lines[-1] if output_lines else f"{dev_file.name} β†’ {dev_file.with_suffix('.ipynb').name}" + # Clean up the message to remove the βœ… emoji since we'll add our own + clean_msg = success_msg.replace('βœ… ', '').replace('Converted ', '') + console.print(f" βœ… {module_name}: {clean_msg}") else: error_count += 1 - module_name = dev_file.parent.name - console.print(f" ❌ {module_name}: {result.stderr.strip()}") + error_msg = result.stderr.strip() if result.stderr.strip() else "Conversion failed" + console.print(f" ❌ {module_name}: {error_msg}") except Exception as e: error_count += 1 diff --git a/docs/development/module-development-guide.md b/docs/development/module-development-guide.md index a1fa5678..c1bc41f0 100644 --- a/docs/development/module-development-guide.md +++ b/docs/development/module-development-guide.md @@ -37,7 +37,7 @@ Mark what students should implement vs. what to provide. ### Step 4: Convert and Generate ```bash # Convert Python to notebook -python3 tools/py_to_notebook.py modules/{module}/{module}_dev.py +python bin/tito.py notebooks --module {module} # Generate student version python3 bin/generate_student_notebooks.py --module {module} @@ -203,7 +203,7 @@ Students implement: ### Convert Python to Notebook ```bash -python3 tools/py_to_notebook.py modules/{module}/{module}_dev.py +python bin/tito.py notebooks --module {module} ``` ### Generate Student Version diff --git a/docs/development/quick-module-reference.md b/docs/development/quick-module-reference.md index b49a6ba5..7af87a1a 100644 --- a/docs/development/quick-module-reference.md +++ b/docs/development/quick-module-reference.md @@ -34,7 +34,7 @@ class YourClass: ### 3. 
Convert and Generate ```bash # Convert Python to notebook -python3 tools/py_to_notebook.py modules/{module}/{module}_dev.py +python bin/tito.py notebooks --module {module} # Generate student version python3 bin/generate_student_notebooks.py --module {module} @@ -105,7 +105,7 @@ mkdir modules/{module} cp modules/example/example_dev.py modules/{module}/{module}_dev.py # Full workflow -python3 tools/py_to_notebook.py modules/{module}/{module}_dev.py +python bin/tito.py notebooks --module {module} python3 bin/generate_student_notebooks.py --module {module} # Test everything diff --git a/modules/_quarto.yml b/modules/_quarto.yml new file mode 100644 index 00000000..59a1e49c --- /dev/null +++ b/modules/_quarto.yml @@ -0,0 +1,22 @@ +project: + type: website + +format: + html: + theme: cosmo + css: styles.css + toc: true + keep-md: true + commonmark: default + +website: + twitter-card: true + open-graph: true + repo-actions: [issue] + navbar: + background: primary + search: true + sidebar: + style: floating + +metadata-files: [nbdev.yml, sidebar.yml] \ No newline at end of file diff --git a/modules/activations/activations_dev.ipynb b/modules/activations/activations_dev.ipynb new file mode 100644 index 00000000..31e80ea3 --- /dev/null +++ b/modules/activations/activations_dev.ipynb @@ -0,0 +1,1383 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "jupyter:\n", + " jupytext:\n", + " text_representation:\n", + " extension: .py\n", + " format_name: percent\n", + " format_version: '1.3'\n", + " jupytext_version: 1.17.1\n", + "---\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "# \ud83d\udd25 TinyTorch Activations Module\n", + "\n", + "Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power.\n", + "\n", + "## \ud83c\udfaf Learning Objectives\n", + "\n", + "By the end of this module, you will:\n", + "1. **Understand** why activation functions are essential for neural networks\n", + "2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh\n", + "3. **Test** your functions with various inputs to understand their behavior\n", + "4. **Use** these functions as building blocks for neural networks\n", + "\n", + "## \ud83e\udde0 Why Activation Functions Matter\n", + "\n", + "**Without activation functions, neural networks are just linear transformations!**\n", + "\n", + "```\n", + "Linear \u2192 Linear \u2192 Linear = Still just Linear\n", + "Linear \u2192 Activation \u2192 Linear = Can learn complex patterns!\n", + "```\n", + "\n", + "**Key insight**: Activation functions add **nonlinearity**, allowing networks to learn complex patterns that linear functions cannot capture.\n", + "\n", + "## \ud83d\udcda What You'll Build\n", + "\n", + "- **ReLU**: `f(x) = max(0, x)` - The workhorse of deep learning\n", + "- **Sigmoid**: `f(x) = 1 / (1 + e^(-x))` - Squashes to (0, 1)\n", + "- **Tanh**: `f(x) = tanh(x)` - Squashes to (-1, 1)\n", + "\n", + "Each function serves different purposes and has different mathematical properties.\n", + "\n", + "---\n", + "\n", + "Let's start building! 
\ud83d\ude80\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.activations\n", + "\n", + "# Standard library imports\n", + "import math\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import os\n", + "import sys\n", + "\n", + "# TinyTorch imports\n", + "from tinytorch.core.tensor import Tensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Helper function to detect if we're in a testing environment\n", + "def _should_show_plots():\n", + " \"\"\"\n", + " Determine if we should show plots based on the execution context.\n", + " \n", + " Returns False if:\n", + " - Running in pytest (detected by 'pytest' in sys.modules)\n", + " - Running in test environment (detected by environment variables)\n", + " - Running from command line test runner\n", + " \n", + " Returns True if:\n", + " - Running in Jupyter notebook\n", + " - Running interactively in Python\n", + " \"\"\"\n", + " # Check if we're running in pytest\n", + " if 'pytest' in sys.modules:\n", + " return False\n", + " \n", + " # Check if we're in a test environment\n", + " if os.environ.get('PYTEST_CURRENT_TEST'):\n", + " return False\n", + " \n", + " # Check if we're running from a test file (more specific check)\n", + " if any(arg.endswith('.py') and 'test_' in os.path.basename(arg) and 'tests/' in arg for arg in sys.argv):\n", + " return False\n", + " \n", + " # Check if we're running from the tito CLI test command\n", + " if len(sys.argv) > 0 and 'tito.py' in sys.argv[0] and 'test' in sys.argv:\n", + " return False\n", + " \n", + " # Default to showing plots (notebook/interactive environment)\n", + " return True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 1: ReLU Activation Function\n", + "\n", + "**ReLU** (Rectified Linear Unit) is the most popular activation function in deep learning.\n", + "\n", + "**Formula**: `f(x) = max(0, x)`\n", + "\n", + "**Properties**:\n", + "- **Simple**: Easy to compute and understand\n", + "- **Sparse**: Outputs exactly zero for negative inputs\n", + "- **Unbounded**: No upper limit on positive outputs\n", + "- **Non-saturating**: Doesn't suffer from vanishing gradients\n", + "\n", + "**When to use**: Almost everywhere! 
It's the default choice for hidden layers.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class ReLU:\n", + " \"\"\"\n", + " ReLU Activation: f(x) = max(0, x)\n", + " \n", + " The most popular activation function in deep learning.\n", + " Simple, effective, and computationally efficient.\n", + " \n", + " TODO: Implement ReLU activation function.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply ReLU: f(x) = max(0, x)\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor with ReLU applied element-wise\n", + " \n", + " TODO: Implement element-wise max(0, x) operation\n", + " Hint: Use np.maximum(0, x.data)\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make activation callable: relu(x) same as relu.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class ReLU:\n", + " \"\"\"ReLU Activation: f(x) = max(0, x)\"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Apply ReLU: f(x) = max(0, x)\"\"\"\n", + " return Tensor(np.maximum(0, x.data))\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your ReLU Function\n", + "\n", + "Once you implement ReLU above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test ReLU function\n", + "try:\n", + " print(\"=== Testing ReLU Function ===\")\n", + " \n", + " # Test data: mix of positive, negative, and zero\n", + " x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])\n", + " print(f\"Input: {x.data}\")\n", + " \n", + " # Test ReLU\n", + " relu = ReLU()\n", + " y = relu(x)\n", + " print(f\"ReLU output: {y.data}\")\n", + " print(f\"Expected: [[0. 0. 0. 1. 
3.]]\")\n", + " \n", + " # Test with different shapes\n", + " x_2d = Tensor([[-2.0, 1.0], [0.5, -0.5]])\n", + " y_2d = relu(x_2d)\n", + " print(f\"\\n2D Input: {x_2d.data}\")\n", + " print(f\"2D ReLU output: {y_2d.data}\")\n", + " \n", + " print(\"\u2705 ReLU working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the ReLU function above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83d\udcca Visualize ReLU Function\n", + "\n", + "Let's plot the ReLU function to see how it transforms inputs:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot ReLU function\n", + "try:\n", + " print(\"=== Plotting ReLU Function ===\")\n", + " \n", + " # Create a range of input values\n", + " x_range = np.linspace(-5, 5, 100)\n", + " x_tensor = Tensor([x_range])\n", + " \n", + " # Apply ReLU (student implementation)\n", + " relu = ReLU()\n", + " y_tensor = relu(x_tensor)\n", + " y_range = y_tensor.data[0]\n", + " \n", + " # Create ideal ReLU for comparison\n", + " y_ideal = np.maximum(0, x_range)\n", + " \n", + " # Only show plots if we're not in a testing environment\n", + " if _should_show_plots():\n", + " # Create the plot\n", + " plt.figure(figsize=(12, 8))\n", + " \n", + " # Plot both student implementation and ideal\n", + " plt.subplot(2, 2, 1)\n", + " plt.plot(x_range, y_range, 'b-', linewidth=3, label='Your ReLU Implementation')\n", + " plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal ReLU')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Output')\n", + " plt.title('ReLU: Your Implementation vs Ideal')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend()\n", + " plt.xlim(-5, 5)\n", + " plt.ylim(-1, 5)\n", + " \n", + " # Mathematical explanation plot\n", + " plt.subplot(2, 2, 2)\n", + " # Show the mathematical definition\n", + " x_math = np.array([-3, -2, -1, 0, 1, 2, 3])\n", + " y_math = np.maximum(0, x_math)\n", + " plt.stem(x_math, y_math, basefmt=' ', linefmt='g-', markerfmt='go')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('max(0, x)')\n", + " plt.title('Mathematical Definition: max(0, x)')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-4, 4)\n", + " plt.ylim(-0.5, 3.5)\n", + " \n", + " # Show the piecewise nature\n", + " plt.subplot(2, 2, 3)\n", + " x_left = np.linspace(-5, 0, 50)\n", + " x_right = np.linspace(0, 5, 50)\n", + " plt.plot(x_left, np.zeros_like(x_left), 'r-', linewidth=3, label='f(x) = 0 for x < 0')\n", + " plt.plot(x_right, x_right, 'b-', linewidth=3, label='f(x) = x for x \u2265 0')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Output')\n", + " plt.title('Piecewise Function Definition')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend()\n", + " plt.xlim(-5, 5)\n", + " plt.ylim(-1, 5)\n", + " \n", + " # Error analysis\n", + " plt.subplot(2, 2, 4)\n", + " difference = np.abs(y_range - y_ideal)\n", + " max_error = np.max(difference)\n", + " plt.plot(x_range, difference, 'purple', linewidth=2)\n", + " plt.axhline(y=0, 
color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('|Your Output - Ideal Output|')\n", + " plt.title(f'Implementation Error (Max: {max_error:.6f})')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-5, 5)\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + " \n", + " # Print analysis\n", + " print(f\"\\n\ud83d\udcca Analysis:\")\n", + " print(f\"\u2705 Maximum error: {max_error:.10f}\")\n", + " if max_error < 1e-10:\n", + " print(\"\ud83c\udf89 Perfect implementation!\")\n", + " elif max_error < 1e-6:\n", + " print(\"\ud83c\udf1f Excellent implementation!\")\n", + " elif max_error < 1e-3:\n", + " print(\"\ud83d\udc4d Good implementation!\")\n", + " else:\n", + " print(\"\ud83d\udd27 Implementation needs work.\")\n", + " \n", + " print(f\"\ud83d\udcc8 Function properties:\")\n", + " print(f\" \u2022 Range: [0, \u221e)\")\n", + " print(f\" \u2022 Piecewise: f(x) = 0 for x < 0, f(x) = x for x \u2265 0\")\n", + " print(f\" \u2022 Monotonic: Always increasing for x \u2265 0\")\n", + " print(f\" \u2022 Sparse: Exactly zero for negative inputs\")\n", + " else:\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " \n", + " # Always show the mathematical analysis\n", + " difference = np.abs(y_range - y_ideal)\n", + " max_error = np.max(difference)\n", + " print(f\"\\n\ud83d\udcca Mathematical Analysis:\")\n", + " print(f\"\u2705 Maximum error: {max_error:.10f}\")\n", + " if max_error < 1e-10:\n", + " print(\"\ud83c\udf89 Perfect implementation!\")\n", + " elif max_error < 1e-6:\n", + " print(\"\ud83c\udf1f Excellent implementation!\")\n", + " elif max_error < 1e-3:\n", + " print(\"\ud83d\udc4d Good implementation!\")\n", + " else:\n", + " print(\"\ud83d\udd27 Implementation needs work.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error in plotting: {e}\")\n", + " print(\"Make sure to implement the ReLU function above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 2: Sigmoid Activation Function\n", + "\n", + "**Sigmoid** squashes any input to the range (0, 1), making it useful for probabilities.\n", + "\n", + "**Formula**: `f(x) = 1 / (1 + e^(-x))`\n", + "\n", + "**Properties**:\n", + "- **Bounded**: Always outputs between 0 and 1\n", + "- **Smooth**: Differentiable everywhere\n", + "- **S-shaped**: Smooth transition from 0 to 1\n", + "- **Saturating**: Can suffer from vanishing gradients\n", + "\n", + "**When to use**: Binary classification (final layer), gates in RNNs/LSTMs.\n", + "\n", + "**\u26a0\ufe0f Numerical Stability**: Be careful with large inputs to avoid overflow!\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class Sigmoid:\n", + " \"\"\"\n", + " Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\n", + " \n", + " Squashes input to range (0, 1). 
Often used for binary classification.\n", + " \n", + " TODO: Implement Sigmoid activation function.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Sigmoid: f(x) = 1 / (1 + e^(-x))\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor with Sigmoid applied element-wise\n", + " \n", + " TODO: Implement sigmoid function (be careful with numerical stability!)\n", + " \n", + " Hint: For numerical stability, use:\n", + " - For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))\n", + " - For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Sigmoid:\n", + " \"\"\"Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Apply Sigmoid with numerical stability\"\"\"\n", + " # Use the numerically stable version to avoid overflow\n", + " # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))\n", + " # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))\n", + " x_data = x.data\n", + " result = np.zeros_like(x_data)\n", + " \n", + " # Stable computation\n", + " positive_mask = x_data >= 0\n", + " result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))\n", + " result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))\n", + " \n", + " return Tensor(result)\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Sigmoid Function\n", + "\n", + "Once you implement Sigmoid above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test Sigmoid function\n", + "try:\n", + " print(\"=== Testing Sigmoid Function ===\")\n", + " \n", + " # Test data: mix of positive, negative, and zero\n", + " x = Tensor([[-5.0, -1.0, 0.0, 1.0, 5.0]])\n", + " print(f\"Input: {x.data}\")\n", + " \n", + " # Test Sigmoid\n", + " sigmoid = Sigmoid()\n", + " y = sigmoid(x)\n", + " print(f\"Sigmoid output: {y.data}\")\n", + " print(\"Expected: values between 0 and 1\")\n", + " print(f\"All values in (0,1)? 
{np.all((y.data > 0) & (y.data < 1))}\")\n", + " \n", + " # Test specific values\n", + " x_zero = Tensor([[0.0]])\n", + " y_zero = sigmoid(x_zero)\n", + " print(f\"\\nSigmoid(0) = {y_zero.data[0, 0]:.4f} (should be 0.5)\")\n", + " \n", + " # Test extreme values (numerical stability)\n", + " x_extreme = Tensor([[-100.0, 100.0]])\n", + " y_extreme = sigmoid(x_extreme)\n", + " print(f\"Sigmoid([-100, 100]) = {y_extreme.data}\")\n", + " print(\"Should be close to [0, 1] without overflow errors\")\n", + " \n", + " print(\"\u2705 Sigmoid working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the Sigmoid function above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83d\udcca Visualize Sigmoid Function\n", + "\n", + "Let's plot the Sigmoid function to see its S-shaped curve:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot Sigmoid function\n", + "try:\n", + " print(\"=== Plotting Sigmoid Function ===\")\n", + " \n", + " # Create a range of input values\n", + " x_range = np.linspace(-10, 10, 100)\n", + " x_tensor = Tensor([x_range])\n", + " \n", + " # Apply Sigmoid (student implementation)\n", + " sigmoid = Sigmoid()\n", + " y_tensor = sigmoid(x_tensor)\n", + " y_range = y_tensor.data[0]\n", + " \n", + " # Create ideal Sigmoid for comparison\n", + " y_ideal = 1.0 / (1.0 + np.exp(-x_range))\n", + " \n", + " # Only show plots if we're not in a testing environment\n", + " if _should_show_plots():\n", + " # Create the plot\n", + " plt.figure(figsize=(12, 8))\n", + " \n", + " # Plot both student implementation and ideal\n", + " plt.subplot(2, 2, 1)\n", + " plt.plot(x_range, y_range, 'g-', linewidth=3, label='Your Sigmoid Implementation')\n", + " plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Sigmoid')\n", + " plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5, label='y = 0.5')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axhline(y=1, color='k', linestyle='-', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Output')\n", + " plt.title('Sigmoid: Your Implementation vs Ideal')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend()\n", + " plt.xlim(-10, 10)\n", + " plt.ylim(-0.1, 1.1)\n", + " \n", + " # Mathematical explanation plot\n", + " plt.subplot(2, 2, 2)\n", + " # Show key points\n", + " x_key = np.array([-5, -2, -1, 0, 1, 2, 5])\n", + " y_key = 1.0 / (1.0 + np.exp(-x_key))\n", + " plt.stem(x_key, y_key, basefmt=' ', linefmt='orange', markerfmt='o')\n", + " plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5)\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axhline(y=1, color='k', linestyle='-', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('1/(1+e^(-x))')\n", + " plt.title('Mathematical Definition: 1/(1+e^(-x))')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-6, 6)\n", + " plt.ylim(-0.1, 1.1)\n", + " \n", + " # Show the S-curve properties\n", + " plt.subplot(2, 2, 3)\n", + " x_detailed = np.linspace(-8, 8, 200)\n", + " y_detailed = 1.0 / (1.0 + np.exp(-x_detailed))\n", + " plt.plot(x_detailed, y_detailed, 'g-', linewidth=3)\n", + " # Add asymptotes\n", + " plt.axhline(y=0, color='r', 
linestyle='--', alpha=0.7, label='Lower asymptote: y = 0')\n", + " plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1')\n", + " plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.7, label='Midpoint: y = 0.5')\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Output')\n", + " plt.title('S-Curve Properties')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend()\n", + " plt.xlim(-8, 8)\n", + " plt.ylim(-0.1, 1.1)\n", + " \n", + " # Error analysis\n", + " plt.subplot(2, 2, 4)\n", + " difference = np.abs(y_range - y_ideal)\n", + " max_error = np.max(difference)\n", + " plt.plot(x_range, difference, 'purple', linewidth=2)\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('|Your Output - Ideal Output|')\n", + " plt.title(f'Implementation Error (Max: {max_error:.6f})')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-10, 10)\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + " \n", + " # Print analysis\n", + " print(f\"\\n\ud83d\udcca Analysis:\")\n", + " print(f\"\u2705 Maximum error: {max_error:.10f}\")\n", + " if max_error < 1e-10:\n", + " print(\"\ud83c\udf89 Perfect implementation!\")\n", + " elif max_error < 1e-6:\n", + " print(\"\ud83c\udf1f Excellent implementation!\")\n", + " elif max_error < 1e-3:\n", + " print(\"\ud83d\udc4d Good implementation!\")\n", + " else:\n", + " print(\"\ud83d\udd27 Implementation needs work.\")\n", + " \n", + " print(f\"\ud83d\udcc8 Function properties:\")\n", + " print(f\" \u2022 Range: (0, 1)\")\n", + " print(f\" \u2022 Symmetric around (0, 0.5)\")\n", + " print(f\" \u2022 Smooth and differentiable everywhere\")\n", + " print(f\" \u2022 Saturates for large |x| (vanishing gradient problem)\")\n", + " print(f\" \u2022 Useful for binary classification (outputs probabilities)\")\n", + " else:\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " \n", + " # Always show the mathematical analysis\n", + " difference = np.abs(y_range - y_ideal)\n", + " max_error = np.max(difference)\n", + " print(f\"\\n\ud83d\udcca Mathematical Analysis:\")\n", + " print(f\"\u2705 Maximum error: {max_error:.10f}\")\n", + " if max_error < 1e-10:\n", + " print(\"\ud83c\udf89 Perfect implementation!\")\n", + " elif max_error < 1e-6:\n", + " print(\"\ud83c\udf1f Excellent implementation!\")\n", + " elif max_error < 1e-3:\n", + " print(\"\ud83d\udc4d Good implementation!\")\n", + " else:\n", + " print(\"\ud83d\udd27 Implementation needs work.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error in plotting: {e}\")\n", + " print(\"Make sure to implement the Sigmoid function above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 3: Tanh Activation Function\n", + "\n", + "**Tanh** (Hyperbolic Tangent) squashes inputs to the range (-1, 1).\n", + "\n", + "**Formula**: `f(x) = tanh(x) = (e^x - e^(-x)) / (e^x + e^(-x))`\n", + "\n", + "**Properties**:\n", + "- **Bounded**: Always outputs between -1 and 1\n", + "- **Zero-centered**: Output is centered around 0\n", + "- **Smooth**: Differentiable everywhere\n", + "- **Stronger gradients**: Than sigmoid around zero\n", + "\n", + "**When to use**: Hidden layers when you want zero-centered outputs, RNNs.\n", + "\n", + "**Advantage over Sigmoid**: Zero-centered outputs help with gradient flow.\n", + "\"\"\"" + ] + }, + { + 
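"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Optional sketch (an aside, not part of the required implementation): tanh and\n", + "# sigmoid are two views of the same S-curve, related by tanh(x) = 2*sigmoid(2x) - 1.\n", + "# This check uses NumPy only, so it runs even before you implement Tanh below.\n", + "x_check = np.linspace(-4.0, 4.0, 9)\n", + "sigmoid_2x = 1.0 / (1.0 + np.exp(-2.0 * x_check)) # plain sigmoid; safe for |x| <= 4\n", + "identity_gap = np.max(np.abs(np.tanh(x_check) - (2.0 * sigmoid_2x - 1.0)))\n", + "print(f\"max |tanh(x) - (2*sigmoid(2x) - 1)| = {identity_gap:.2e} (~0 up to float round-off)\")" + ] + }, + { + 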
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class Tanh:\n", + " \"\"\"\n", + " Tanh Activation: f(x) = tanh(x)\n", + " \n", + " Squashes input to range (-1, 1). Zero-centered output.\n", + " \n", + " TODO: Implement Tanh activation function.\n", + " \"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Apply Tanh: f(x) = tanh(x)\n", + " \n", + " Args:\n", + " x: Input tensor\n", + " \n", + " Returns:\n", + " Output tensor with Tanh applied element-wise\n", + " \n", + " TODO: Implement tanh function\n", + " Hint: Use np.tanh(x.data)\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Tanh:\n", + " \"\"\"Tanh Activation: f(x) = tanh(x)\"\"\"\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Apply Tanh\"\"\"\n", + " return Tensor(np.tanh(x.data))\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Tanh Function\n", + "\n", + "Once you implement Tanh above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test Tanh function\n", + "try:\n", + " print(\"=== Testing Tanh Function ===\")\n", + " \n", + " # Test data: mix of positive, negative, and zero\n", + " x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])\n", + " print(f\"Input: {x.data}\")\n", + " \n", + " # Test Tanh\n", + " tanh = Tanh()\n", + " y = tanh(x)\n", + " print(f\"Tanh output: {y.data}\")\n", + " print(\"Expected: values between -1 and 1\")\n", + " print(f\"All values in (-1,1)? 
{np.all((y.data > -1) & (y.data < 1))}\")\n", + " \n", + " # Test specific values\n", + " x_zero = Tensor([[0.0]])\n", + " y_zero = tanh(x_zero)\n", + " print(f\"\\nTanh(0) = {y_zero.data[0, 0]:.4f} (should be 0.0)\")\n", + " \n", + " # Test extreme values\n", + " x_extreme = Tensor([[-10.0, 10.0]])\n", + " y_extreme = tanh(x_extreme)\n", + " print(f\"Tanh([-10, 10]) = {y_extreme.data}\")\n", + " print(\"Should be close to [-1, 1]\")\n", + " \n", + " print(\"\u2705 Tanh working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the Tanh function above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83d\udcca Visualize Tanh Function\n", + "\n", + "Let's plot the Tanh function to see its zero-centered S-shaped curve:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot Tanh function\n", + "try:\n", + " print(\"=== Plotting Tanh Function ===\")\n", + " \n", + " # Create a range of input values\n", + " x_range = np.linspace(-5, 5, 100)\n", + " x_tensor = Tensor([x_range])\n", + " \n", + " # Apply Tanh (student implementation)\n", + " tanh = Tanh()\n", + " y_tensor = tanh(x_tensor)\n", + " y_range = y_tensor.data[0]\n", + " \n", + " # Create ideal Tanh for comparison\n", + " y_ideal = np.tanh(x_range)\n", + " \n", + " # Only show plots if we're not in a testing environment\n", + " if _should_show_plots():\n", + " # Create the plot\n", + " plt.figure(figsize=(12, 8))\n", + " \n", + " # Plot both student implementation and ideal\n", + " plt.subplot(2, 2, 1)\n", + " plt.plot(x_range, y_range, 'orange', linewidth=3, label='Your Tanh Implementation')\n", + " plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Tanh')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axhline(y=1, color='k', linestyle='--', alpha=0.3)\n", + " plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Output')\n", + " plt.title('Tanh: Your Implementation vs Ideal')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend()\n", + " plt.xlim(-5, 5)\n", + " plt.ylim(-1.2, 1.2)\n", + " \n", + " # Mathematical explanation plot\n", + " plt.subplot(2, 2, 2)\n", + " # Show key points\n", + " x_key = np.array([-3, -2, -1, 0, 1, 2, 3])\n", + " y_key = np.tanh(x_key)\n", + " plt.stem(x_key, y_key, basefmt=' ', linefmt='purple', markerfmt='o')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axhline(y=1, color='k', linestyle='--', alpha=0.3)\n", + " plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('tanh(x)')\n", + " plt.title('Mathematical Definition: tanh(x)')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-4, 4)\n", + " plt.ylim(-1.2, 1.2)\n", + " \n", + " # Show symmetry property\n", + " plt.subplot(2, 2, 3)\n", + " x_sym = np.linspace(-4, 4, 100)\n", + " y_sym = np.tanh(x_sym)\n", + " plt.plot(x_sym, y_sym, 'orange', linewidth=3, label='tanh(x)')\n", + " plt.plot(-x_sym, -y_sym, 'b--', linewidth=2, alpha=0.7, label='-tanh(-x)')\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1')\n", + " 
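# These dashed lines are asymptotes: |tanh(x)| < 1 for every finite x, so the curve approaches but never touches +1 or -1\n", + "    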
plt.axhline(y=-1, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = -1')\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Output')\n", + " plt.title('Symmetry: tanh(-x) = -tanh(x)')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend()\n", + " plt.xlim(-4, 4)\n", + " plt.ylim(-1.2, 1.2)\n", + " \n", + " # Error analysis\n", + " plt.subplot(2, 2, 4)\n", + " difference = np.abs(y_range - y_ideal)\n", + " max_error = np.max(difference)\n", + " plt.plot(x_range, difference, 'purple', linewidth=2)\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('|Your Output - Ideal Output|')\n", + " plt.title(f'Implementation Error (Max: {max_error:.6f})')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-5, 5)\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + " \n", + " # Print analysis\n", + " print(f\"\\n\ud83d\udcca Analysis:\")\n", + " print(f\"\u2705 Maximum error: {max_error:.10f}\")\n", + " if max_error < 1e-10:\n", + " print(\"\ud83c\udf89 Perfect implementation!\")\n", + " elif max_error < 1e-6:\n", + " print(\"\ud83c\udf1f Excellent implementation!\")\n", + " elif max_error < 1e-3:\n", + " print(\"\ud83d\udc4d Good implementation!\")\n", + " else:\n", + " print(\"\ud83d\udd27 Implementation needs work.\")\n", + " \n", + " print(f\"\ud83d\udcc8 Function properties:\")\n", + " print(f\" \u2022 Range: (-1, 1)\")\n", + " print(f\" \u2022 Odd function: tanh(-x) = -tanh(x)\")\n", + " print(f\" \u2022 Symmetric around origin (0, 0)\")\n", + " print(f\" \u2022 Smooth and differentiable everywhere\")\n", + " print(f\" \u2022 Stronger gradients than sigmoid around zero\")\n", + " print(f\" \u2022 Related to sigmoid: tanh(x) = 2*sigmoid(2x) - 1\")\n", + " else:\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " \n", + " # Always show the mathematical analysis\n", + " difference = np.abs(y_range - y_ideal)\n", + " max_error = np.max(difference)\n", + " print(f\"\\n\ud83d\udcca Mathematical Analysis:\")\n", + " print(f\"\u2705 Maximum error: {max_error:.10f}\")\n", + " if max_error < 1e-10:\n", + " print(\"\ud83c\udf89 Perfect implementation!\")\n", + " elif max_error < 1e-6:\n", + " print(\"\ud83c\udf1f Excellent implementation!\")\n", + " elif max_error < 1e-3:\n", + " print(\"\ud83d\udc4d Good implementation!\")\n", + " else:\n", + " print(\"\ud83d\udd27 Implementation needs work.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error in plotting: {e}\")\n", + " print(\"Make sure to implement the Tanh function above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 4: Compare All Activation Functions\n", + "\n", + "Let's see how all three functions behave on the same input:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Compare all activation functions\n", + "try:\n", + " print(\"=== Comparing All Activation Functions ===\")\n", + " \n", + " # Test data: range from -5 to 5\n", + " x = Tensor([[-5.0, -2.0, -1.0, 0.0, 1.0, 2.0, 5.0]])\n", + " print(f\"Input: {x.data}\")\n", + " \n", + " # Apply all activations\n", + " relu = ReLU()\n", + " sigmoid = Sigmoid()\n", + " tanh = Tanh()\n", + " \n", + " y_relu = relu(x)\n", + " y_sigmoid = sigmoid(x)\n", + " y_tanh = tanh(x)\n", + " \n", + " print(f\"\\nReLU: {y_relu.data}\")\n", + " 
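# Same inputs, three different output ranges: ReLU in [0, inf), Sigmoid in (0, 1), Tanh in (-1, 1)\n", + "    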
print(f\"Sigmoid: {y_sigmoid.data}\")\n", + " print(f\"Tanh: {y_tanh.data}\")\n", + " \n", + " print(\"\\n\ud83d\udcca Key Differences:\")\n", + " print(\"- ReLU: Zeros out negative values, unbounded positive\")\n", + " print(\"- Sigmoid: Squashes to (0, 1), always positive\")\n", + " print(\"- Tanh: Squashes to (-1, 1), zero-centered\")\n", + " \n", + " print(\"\\n\u2705 All activation functions working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement all activation functions above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83d\udcca Comprehensive Activation Function Comparison\n", + "\n", + "Let's plot all three functions together to see their differences:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Plot all activation functions together\n", + "try:\n", + " print(\"=== Plotting All Activation Functions Together ===\")\n", + " \n", + " # Create a range of input values\n", + " x_range = np.linspace(-5, 5, 100)\n", + " x_tensor = Tensor([x_range])\n", + " \n", + " # Apply all activations (student implementations)\n", + " relu = ReLU()\n", + " sigmoid = Sigmoid()\n", + " tanh = Tanh()\n", + " \n", + " y_relu = relu(x_tensor).data[0]\n", + " y_sigmoid = sigmoid(x_tensor).data[0]\n", + " y_tanh = tanh(x_tensor).data[0]\n", + " \n", + " # Create ideal functions for comparison\n", + " y_relu_ideal = np.maximum(0, x_range)\n", + " y_sigmoid_ideal = 1.0 / (1.0 + np.exp(-x_range))\n", + " y_tanh_ideal = np.tanh(x_range)\n", + " \n", + " # Only show plots if we're not in a testing environment\n", + " if _should_show_plots():\n", + " # Create the comprehensive plot\n", + " plt.figure(figsize=(15, 10))\n", + " \n", + " # Main comparison plot\n", + " plt.subplot(2, 3, (1, 2))\n", + " plt.plot(x_range, y_relu, 'b-', linewidth=3, label='Your ReLU')\n", + " plt.plot(x_range, y_sigmoid, 'g-', linewidth=3, label='Your Sigmoid')\n", + " plt.plot(x_range, y_tanh, 'orange', linewidth=3, label='Your Tanh')\n", + " \n", + " # Add ideal functions as dashed lines\n", + " plt.plot(x_range, y_relu_ideal, 'b--', linewidth=1, alpha=0.7, label='Ideal ReLU')\n", + " plt.plot(x_range, y_sigmoid_ideal, 'g--', linewidth=1, alpha=0.7, label='Ideal Sigmoid')\n", + " plt.plot(x_range, y_tanh_ideal, '--', color='orange', linewidth=1, alpha=0.7, label='Ideal Tanh')\n", + " \n", + " # Add reference lines\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.axhline(y=1, color='k', linestyle='--', alpha=0.3)\n", + " plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3)\n", + " plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)\n", + " \n", + " # Formatting\n", + " plt.xlabel('Input (x)', fontsize=12)\n", + " plt.ylabel('Output f(x)', fontsize=12)\n", + " plt.title('Activation Functions: Your Implementation vs Ideal', fontsize=14, fontweight='bold')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.legend(fontsize=10, loc='upper left')\n", + " plt.xlim(-5, 5)\n", + " plt.ylim(-1.5, 5)\n", + " \n", + " # Mathematical definitions\n", + " plt.subplot(2, 3, 3)\n", + " plt.text(0.05, 0.95, 'Mathematical Definitions:', fontsize=12, fontweight='bold', \n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.85, 'ReLU:', fontsize=11, fontweight='bold', color='blue',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", 
+ " plt.text(0.05, 0.80, 'f(x) = max(0, x)', fontsize=10, fontfamily='monospace',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.70, 'Sigmoid:', fontsize=11, fontweight='bold', color='green',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.65, 'f(x) = 1/(1+e^(-x))', fontsize=10, fontfamily='monospace',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.55, 'Tanh:', fontsize=11, fontweight='bold', color='orange',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.50, 'f(x) = tanh(x)', fontsize=10, fontfamily='monospace',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.45, ' = (e^x-e^(-x))/(e^x+e^(-x))', fontsize=10, fontfamily='monospace',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " \n", + " plt.text(0.05, 0.30, 'Key Properties:', fontsize=12, fontweight='bold',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.25, '\u2022 ReLU: Sparse, unbounded', fontsize=10, color='blue',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.20, '\u2022 Sigmoid: Bounded (0,1)', fontsize=10, color='green',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.text(0.05, 0.15, '\u2022 Tanh: Zero-centered (-1,1)', fontsize=10, color='orange',\n", + " transform=plt.gca().transAxes, verticalalignment='top')\n", + " plt.axis('off')\n", + " \n", + " # Error analysis for ReLU\n", + " plt.subplot(2, 3, 4)\n", + " error_relu = np.abs(y_relu - y_relu_ideal)\n", + " plt.plot(x_range, error_relu, 'b-', linewidth=2)\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Error')\n", + " plt.title(f'ReLU Error (Max: {np.max(error_relu):.2e})')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-5, 5)\n", + " \n", + " # Error analysis for Sigmoid\n", + " plt.subplot(2, 3, 5)\n", + " error_sigmoid = np.abs(y_sigmoid - y_sigmoid_ideal)\n", + " plt.plot(x_range, error_sigmoid, 'g-', linewidth=2)\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Error')\n", + " plt.title(f'Sigmoid Error (Max: {np.max(error_sigmoid):.2e})')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-5, 5)\n", + " \n", + " # Error analysis for Tanh\n", + " plt.subplot(2, 3, 6)\n", + " error_tanh = np.abs(y_tanh - y_tanh_ideal)\n", + " plt.plot(x_range, error_tanh, 'orange', linewidth=2)\n", + " plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)\n", + " plt.xlabel('Input (x)')\n", + " plt.ylabel('Error')\n", + " plt.title(f'Tanh Error (Max: {np.max(error_tanh):.2e})')\n", + " plt.grid(True, alpha=0.3)\n", + " plt.xlim(-5, 5)\n", + " \n", + " plt.tight_layout()\n", + " plt.show()\n", + " \n", + " # Comprehensive analysis\n", + " print(\"\\n\ud83d\udcca Comprehensive Analysis:\")\n", + " print(\"=\" * 60)\n", + " \n", + " # Function ranges\n", + " print(\"\ud83d\udcc8 Output Ranges:\")\n", + " print(f\" ReLU: [{np.min(y_relu):.3f}, {np.max(y_relu):.3f}]\")\n", + " print(f\" Sigmoid: [{np.min(y_sigmoid):.3f}, {np.max(y_sigmoid):.3f}]\")\n", + " print(f\" Tanh: [{np.min(y_tanh):.3f}, {np.max(y_tanh):.3f}]\")\n", + " \n", + " # Implementation accuracy\n", + " print(\"\\n\ud83c\udfaf Implementation Accuracy:\")\n", + " max_errors = [np.max(error_relu), np.max(error_sigmoid), np.max(error_tanh)]\n", + " functions = 
['ReLU', 'Sigmoid', 'Tanh']\n", + " \n", + " for func, error in zip(functions, max_errors):\n", + " if error < 1e-10:\n", + " status = \"\u2705 PERFECT\"\n", + " elif error < 1e-6:\n", + " status = \"\u2705 EXCELLENT\"\n", + " elif error < 1e-3:\n", + " status = \"\u26a0\ufe0f GOOD\"\n", + " else:\n", + " status = \"\u274c NEEDS WORK\"\n", + " print(f\" {func:8s}: {status:12s} (error: {error:.2e})\")\n", + " \n", + " # Mathematical properties verification\n", + " print(\"\\n\ud83d\udd0d Mathematical Properties:\")\n", + " \n", + " # Zero-point test (each function has a known value at x = 0)\n", + " x_zero = Tensor([[0.0]])\n", + " print(\" Zero-point test (ReLU/Tanh: f(0) = 0, Sigmoid: f(0) = 0.5):\")\n", + " for name, func in [(\"ReLU\", relu), (\"Sigmoid\", sigmoid), (\"Tanh\", tanh)]:\n", + " output = func(x_zero).data[0, 0]\n", + " expected = 0.0 if name != \"Sigmoid\" else 0.5\n", + " print(f\" {name:8s}: f(0) = {output:.4f} {'\u2705' if abs(output - expected) < 1e-6 else '\u274c'}\")\n", + " \n", + " # Monotonicity test\n", + " print(\" Monotonicity test (should be non-decreasing):\")\n", + " test_vals = np.array([-2, -1, 0, 1, 2])\n", + " x_test = Tensor([test_vals])\n", + " for name, func in [(\"ReLU\", relu), (\"Sigmoid\", sigmoid), (\"Tanh\", tanh)]:\n", + " outputs = func(x_test).data[0]\n", + " is_monotonic = np.all(outputs[1:] >= outputs[:-1])\n", + " print(f\" {name:8s}: {'\u2705 Monotonic' if is_monotonic else '\u274c Not monotonic'}\")\n", + " \n", + " print(\"\\n\ud83c\udf89 Comparison complete! Use these insights to understand each function's role in neural networks.\")\n", + " else:\n", + " print(\"\ud83d\udcca Plots disabled during testing - this is normal!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error in plotting: {e}\")\n", + " print(\"Make sure matplotlib is installed and all functions are implemented!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 5: Understanding Activation Function Properties\n", + "\n", + "Let's explore the mathematical properties of each function:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Explore activation function properties\n", + "try:\n", + " print(\"=== Activation Function Properties ===\")\n", + " \n", + " # Create test functions\n", + " relu = ReLU()\n", + " sigmoid = Sigmoid()\n", + " tanh = Tanh()\n", + " \n", + " # Test with a range of values\n", + " test_values = np.linspace(-5, 5, 11)\n", + " x = Tensor([test_values])\n", + " \n", + " print(f\"Input range: {test_values}\")\n", + " print(f\"ReLU range: [{np.min(relu(x).data):.2f}, {np.max(relu(x).data):.2f}]\")\n", + " print(f\"Sigmoid range: [{np.min(sigmoid(x).data):.2f}, {np.max(sigmoid(x).data):.2f}]\")\n", + " print(f\"Tanh range: [{np.min(tanh(x).data):.2f}, {np.max(tanh(x).data):.2f}]\")\n", + " \n", + " # Test monotonicity (should all be non-decreasing functions)\n", + " print(f\"\\n\ud83d\udcc8 Monotonicity Test:\")\n", + " for name, func in [(\"ReLU\", relu), (\"Sigmoid\", sigmoid), (\"Tanh\", tanh)]:\n", + " outputs = func(x).data[0]\n", + " is_monotonic = np.all(outputs[1:] >= outputs[:-1])\n", + " print(f\"{name}: {'\u2705 Monotonic' if is_monotonic else '\u274c Not monotonic'}\")\n", + " \n", + " # Test zero-centered property\n", + " print(f\"\\n\ud83c\udfaf Zero-Centered Test (f(0) = 0):\")\n", + " x_zero = Tensor([[0.0]])\n", + " for name, func in [(\"ReLU\", relu), (\"Sigmoid\", sigmoid),
(\"Tanh\", tanh)]:\n", + " output = func(x_zero).data[0, 0]\n", + " is_zero_centered = abs(output) < 1e-6\n", + " print(f\"{name}: f(0) = {output:.4f} {'\u2705 Zero-centered' if is_zero_centered else '\u274c Not zero-centered'}\")\n", + " \n", + " print(\"\\n\ud83c\udf89 Property analysis complete!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Check your activation function implementations!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 6: Practical Usage Examples\n", + "\n", + "Let's see how these functions would be used in practice:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Practical usage examples\n", + "try:\n", + " print(\"=== Practical Usage Examples ===\")\n", + " \n", + " # Example 1: Binary classification with sigmoid\n", + " print(\"1. Binary Classification (Sigmoid):\")\n", + " logits = Tensor([[2.5, -1.2, 0.8, -0.3]]) # Raw network outputs\n", + " sigmoid = Sigmoid()\n", + " probabilities = sigmoid(logits)\n", + " print(f\" Logits: {logits.data}\")\n", + " print(f\" Probabilities: {probabilities.data}\")\n", + " print(f\" Predictions: {(probabilities.data > 0.5).astype(int)}\")\n", + " \n", + " # Example 2: Feature processing with ReLU\n", + " print(\"\\n2. Feature Processing (ReLU):\")\n", + " features = Tensor([[-0.5, 1.2, -2.1, 0.8, -0.1]]) # Mixed positive/negative\n", + " relu = ReLU()\n", + " processed = relu(features)\n", + " print(f\" Raw features: {features.data}\")\n", + " print(f\" After ReLU: {processed.data}\")\n", + " print(f\" Sparsity: {np.mean(processed.data == 0):.1%} zeros\")\n", + " \n", + " # Example 3: Normalized features with Tanh\n", + " print(\"\\n3. Normalized Features (Tanh):\")\n", + " raw_features = Tensor([[3.2, -1.8, 0.5, -2.4, 1.1]])\n", + " tanh = Tanh()\n", + " normalized = tanh(raw_features)\n", + " print(f\" Raw features: {raw_features.data}\")\n", + " print(f\" Normalized: {normalized.data}\")\n", + " print(f\" Mean: {np.mean(normalized.data):.3f} (close to 0)\")\n", + " \n", + " print(\"\\n\u2705 Practical examples complete!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Check your activation function implementations!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## \ud83c\udf89 Congratulations!\n", + "\n", + "You've successfully implemented the three most important activation functions in deep learning!\n", + "\n", + "### \ud83e\uddf1 What You Built\n", + "1. **ReLU**: The workhorse activation that enables deep networks\n", + "2. **Sigmoid**: The probability activation for binary classification\n", + "3. 
**Tanh**: The zero-centered activation for better gradient flow\n", + "\n", + "### \ud83c\udfaf Key Insights\n", + "- **Nonlinearity is essential**: Without activations, neural networks are just linear transformations\n", + "- **Different functions serve different purposes**: ReLU for hidden layers, Sigmoid for probabilities, Tanh for zero-centered outputs\n", + "- **Mathematical properties matter**: Monotonicity, boundedness, and zero-centering affect learning\n", + "\n", + "### \ud83d\ude80 What's Next\n", + "These activation functions will be used in:\n", + "- **Layers Module**: Building neural network layers\n", + "- **Loss Functions**: Computing training objectives\n", + "- **Advanced Architectures**: CNNs, RNNs, and more\n", + "\n", + "### \ud83d\udd27 Export to Package\n", + "Run this to export your activations to the TinyTorch package:\n", + "```bash\n", + "python bin/tito.py sync\n", + "```\n", + "\n", + "Then test your implementation:\n", + "```bash\n", + "python bin/tito.py test --module activations\n", + "```\n", + "\n", + "**Excellent work! You've mastered the mathematical foundations of neural networks!** \ud83c\udf89\n", + "\n", + "---\n", + "\n", + "## \ud83d\udcda Further Reading\n", + "\n", + "**Want to learn more about activation functions?**\n", + "- **ReLU variants**: Leaky ReLU, ELU, Swish\n", + "- **Advanced activations**: GELU, Mish, SiLU\n", + "- **Activation choice**: When to use which function\n", + "- **Gradient flow**: How activations affect training\n", + "\n", + "**Next modules**: Layers, Loss Functions, Optimization\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/modules/layers/layers_dev.ipynb b/modules/layers/layers_dev.ipynb index b19410bf..152a3e6f 100644 --- a/modules/layers/layers_dev.ipynb +++ b/modules/layers/layers_dev.ipynb @@ -1,701 +1,588 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "2843fa68", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 2: Layers - Neural Network Building Blocks\n", - "\n", - "Welcome to the Layers module! This is where neural networks begin. You'll implement the fundamental building blocks that transform tensors.\n", - "\n", - "## Learning Goals\n", - "- Understand layers as functions that transform tensors: `y = f(x)`\n", - "- Implement Dense layers with linear transformations: `y = Wx + b`\n", - "- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh)\n", - "- See how neural networks are just function composition\n", - "- Build intuition before diving into training\n", - "\n", - "## Build β†’ Use β†’ Understand\n", - "1. **Build**: Dense layers and activation functions\n", - "2. **Use**: Transform tensors and see immediate results\n", - "3. **Understand**: How neural networks transform information\n", - "\n", - "## Module β†’ Package Structure\n", - "**πŸŽ“ Teaching vs. πŸ”§ Building**: \n", - "- **Learning side**: Work in `modules/layers/layers_dev.py` \n", - "- **Building side**: Exports to `tinytorch/core/layers.py`\n", - "\n", - "This module builds the fundamental transformations that compose into neural networks." 
- ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "jupyter:\n", + " jupytext:\n", + " text_representation:\n", + " extension: .py\n", + " format_name: percent\n", + " format_version: '1.3'\n", + " jupytext_version: 1.17.1\n", + "---\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "# Module 2: Layers - Neural Network Building Blocks\n", + "\n", + "Welcome to the Layers module! This is where neural networks begin. You'll implement the fundamental building blocks that transform tensors.\n", + "\n", + "## Learning Goals\n", + "- Understand layers as functions that transform tensors: `y = f(x)`\n", + "- Implement Dense layers with linear transformations: `y = Wx + b`\n", + "- Use activation functions from the activations module for nonlinearity\n", + "- See how neural networks are just function composition\n", + "- Build intuition before diving into training\n", + "\n", + "## Build \u2192 Use \u2192 Understand\n", + "1. **Build**: Dense layers using activation functions as building blocks\n", + "2. **Use**: Transform tensors and see immediate results\n", + "3. **Understand**: How neural networks transform information\n", + "\n", + "## Module Dependencies\n", + "This module builds on the **activations** module:\n", + "- **activations** \u2192 **layers** \u2192 **networks**\n", + "- Clean separation of concerns: math functions \u2192 layer building blocks \u2192 full networks\n", + "\n", + "## Module \u2192 Package Structure\n", + "**\ud83c\udf93 Teaching vs. \ud83d\udd27 Building**: \n", + "- **Learning side**: Work in `modules/layers/layers_dev.py` \n", + "- **Building side**: Exports to `tinytorch/core/layers.py`\n", + "\n", + "This module builds the fundamental transformations that compose into neural networks.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.layers\n", + "\n", + "# Setup and imports\n", + "import numpy as np\n", + "import sys\n", + "from typing import Union, Optional, Callable\n", + "import math" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import numpy as np\n", + "import math\n", + "import sys\n", + "from typing import Union, Optional, Callable\n", + "from tinytorch.core.tensor import Tensor\n", + "\n", + "# Import activation functions from the activations module\n", + "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", + "\n", + "# Import our Tensor class\n", + "# sys.path.append('../../')\n", + "# from modules.tensor.tensor_dev import Tensor\n", + "\n", + "# print(\"\ud83d\udd25 TinyTorch Layers Module\")\n", + "# print(f\"NumPy version: {np.__version__}\")\n", + "# print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", + "# print(\"Ready to build neural network layers!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 1: What is a Layer?\n", + "\n", + "A **layer** is a function that transforms tensors. 
Think of it as:\n", + "- **Input**: Tensor with some shape\n", + "- **Transformation**: Mathematical operation (linear, nonlinear, etc.)\n", + "- **Output**: Tensor with possibly different shape\n", + "\n", + "**The fundamental insight**: Neural networks are just function composition!\n", + "```\n", + "x \u2192 Layer1 \u2192 Layer2 \u2192 Layer3 \u2192 y\n", + "```\n", + "\n", + "**Why layers matter**:\n", + "- They're the building blocks of all neural networks\n", + "- Each layer learns a different transformation\n", + "- Composing layers creates complex functions\n", + "- Understanding layers = understanding neural networks\n", + "\n", + "Let's start with the most important layer: **Dense** (also called Linear or Fully Connected).\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class Dense:\n", + " \"\"\"\n", + " Dense (Linear) Layer: y = Wx + b\n", + " \n", + " The fundamental building block of neural networks.\n", + " Performs linear transformation: matrix multiplication + bias addition.\n", + " \n", + " Args:\n", + " input_size: Number of input features\n", + " output_size: Number of output features\n", + " use_bias: Whether to include bias term (default: True)\n", + " \n", + " TODO: Implement the Dense layer with weight initialization and forward pass.\n", + " \"\"\"\n", + " \n", + " def __init__(self, input_size: int, output_size: int, use_bias: bool = True):\n", + " \"\"\"\n", + " Initialize Dense layer with random weights.\n", + " \n", + " TODO: \n", + " 1. Store layer parameters (input_size, output_size, use_bias)\n", + " 2. Initialize weights with small random values\n", + " 3. Initialize bias to zeros (if use_bias=True)\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"\n", + " Forward pass: y = Wx + b\n", + " \n", + " Args:\n", + " x: Input tensor of shape (batch_size, input_size)\n", + " \n", + " Returns:\n", + " Output tensor of shape (batch_size, output_size)\n", + " \n", + " TODO: Implement matrix multiplication and bias addition\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Dense:\n", + " \"\"\"\n", + " Dense (Linear) Layer: y = Wx + b\n", + " \n", + " The fundamental building block of neural networks.\n", + " Performs linear transformation: matrix multiplication + bias addition.\n", + " \"\"\"\n", + " \n", + " def __init__(self, input_size: int, output_size: int, use_bias: bool = True):\n", + " \"\"\"Initialize Dense layer with random weights.\"\"\"\n", + " self.input_size = input_size\n", + " self.output_size = output_size\n", + " self.use_bias = use_bias\n", + " \n", + " # Initialize weights with Xavier/Glorot initialization\n", + " # This helps with gradient flow during training\n", + " limit = math.sqrt(6.0 / (input_size + output_size))\n", + " self.weights = Tensor(\n", + " np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32)\n", + " )\n", + " \n", + " # Initialize bias to zeros\n", + " if use_bias:\n", + " self.bias = Tensor(np.zeros(output_size, dtype=np.float32))\n", + " else:\n", + " 
self.bias = None\n", + " \n", + " def forward(self, x: Tensor) -> Tensor:\n", + " \"\"\"Forward pass: y = Wx + b\"\"\"\n", + " # Matrix multiplication: x @ weights\n", + " # x shape: (batch_size, input_size)\n", + " # weights shape: (input_size, output_size)\n", + " # result shape: (batch_size, output_size)\n", + " output = Tensor(x.data @ self.weights.data)\n", + " \n", + " # Add bias if present\n", + " if self.bias is not None:\n", + " output = Tensor(output.data + self.bias.data)\n", + " \n", + " return output\n", + " \n", + " def __call__(self, x: Tensor) -> Tensor:\n", + " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", + " return self.forward(x)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Dense Layer\n", + "\n", + "Once you implement the Dense layer above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the Dense layer\n", + "try:\n", + " print(\"=== Testing Dense Layer ===\")\n", + " \n", + " # Create a simple Dense layer: 3 inputs \u2192 2 outputs\n", + " layer = Dense(input_size=3, output_size=2)\n", + " print(f\"Created Dense layer: {layer.input_size} \u2192 {layer.output_size}\")\n", + " print(f\"Weights shape: {layer.weights.shape}\")\n", + " print(f\"Bias shape: {layer.bias.shape if layer.bias is not None else 'No bias'}\")\n", + " \n", + " # Test with a single example\n", + " x = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3)\n", + " y = layer(x)\n", + " print(f\"Input shape: {x.shape}\")\n", + " print(f\"Output shape: {y.shape}\")\n", + " print(f\"Input: {x.data}\")\n", + " print(f\"Output: {y.data}\")\n", + " \n", + " # Test with batch\n", + " x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3)\n", + " y_batch = layer(x_batch)\n", + " print(f\"\\nBatch input shape: {x_batch.shape}\")\n", + " print(f\"Batch output shape: {y_batch.shape}\")\n", + " \n", + " print(\"\u2705 Dense layer working!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the Dense layer above!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 2: Activation Functions - Adding Nonlinearity\n", + "\n", + "Now we'll use the activation functions from the **activations** module! \n", + "\n", + "**Clean Architecture**: We import the activation functions rather than redefining them:\n", + "```python\n", + "from tinytorch.core.activations import ReLU, Sigmoid, Tanh\n", + "```\n", + "\n", + "**Why this matters**:\n", + "- **Separation of concerns**: Math functions vs.
layer building blocks\n", + "- **Reusability**: Activations can be used anywhere in the system\n", + "- **Maintainability**: One place to update activation implementations\n", + "- **Composability**: Clean imports make neural networks easier to build\n", + "\n", + "**Why nonlinearity matters**: Without it, stacking layers is pointless!\n", + "```\n", + "Linear \u2192 Linear \u2192 Linear = Just one big Linear transformation\n", + "Linear \u2192 NonLinear \u2192 Linear = Can learn complex patterns\n", + "```\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Activation Functions from Activations Module\n", + "\n", + "Let's test that we can use the activation functions from the activations module:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test activation functions from activations module\n", + "try:\n", + " print(\"=== Testing Activation Functions from Activations Module ===\")\n", + " \n", + " # Test data: mix of positive, negative, and zero\n", + " x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])\n", + " print(f\"Input: {x.data}\")\n", + " \n", + " # Test ReLU from activations module\n", + " relu = ReLU()\n", + " y_relu = relu(x)\n", + " print(f\"ReLU output: {y_relu.data}\")\n", + " \n", + " # Test Sigmoid from activations module\n", + " sigmoid = Sigmoid()\n", + " y_sigmoid = sigmoid(x)\n", + " print(f\"Sigmoid output: {y_sigmoid.data}\")\n", + " \n", + " # Test Tanh from activations module\n", + " tanh = Tanh()\n", + " y_tanh = tanh(x)\n", + " print(f\"Tanh output: {y_tanh.data}\")\n", + " \n", + " print(\"\u2705 Activation functions from activations module working!\")\n", + " print(\"\ud83c\udf89 Clean architecture: layers module uses activations module!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure the activations module is properly exported!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 3: Layer Composition - Building Neural Networks\n", + "\n", + "Now comes the magic! 
We can **compose** layers to build neural networks:\n", + "\n", + "```\n", + "Input \u2192 Dense \u2192 ReLU \u2192 Dense \u2192 Sigmoid \u2192 Output\n", + "```\n", + "\n", + "This is a 2-layer neural network that can learn complex nonlinear patterns!\n", + "\n", + "**Notice the clean architecture**:\n", + "- Dense layers handle linear transformations\n", + "- Activation functions (from activations module) handle nonlinearity\n", + "- Composition creates complex behaviors from simple building blocks\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Build a simple 2-layer neural network\n", + "try:\n", + " print(\"=== Building a 2-Layer Neural Network ===\")\n", + " \n", + " # Network architecture: 3 \u2192 4 \u2192 2\n", + " # Input: 3 features\n", + " # Hidden: 4 neurons with ReLU\n", + " # Output: 2 neurons with Sigmoid\n", + " \n", + " layer1 = Dense(input_size=3, output_size=4)\n", + " activation1 = ReLU() # From activations module\n", + " layer2 = Dense(input_size=4, output_size=2)\n", + " activation2 = Sigmoid() # From activations module\n", + " \n", + " print(\"Network architecture:\")\n", + " print(f\" Input: 3 features\")\n", + " print(f\" Hidden: {layer1.input_size} \u2192 {layer1.output_size} (Dense + ReLU)\")\n", + " print(f\" Output: {layer2.input_size} \u2192 {layer2.output_size} (Dense + Sigmoid)\")\n", + " \n", + " # Test with sample data\n", + " x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # 2 examples, 3 features each\n", + " print(f\"\\nInput shape: {x.shape}\")\n", + " print(f\"Input data: {x.data}\")\n", + " \n", + " # Forward pass through the network\n", + " h1 = layer1(x) # Dense layer 1\n", + " h1_activated = activation1(h1) # ReLU activation\n", + " h2 = layer2(h1_activated) # Dense layer 2 \n", + " output = activation2(h2) # Sigmoid activation\n", + " \n", + " print(f\"\\nAfter layer 1: {h1.shape}\")\n", + " print(f\"After ReLU: {h1_activated.shape}\")\n", + " print(f\"After layer 2: {h2.shape}\")\n", + " print(f\"Final output: {output.shape}\")\n", + " print(f\"Output values: {output.data}\")\n", + " \n", + " print(\"\\n\ud83c\udf89 Neural network working! You just built your first neural network!\")\n", + " print(\"\ud83c\udfd7\ufe0f Clean architecture: Dense layers + Activations module = Neural Network\")\n", + " print(\"Notice how the network transforms 3D input into 2D output through learned transformations.\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Make sure to implement the layers and check activations module!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 4: Understanding What We Built\n", + "\n", + "Congratulations! You just implemented a clean, modular neural network architecture:\n", + "\n", + "### \ud83e\uddf1 **What You Built**\n", + "1. **Dense Layer**: Linear transformation `y = Wx + b`\n", + "2. **Activation Functions**: Imported from activations module (ReLU, Sigmoid, Tanh)\n", + "3. **Layer Composition**: Chaining layers to build networks\n", + "\n", + "### \ud83c\udfd7\ufe0f **Clean Architecture Benefits**\n", + "- **Separation of concerns**: Math functions vs. 
layer building blocks\n", + "- **Reusability**: Activations can be used across different modules\n", + "- **Maintainability**: One place to update activation implementations\n", + "- **Composability**: Clean imports make complex networks easier to build\n", + "\n", + "### \ud83c\udfaf **Key Insights**\n", + "- **Layers are functions**: They transform tensors from one space to another\n", + "- **Composition creates complexity**: Simple layers \u2192 complex networks\n", + "- **Nonlinearity is crucial**: Without it, deep networks are just linear transformations\n", + "- **Neural networks are function approximators**: They learn to map inputs to outputs\n", + "- **Modular design**: Building blocks can be combined in many ways\n", + "\n", + "### \ud83d\ude80 **What's Next**\n", + "In the next modules, you'll learn:\n", + "- **Training**: How networks learn from data (backpropagation, optimizers)\n", + "- **Architectures**: Specialized layers for different problems (CNNs, RNNs)\n", + "- **Applications**: Using networks for real problems\n", + "\n", + "### \ud83d\udd27 **Export to Package**\n", + "Run this to export your layers to the TinyTorch package:\n", + "```bash\n", + "python bin/tito.py sync\n", + "```\n", + "\n", + "Then test your implementation:\n", + "```bash\n", + "python bin/tito.py test --module layers\n", + "```\n", + "\n", + "**Great job! You've built a clean, modular foundation for neural networks!** \ud83c\udf89\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Final demonstration: A more complex example\n", + "try:\n", + " print(\"=== Final Demo: Image Classification Network ===\")\n", + " \n", + " # Simulate a small image: 28x28 pixels flattened to 784 features\n", + " # This is like a tiny MNIST digit\n", + " image_size = 28 * 28 # 784 pixels\n", + " num_classes = 10 # 10 digits (0-9)\n", + " \n", + " # Build a 3-layer network for digit classification\n", + " # 784 \u2192 128 \u2192 64 \u2192 10\n", + " layer1 = Dense(input_size=image_size, output_size=128)\n", + " relu1 = ReLU() # From activations module\n", + " layer2 = Dense(input_size=128, output_size=64)\n", + " relu2 = ReLU() # From activations module\n", + " layer3 = Dense(input_size=64, output_size=num_classes)\n", + " softmax = Sigmoid() # Using Sigmoid as a simple \"probability-like\" output\n", + " \n", + " print(f\"Image classification network:\")\n", + " print(f\" Input: {image_size} pixels (28x28 image)\")\n", + " print(f\" Hidden 1: {layer1.input_size} \u2192 {layer1.output_size} (Dense + ReLU)\")\n", + " print(f\" Hidden 2: {layer2.input_size} \u2192 {layer2.output_size} (Dense + ReLU)\")\n", + " print(f\" Output: {layer3.input_size} \u2192 {layer3.output_size} (Dense + Sigmoid)\")\n", + " \n", + " # Simulate a batch of 5 images\n", + " batch_size = 5\n", + " fake_images = Tensor(np.random.randn(batch_size, image_size).astype(np.float32))\n", + " \n", + " # Forward pass\n", + " h1 = relu1(layer1(fake_images))\n", + " h2 = relu2(layer2(h1))\n", + " predictions = softmax(layer3(h2))\n", + " \n", + " print(f\"\\nBatch processing:\")\n", + " print(f\" Input batch shape: {fake_images.shape}\")\n", + " print(f\" Predictions shape: {predictions.shape}\")\n", + " print(f\" Sample predictions: {predictions.data[0]}\") # First image predictions\n", + " \n", + " print(\"\\n\ud83c\udf89 You built a neural network that could classify images!\")\n", + " print(\"\ud83c\udfd7\ufe0f Clean architecture: Dense layers + Activations module = Image 
Classifier\")\n", + " print(\"With training, this network could learn to recognize handwritten digits!\")\n", + " \n", + "except Exception as e:\n", + " print(f\"\u274c Error: {e}\")\n", + " print(\"Check your layer implementations and activations module!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## \ud83c\udf93 Module Summary\n", + "\n", + "### What You Learned\n", + "1. **Layer Architecture**: Dense layers as linear transformations\n", + "2. **Clean Dependencies**: Layers module uses activations module\n", + "3. **Function Composition**: Simple building blocks \u2192 complex networks\n", + "4. **Modular Design**: Separation of concerns for maintainable code\n", + "\n", + "### Key Architectural Insight\n", + "```\n", + "activations (math functions) \u2192 layers (building blocks) \u2192 networks (applications)\n", + "```\n", + "\n", + "This clean dependency graph makes the system:\n", + "- **Understandable**: Each module has a clear purpose\n", + "- **Testable**: Each module can be tested independently\n", + "- **Reusable**: Components can be used across different contexts\n", + "- **Maintainable**: Changes are localized to appropriate modules\n", + "\n", + "### Next Steps\n", + "- **Training**: Learn how networks learn from data\n", + "- **Advanced Architectures**: CNNs, RNNs, Transformers\n", + "- **Applications**: Real-world machine learning problems\n", + "\n", + "**Congratulations on building a clean, modular neural network foundation!** \ud83d\ude80\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.0" + } }, - { - "cell_type": "code", - "execution_count": null, - "id": "9d285d84", - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp core.layers\n", - "\n", - "# Setup and imports\n", - "import numpy as np\n", - "import sys\n", - "from typing import Union, Optional, Callable\n", - "import math" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a12b7f36", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "import numpy as np\n", - "import math\n", - "import sys\n", - "from typing import Union, Optional, Callable\n", - "from tinytorch.core.tensor import Tensor\n", - "\n", - "# Import our Tensor class\n", - "# sys.path.append('../../')\n", - "# from modules.tensor.tensor_dev import Tensor\n", - "\n", - "# print(\"πŸ”₯ TinyTorch Layers Module\")\n", - "# print(f\"NumPy version: {np.__version__}\")\n", - "# print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", - "# print(\"Ready to build neural network layers!\")" - ] - }, - { - "cell_type": "markdown", - "id": "1b8b760c", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: What is a Layer?\n", - "\n", - "A **layer** is a function that transforms tensors. 
Think of it as:\n", - "- **Input**: Tensor with some shape\n", - "- **Transformation**: Mathematical operation (linear, nonlinear, etc.)\n", - "- **Output**: Tensor with possibly different shape\n", - "\n", - "**The fundamental insight**: Neural networks are just function composition!\n", - "```\n", - "x β†’ Layer1 β†’ Layer2 β†’ Layer3 β†’ y\n", - "```\n", - "\n", - "**Why layers matter**:\n", - "- They're the building blocks of all neural networks\n", - "- Each layer learns a different transformation\n", - "- Composing layers creates complex functions\n", - "- Understanding layers = understanding neural networks\n", - "\n", - "Let's start with the most important layer: **Dense** (also called Linear or Fully Connected)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fabf403c", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Dense:\n", - " \"\"\"\n", - " Dense (Linear) Layer: y = Wx + b\n", - " \n", - " The fundamental building block of neural networks.\n", - " Performs linear transformation: matrix multiplication + bias addition.\n", - " \n", - " Args:\n", - " input_size: Number of input features\n", - " output_size: Number of output features\n", - " use_bias: Whether to include bias term (default: True)\n", - " \n", - " TODO: Implement the Dense layer with weight initialization and forward pass.\n", - " \"\"\"\n", - " \n", - " def __init__(self, input_size: int, output_size: int, use_bias: bool = True):\n", - " \"\"\"\n", - " Initialize Dense layer with random weights.\n", - " \n", - " TODO: \n", - " 1. Store layer parameters (input_size, output_size, use_bias)\n", - " 2. Initialize weights with small random values\n", - " 3. Initialize bias to zeros (if use_bias=True)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Forward pass: y = Wx + b\n", - " \n", - " Args:\n", - " x: Input tensor of shape (batch_size, input_size)\n", - " \n", - " Returns:\n", - " Output tensor of shape (batch_size, output_size)\n", - " \n", - " TODO: Implement matrix multiplication and bias addition\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "718aafe5", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Dense:\n", - " \"\"\"\n", - " Dense (Linear) Layer: y = Wx + b\n", - " \n", - " The fundamental building block of neural networks.\n", - " Performs linear transformation: matrix multiplication + bias addition.\n", - " \"\"\"\n", - " \n", - " def __init__(self, input_size: int, output_size: int, use_bias: bool = True):\n", - " \"\"\"Initialize Dense layer with random weights.\"\"\"\n", - " self.input_size = input_size\n", - " self.output_size = output_size\n", - " self.use_bias = use_bias\n", - " \n", - " # Initialize weights with Xavier/Glorot initialization\n", - " # This helps with gradient flow during training\n", - " limit = math.sqrt(6.0 / (input_size + output_size))\n", - " self.weights = Tensor(\n", - " np.random.uniform(-limit, limit, (input_size, output_size)).astype(np.float32)\n", - " )\n", - " \n", - " # Initialize bias to zeros\n", - " if use_bias:\n", - " self.bias 
= Tensor(np.zeros(output_size, dtype=np.float32))\n", - " else:\n", - " self.bias = None\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"Forward pass: y = Wx + b\"\"\"\n", - " # Matrix multiplication: x @ weights\n", - " # x shape: (batch_size, input_size)\n", - " # weights shape: (input_size, output_size)\n", - " # result shape: (batch_size, output_size)\n", - " output = Tensor(x.data @ self.weights.data)\n", - " \n", - " # Add bias if present\n", - " if self.bias is not None:\n", - " output = Tensor(output.data + self.bias.data)\n", - " \n", - " return output\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make layer callable: layer(x) same as layer.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "54390574", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Dense Layer\n", - "\n", - "Once you implement the Dense layer above, run this cell to test it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c24b9bc7", - "metadata": {}, - "outputs": [], - "source": [ - "# Test the Dense layer\n", - "try:\n", - " print(\"=== Testing Dense Layer ===\")\n", - " \n", - " # Create a simple Dense layer: 3 inputs β†’ 2 outputs\n", - " layer = Dense(input_size=3, output_size=2)\n", - " print(f\"Created Dense layer: {layer.input_size} β†’ {layer.output_size}\")\n", - " print(f\"Weights shape: {layer.weights.shape}\")\n", - " print(f\"Bias shape: {layer.bias.shape if layer.bias else 'No bias'}\")\n", - " \n", - " # Test with a single example\n", - " x = Tensor([[1.0, 2.0, 3.0]]) # Shape: (1, 3)\n", - " y = layer(x)\n", - " print(f\"Input shape: {x.shape}\")\n", - " print(f\"Output shape: {y.shape}\")\n", - " print(f\"Input: {x.data}\")\n", - " print(f\"Output: {y.data}\")\n", - " \n", - " # Test with batch of examples\n", - " x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # Shape: (2, 3)\n", - " y_batch = layer(x_batch)\n", - " print(f\"\\nBatch input shape: {x_batch.shape}\")\n", - " print(f\"Batch output shape: {y_batch.shape}\")\n", - " print(f\"Batch output: {y_batch.data}\")\n", - " \n", - " print(\"βœ… Dense layer working!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the Dense layer above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "50ccc78d", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: Activation Functions\n", - "\n", - "Dense layers alone can only learn **linear** transformations. 
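That claim is easy to check numerically. The sketch below uses plain NumPy (standalone, independent of the `Tensor` class) to show that two stacked bias-free linear layers collapse into a single linear map whose weights are `W1 @ W2`:

```python
import numpy as np

rng = np.random.default_rng(0)
W1 = rng.normal(size=(3, 4))   # first "layer"
W2 = rng.normal(size=(4, 2))   # second "layer"
x = rng.normal(size=(1, 3))    # one input example

two_layers = (x @ W1) @ W2     # forward pass through both layers
one_layer = x @ (W1 @ W2)      # a single layer with the combined weights

print(np.allclose(two_layers, one_layer))  # True: depth without nonlinearity adds nothing
```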
But most real-world problems need **nonlinear** transformations.\n", - "\n", - "**Activation functions** add nonlinearity:\n", - "- **ReLU**: `max(0, x)` - Most common, simple and effective\n", - "- **Sigmoid**: `1 / (1 + e^(-x))` - Squashes to (0, 1)\n", - "- **Tanh**: `tanh(x)` - Squashes to (-1, 1)\n", - "\n", - "**Why nonlinearity matters**: Without it, stacking layers is pointless!\n", - "```\n", - "Linear β†’ Linear β†’ Linear = Just one big Linear transformation\n", - "Linear β†’ NonLinear β†’ Linear = Can learn complex patterns\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "85818dc3", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class ReLU:\n", - " \"\"\"\n", - " ReLU Activation: f(x) = max(0, x)\n", - " \n", - " The most popular activation function in deep learning.\n", - " Simple, effective, and computationally efficient.\n", - " \n", - " TODO: Implement ReLU activation function.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply ReLU: f(x) = max(0, x)\n", - " \n", - " Args:\n", - " x: Input tensor\n", - " \n", - " Returns:\n", - " Output tensor with ReLU applied element-wise\n", - " \n", - " TODO: Implement element-wise max(0, x) operation\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " \"\"\"Make activation callable: relu(x) same as relu.forward(x)\"\"\"\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23e807f1", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class ReLU:\n", - " \"\"\"ReLU Activation: f(x) = max(0, x)\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"Apply ReLU: f(x) = max(0, x)\"\"\"\n", - " return Tensor(np.maximum(0, x.data))\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3c0bb26a", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Sigmoid:\n", - " \"\"\"\n", - " Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\n", - " \n", - " Squashes input to range (0, 1). 
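# Aside: a few hand-checkable sigmoid reference values to lean on when you test
# this class. Standalone sketch in plain NumPy, independent of the Tensor class:
import numpy as np

for v in (-2.0, 0.0, 2.0):
    print(v, 1.0 / (1.0 + np.exp(-v)))
# Expected: sigmoid(-2) ~ 0.1192, sigmoid(0) = 0.5 exactly, sigmoid(2) ~ 0.8808.
# Note the symmetry: sigmoid(-x) = 1 - sigmoid(x).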
Often used for binary classification.\n", - " \n", - " TODO: Implement Sigmoid activation function.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply Sigmoid: f(x) = 1 / (1 + e^(-x))\n", - " \n", - " Args:\n", - " x: Input tensor\n", - " \n", - " Returns:\n", - " Output tensor with Sigmoid applied element-wise\n", - " \n", - " TODO: Implement sigmoid function (be careful with numerical stability!)\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "972e9668", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Sigmoid:\n", - " \"\"\"Sigmoid Activation: f(x) = 1 / (1 + e^(-x))\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"Apply Sigmoid with numerical stability\"\"\"\n", - " # Use the numerically stable version to avoid overflow\n", - " # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))\n", - " # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))\n", - " x_data = x.data\n", - " result = np.zeros_like(x_data)\n", - " \n", - " # Stable computation\n", - " positive_mask = x_data >= 0\n", - " result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))\n", - " result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))\n", - " \n", - " return Tensor(result)\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2babe8a8", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class Tanh:\n", - " \"\"\"\n", - " Tanh Activation: f(x) = tanh(x)\n", - " \n", - " Squashes input to range (-1, 1). 
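# Aside: why the hidden Sigmoid implementation above splits on the sign of x.
# The naive formula calls exp() on large positive arguments and overflows.
# Standalone sketch in plain NumPy, independent of the Tensor class:
import numpy as np

x = np.array([-1000.0, 0.0, 1000.0])

# Naive form: np.exp(1000.0) overflows to inf with a RuntimeWarning; the answer
# is only rescued by the accident that 1 / (1 + inf) rounds to 0.0.
naive = 1.0 / (1.0 + np.exp(-x))

# Split form: exp() only ever sees non-positive arguments, so nothing overflows.
stable = np.empty_like(x)
pos = x >= 0
stable[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
stable[~pos] = np.exp(x[~pos]) / (1.0 + np.exp(x[~pos]))

print(naive)   # [0.  0.5 1. ]  (preceded by an overflow warning)
print(stable)  # [0.  0.5 1. ]  (no warning)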
Zero-centered output.\n", - " \n", - " TODO: Implement Tanh activation function.\n", - " \"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"\n", - " Apply Tanh: f(x) = tanh(x)\n", - " \n", - " Args:\n", - " x: Input tensor\n", - " \n", - " Returns:\n", - " Output tensor with Tanh applied element-wise\n", - " \n", - " TODO: Implement tanh function\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5eff4e44", - "metadata": { - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class Tanh:\n", - " \"\"\"Tanh Activation: f(x) = tanh(x)\"\"\"\n", - " \n", - " def forward(self, x: Tensor) -> Tensor:\n", - " \"\"\"Apply Tanh\"\"\"\n", - " return Tensor(np.tanh(x.data))\n", - " \n", - " def __call__(self, x: Tensor) -> Tensor:\n", - " return self.forward(x)" - ] - }, - { - "cell_type": "markdown", - "id": "c39e4420", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Activation Functions\n", - "\n", - "Once you implement the activation functions above, run this cell to test them:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f73687cc", - "metadata": {}, - "outputs": [], - "source": [ - "# Test activation functions\n", - "try:\n", - " print(\"=== Testing Activation Functions ===\")\n", - " \n", - " # Test data: mix of positive, negative, and zero\n", - " x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])\n", - " print(f\"Input: {x.data}\")\n", - " \n", - " # Test ReLU\n", - " relu = ReLU()\n", - " y_relu = relu(x)\n", - " print(f\"ReLU output: {y_relu.data}\")\n", - " \n", - " # Test Sigmoid\n", - " sigmoid = Sigmoid()\n", - " y_sigmoid = sigmoid(x)\n", - " print(f\"Sigmoid output: {y_sigmoid.data}\")\n", - " \n", - " # Test Tanh\n", - " tanh = Tanh()\n", - " y_tanh = tanh(x)\n", - " print(f\"Tanh output: {y_tanh.data}\")\n", - " \n", - " print(\"βœ… Activation functions working!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the activation functions above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "ec82e933", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 3: Layer Composition - Building Neural Networks\n", - "\n", - "Now comes the magic! We can **compose** layers to build neural networks:\n", - "\n", - "```\n", - "Input β†’ Dense β†’ ReLU β†’ Dense β†’ Sigmoid β†’ Output\n", - "```\n", - "\n", - "This is a 2-layer neural network that can learn complex nonlinear patterns!" 
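The cell below chains the layers by hand. Once that pattern feels familiar, it can be packaged up; the `Sequential` helper here is a hypothetical sketch, not part of this module's required API, showing how little machinery composition needs:

```python
class Sequential:
    """Chain callables: Sequential(a, b, c)(x) == c(b(a(x)))."""

    def __init__(self, *layers):
        self.layers = layers

    def __call__(self, x):
        for layer in self.layers:   # feed each layer's output into the next
            x = layer(x)
        return x

# Usage with the classes built in this module:
# model = Sequential(Dense(3, 4), ReLU(), Dense(4, 2), Sigmoid())
# y = model(Tensor([[1.0, 2.0, 3.0]]))
```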
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "06c5692f", - "metadata": {}, - "outputs": [], - "source": [ - "# Build a simple 2-layer neural network\n", - "try:\n", - " print(\"=== Building a 2-Layer Neural Network ===\")\n", - " \n", - " # Network architecture: 3 β†’ 4 β†’ 2\n", - " # Input: 3 features\n", - " # Hidden: 4 neurons with ReLU\n", - " # Output: 2 neurons with Sigmoid\n", - " \n", - " layer1 = Dense(input_size=3, output_size=4)\n", - " activation1 = ReLU()\n", - " layer2 = Dense(input_size=4, output_size=2)\n", - " activation2 = Sigmoid()\n", - " \n", - " print(\"Network architecture:\")\n", - " print(f\" Input: 3 features\")\n", - " print(f\" Hidden: {layer1.input_size} β†’ {layer1.output_size} (Dense + ReLU)\")\n", - " print(f\" Output: {layer2.input_size} β†’ {layer2.output_size} (Dense + Sigmoid)\")\n", - " \n", - " # Test with sample data\n", - " x = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) # 2 examples, 3 features each\n", - " print(f\"\\nInput shape: {x.shape}\")\n", - " print(f\"Input data: {x.data}\")\n", - " \n", - " # Forward pass through the network\n", - " h1 = layer1(x) # Dense layer 1\n", - " h1_activated = activation1(h1) # ReLU activation\n", - " h2 = layer2(h1_activated) # Dense layer 2 \n", - " output = activation2(h2) # Sigmoid activation\n", - " \n", - " print(f\"\\nAfter layer 1: {h1.shape}\")\n", - " print(f\"After ReLU: {h1_activated.shape}\")\n", - " print(f\"After layer 2: {h2.shape}\")\n", - " print(f\"Final output: {output.shape}\")\n", - " print(f\"Output values: {output.data}\")\n", - " \n", - " print(\"\\nπŸŽ‰ Neural network working! You just built your first neural network!\")\n", - " print(\"Notice how the network transforms 3D input into 2D output through learned transformations.\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Make sure to implement the layers and activations above!\")" - ] - }, - { - "cell_type": "markdown", - "id": "13dc6d9a", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 4: Understanding What We Built\n", - "\n", - "Congratulations! You just implemented the fundamental building blocks of neural networks:\n", - "\n", - "### 🧱 **What You Built**\n", - "1. **Dense Layer**: Linear transformation `y = Wx + b`\n", - "2. **Activation Functions**: Nonlinear transformations (ReLU, Sigmoid, Tanh)\n", - "3. **Layer Composition**: Chaining layers to build networks\n", - "\n", - "### 🎯 **Key Insights**\n", - "- **Layers are functions**: They transform tensors from one space to another\n", - "- **Composition creates complexity**: Simple layers β†’ complex networks\n", - "- **Nonlinearity is crucial**: Without it, deep networks are just linear transformations\n", - "- **Neural networks are function approximators**: They learn to map inputs to outputs\n", - "\n", - "### πŸš€ **What's Next**\n", - "In the next modules, you'll learn:\n", - "- **Training**: How networks learn from data (backpropagation, optimizers)\n", - "- **Architectures**: Specialized layers for different problems (CNNs, RNNs)\n", - "- **Applications**: Using networks for real problems\n", - "\n", - "### πŸ”§ **Export to Package**\n", - "Run this to export your layers to the TinyTorch package:\n", - "```bash\n", - "python bin/tito.py sync\n", - "```\n", - "\n", - "Then test your implementation:\n", - "```bash\n", - "python bin/tito.py test --module layers\n", - "```\n", - "\n", - "**Great job! 
You've built the foundation of neural networks!** πŸŽ‰" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a54d8ce9", - "metadata": {}, - "outputs": [], - "source": [ - "# Final demonstration: A more complex example\n", - "try:\n", - " print(\"=== Final Demo: Image Classification Network ===\")\n", - " \n", - " # Simulate a small image: 28x28 pixels flattened to 784 features\n", - " # This is like a tiny MNIST digit\n", - " image_size = 28 * 28 # 784 pixels\n", - " num_classes = 10 # 10 digits (0-9)\n", - " \n", - " # Build a 3-layer network for digit classification\n", - " # 784 β†’ 128 β†’ 64 β†’ 10\n", - " layer1 = Dense(input_size=image_size, output_size=128)\n", - " relu1 = ReLU()\n", - " layer2 = Dense(input_size=128, output_size=64)\n", - " relu2 = ReLU()\n", - " layer3 = Dense(input_size=64, output_size=num_classes)\n", - " softmax = Sigmoid() # Using Sigmoid as a simple \"probability-like\" output\n", - " \n", - " print(f\"Image classification network:\")\n", - " print(f\" Input: {image_size} pixels (28x28 image)\")\n", - " print(f\" Hidden 1: {layer1.input_size} β†’ {layer1.output_size} (Dense + ReLU)\")\n", - " print(f\" Hidden 2: {layer2.input_size} β†’ {layer2.output_size} (Dense + ReLU)\")\n", - " print(f\" Output: {layer3.input_size} β†’ {layer3.output_size} (Dense + Sigmoid)\")\n", - " \n", - " # Simulate a batch of 5 images\n", - " batch_size = 5\n", - " fake_images = Tensor(np.random.randn(batch_size, image_size).astype(np.float32))\n", - " \n", - " # Forward pass\n", - " h1 = relu1(layer1(fake_images))\n", - " h2 = relu2(layer2(h1))\n", - " predictions = softmax(layer3(h2))\n", - " \n", - " print(f\"\\nBatch processing:\")\n", - " print(f\" Input batch shape: {fake_images.shape}\")\n", - " print(f\" Predictions shape: {predictions.shape}\")\n", - " print(f\" Sample predictions: {predictions.data[0]}\") # First image predictions\n", - " \n", - " print(\"\\nπŸŽ‰ You built a neural network that could classify images!\")\n", - " print(\"With training, this network could learn to recognize handwritten digits!\")\n", - " \n", - "except Exception as e:\n", - " print(f\"❌ Error: {e}\")\n", - " print(\"Check your layer implementations!\") " - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/modules/nbdev.yml b/modules/nbdev.yml new file mode 100644 index 00000000..9e72b3f1 --- /dev/null +++ b/modules/nbdev.yml @@ -0,0 +1,9 @@ +project: + output-dir: _docs + +website: + title: "TinyTorch" + site-url: "https://tinytorch.github.io/TinyTorch/" + description: "Build ML Systems from Scratch - A hands-on systems course" + repo-branch: main + repo-url: "https://github.com/tinytorch/TinyTorch/" diff --git a/modules/setup/setup_dev.ipynb b/modules/setup/setup_dev.ipynb index 42b82868..284bd58d 100644 --- a/modules/setup/setup_dev.ipynb +++ b/modules/setup/setup_dev.ipynb @@ -1,910 +1,684 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "695331d6", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "# Module 0: Setup - TinyπŸ”₯Torch Development Workflow\n", - "\n", - "Welcome to TinyTorch! 
This module teaches you the development workflow you'll use throughout the course.\n", - "\n", - "## Learning Goals\n", - "- Understand the nbdev notebook-to-Python workflow\n", - "- Write your first TinyTorch code\n", - "- Run tests and use the CLI tools\n", - "- Get comfortable with the development rhythm\n", - "\n", - "## The TinyTorch Development Cycle\n", - "\n", - "1. **Write code** in this notebook using `#| export` \n", - "2. **Export code** with `python bin/tito.py sync --module setup`\n", - "3. **Run tests** with `python bin/tito.py test --module setup`\n", - "4. **Check progress** with `python bin/tito.py info`\n", - "\n", - "Let's get started!" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "77e98f62", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.070182Z", - "iopub.status.busy": "2025-07-10T23:28:59.069869Z", - "iopub.status.idle": "2025-07-10T23:28:59.076126Z", - "shell.execute_reply": "2025-07-10T23:28:59.075657Z" - } - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "πŸ”₯ TinyTorch Development Environment\n", - "Python 3.13.3 (v3.13.3:6280bb54784, Apr 8 2025, 10:47:54) [Clang 15.0.0 (clang-1500.3.9.4)]\n", - "Platform: Darwin 24.5.0\n", - "Started: 2025-07-10 19:28:59\n" - ] - } - ], - "source": [ - "#| default_exp core.utils\n", - "\n", - "# Setup imports and environment\n", - "import sys\n", - "import platform\n", - "from datetime import datetime\n", - "import os\n", - "from pathlib import Path\n", - "\n", - "print(\"πŸ”₯ TinyTorch Development Environment\")\n", - "print(f\"Python {sys.version}\")\n", - "print(f\"Platform: {platform.system()} {platform.release()}\")\n", - "print(f\"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")" - ] - }, - { - "cell_type": "markdown", - "id": "a1a9c143", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 1: Understanding the Module β†’ Package Structure\n", - "\n", - "**πŸŽ“ Teaching vs. 
πŸ”§ Building**: This course has two sides:\n", - "- **Teaching side**: You work in `modules/setup/setup_dev.ipynb` (learning-focused)\n", - "- **Building side**: Your code exports to `tinytorch/core/utils.py` (production package)\n", - "\n", - "**Key Concept**: The `#| default_exp core.utils` directive at the top tells nbdev to export all `#| export` cells to `tinytorch/core/utils.py`.\n", - "\n", - "This separation allows us to:\n", - "- Organize learning by **concepts** (modules) \n", - "- Organize code by **function** (package structure)\n", - "- Build a real ML framework while learning systematically\n", - "\n", - "Let's write a simple \"Hello World\" function with the `#| export` directive:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d41c28bc", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.078349Z", - "iopub.status.busy": "2025-07-10T23:28:59.078166Z", - "iopub.status.idle": "2025-07-10T23:28:59.080816Z", - "shell.execute_reply": "2025-07-10T23:28:59.080472Z" + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "jupyter:\n", + " jupytext:\n", + " text_representation:\n", + " extension: .py\n", + " format_name: percent\n", + " format_version: '1.3'\n", + " jupytext_version: 1.17.1\n", + "---\n" + ] }, - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "def hello_tinytorch():\n", - " \"\"\"\n", - " A simple hello world function for TinyTorch.\n", - " \n", - " TODO: Implement this function to display TinyTorch ASCII art and welcome message.\n", - " Load the flame art from tinytorch_flame.txt file with graceful fallback.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - "\n", - "def add_numbers(a, b):\n", - " \"\"\"\n", - " Add two numbers together.\n", - " \n", - " TODO: Implement addition of two numbers.\n", - " This is the foundation of all mathematical operations in ML.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "0a8001ec", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.082743Z", - "iopub.status.busy": "2025-07-10T23:28:59.082583Z", - "iopub.status.idle": "2025-07-10T23:28:59.086912Z", - "shell.execute_reply": "2025-07-10T23:28:59.086623Z" - }, - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "def hello_tinytorch():\n", - " \"\"\"Display the TinyTorch ASCII art and welcome message.\"\"\"\n", - " try:\n", - " # Get the directory containing this file\n", - " current_dir = Path(__file__).parent\n", - " art_file = current_dir / \"tinytorch_flame.txt\"\n", - " \n", - " if art_file.exists():\n", - " with open(art_file, 'r') as f:\n", - " ascii_art = f.read()\n", - " print(ascii_art)\n", - " print(\"TinyπŸ”₯Torch\")\n", - " print(\"Build ML Systems from Scratch!\")\n", - " else:\n", - " print(\"πŸ”₯ TinyTorch πŸ”₯\")\n", - " print(\"Build ML Systems from Scratch!\")\n", - " except NameError:\n", - " # Handle case when running in notebook where __file__ is not defined\n", - " try:\n", - " art_file = Path(os.getcwd()) / \"tinytorch_flame.txt\"\n", - " if art_file.exists():\n", - " with open(art_file, 'r') as f:\n", - " ascii_art = f.read()\n", - " print(ascii_art)\n", - " print(\"TinyπŸ”₯Torch\")\n", - " print(\"Build ML Systems from Scratch!\")\n", - " else:\n", - " print(\"πŸ”₯ TinyTorch πŸ”₯\")\n", - " print(\"Build ML Systems from Scratch!\")\n", - " 
except:\n", - " print(\"πŸ”₯ TinyTorch πŸ”₯\")\n", - " print(\"Build ML Systems from Scratch!\")\n", - "\n", - "def add_numbers(a, b):\n", - " \"\"\"Add two numbers together.\"\"\"\n", - " return a + b" - ] - }, - { - "cell_type": "markdown", - "id": "b28103af", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Implementation\n", - "\n", - "Once you implement the functions above, run this cell to test them:" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a1beca72", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.088616Z", - "iopub.status.busy": "2025-07-10T23:28:59.088506Z", - "iopub.status.idle": "2025-07-10T23:28:59.091981Z", - "shell.execute_reply": "2025-07-10T23:28:59.091554Z" - } - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Testing hello_tinytorch():\n", - ". . ... ....... .... ... . . .. .... . .. . . . . . .... \n", - ". . .. .++. .. . . . .. ... . . . .. ... .. \n", - " . . . .=++=.. . . . .. . . .. . ... .. . . \n", - ". .. ... .++++=. . . . . .. . .. .\n", - ". . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . \n", - " . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . \n", - " .. .. ..++++++++-.. . . ..##... -%#. . . . . . \n", - ". .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . \n", - " ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... \n", - " . ..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ...\n", - " ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... .\n", - ". .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . .\n", - "..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. \n", - ".:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . \n", - ".++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... ....\n", - ":+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=.\n", - ":++++++++======++++++++++++=.. ...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%.\n", - ".+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%.\n", - ".=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%.\n", - "..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%.\n", - "...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%.\n", - ". ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%.\n", - " . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===.\n", - " ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... .\n", - " .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . 
\n", - "\n", - "TinyπŸ”₯Torch\n", - "Build ML Systems from Scratch!\n", - "\n", - "Testing add_numbers():\n", - "2 + 3 = 5\n" - ] - } - ], - "source": [ - "# Test the functions in the notebook (will fail until implemented)\n", - "try:\n", - " print(\"Testing hello_tinytorch():\")\n", - " hello_tinytorch()\n", - " print()\n", - " print(\"Testing add_numbers():\")\n", - " print(f\"2 + 3 = {add_numbers(2, 3)}\")\n", - "except NotImplementedError as e:\n", - " print(f\"⚠️ {e}\")\n", - " print(\"Implement the functions above first!\")" - ] - }, - { - "cell_type": "markdown", - "id": "887b9723", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 2: A Simple Class\n", - "\n", - "Let's create a simple class that will help us understand system information. This is still basic, but shows how to structure classes in TinyTorch." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "29b647dc", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.093735Z", - "iopub.status.busy": "2025-07-10T23:28:59.093599Z", - "iopub.status.idle": "2025-07-10T23:28:59.096084Z", - "shell.execute_reply": "2025-07-10T23:28:59.095799Z" + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "# Module 0: Setup - Tiny\ud83d\udd25Torch Development Workflow\n", + "\n", + "Welcome to TinyTorch! This module teaches you the development workflow you'll use throughout the course.\n", + "\n", + "## Learning Goals\n", + "- Understand the nbdev notebook-to-Python workflow\n", + "- Write your first TinyTorch code\n", + "- Run tests and use the CLI tools\n", + "- Get comfortable with the development rhythm\n", + "\n", + "## The TinyTorch Development Cycle\n", + "\n", + "1. **Write code** in this notebook using `#| export` \n", + "2. **Export code** with `python bin/tito.py sync --module setup`\n", + "3. **Run tests** with `python bin/tito.py test --module setup`\n", + "4. 
**Check progress** with `python bin/tito.py info`\n", + "\n", + "Let's get started!\n", + "\"\"\"" + ] }, - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class SystemInfo:\n", - " \"\"\"\n", - " Simple system information class.\n", - " \n", - " TODO: Implement this class to collect and display system information.\n", - " \"\"\"\n", - " \n", - " def __init__(self):\n", - " \"\"\"\n", - " Initialize system information collection.\n", - " \n", - " TODO: Collect Python version, platform, and machine information.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __str__(self):\n", - " \"\"\"\n", - " Return human-readable system information.\n", - " \n", - " TODO: Format system info as a readable string.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def is_compatible(self):\n", - " \"\"\"\n", - " Check if system meets minimum requirements.\n", - " \n", - " TODO: Check if Python version is >= 3.8\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "d40a8e32", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.097467Z", - "iopub.status.busy": "2025-07-10T23:28:59.097350Z", - "iopub.status.idle": "2025-07-10T23:28:59.099769Z", - "shell.execute_reply": "2025-07-10T23:28:59.099425Z" - }, - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class SystemInfo:\n", - " \"\"\"Simple system information class.\"\"\"\n", - " \n", - " def __init__(self):\n", - " self.python_version = sys.version_info\n", - " self.platform = platform.system()\n", - " self.machine = platform.machine()\n", - " \n", - " def __str__(self):\n", - " return f\"Python {self.python_version.major}.{self.python_version.minor} on {self.platform} ({self.machine})\"\n", - " \n", - " def is_compatible(self):\n", - " \"\"\"Check if system meets minimum requirements.\"\"\"\n", - " return self.python_version >= (3, 8)" - ] - }, - { - "cell_type": "markdown", - "id": "a182b8ad", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your SystemInfo Class\n", - "\n", - "Once you implement the SystemInfo class above, run this cell to test it:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "bfd7d3c4", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.101366Z", - "iopub.status.busy": "2025-07-10T23:28:59.101221Z", - "iopub.status.idle": "2025-07-10T23:28:59.103476Z", - "shell.execute_reply": "2025-07-10T23:28:59.103228Z" - } - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Testing SystemInfo class:\n", - "System: Python 3.13 on Darwin (arm64)\n", - "Compatible: True\n" - ] - } - ], - "source": [ - "# Test the SystemInfo class (will fail until implemented)\n", - "try:\n", - " print(\"Testing SystemInfo class:\")\n", - " info = SystemInfo()\n", - " print(f\"System: {info}\")\n", - " print(f\"Compatible: {info.is_compatible()}\")\n", - "except NotImplementedError as e:\n", - " print(f\"⚠️ {e}\")\n", - " print(\"Implement the SystemInfo class above first!\")" - ] - }, - { - "cell_type": "markdown", - "id": "9a14de41", - "metadata": { - "cell_marker": "\"\"\"", - "lines_to_next_cell": 1 - }, - "source": [ - "## Step 3: Developer Personalization\n", - "\n", - "Let's make TinyTorch yours! 
Create a developer profile that will identify you throughout your ML systems journey." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "486717dd", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.104998Z", - "iopub.status.busy": "2025-07-10T23:28:59.104881Z", - "iopub.status.idle": "2025-07-10T23:28:59.107642Z", - "shell.execute_reply": "2025-07-10T23:28:59.107356Z" + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.utils\n", + "\n", + "# Setup imports and environment\n", + "import sys\n", + "import platform\n", + "from datetime import datetime\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "print(\"\ud83d\udd25 TinyTorch Development Environment\")\n", + "print(f\"Python {sys.version}\")\n", + "print(f\"Platform: {platform.system()} {platform.release()}\")\n", + "print(f\"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")" + ] }, - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| export\n", - "class DeveloperProfile:\n", - " \"\"\"\n", - " Developer profile for personalizing TinyTorch experience.\n", - " \n", - " TODO: Implement this class to store and display developer information.\n", - " Default to course instructor but allow students to personalize.\n", - " \"\"\"\n", - " \n", - " @staticmethod\n", - " def _load_default_flame():\n", - " \"\"\"\n", - " Load the default TinyTorch flame ASCII art from file.\n", - " \n", - " TODO: Implement file loading for tinytorch_flame.txt with fallback.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __init__(self, name=\"Vijay Janapa Reddi\", affiliation=\"Harvard University\", \n", - " email=\"vj@eecs.harvard.edu\", github_username=\"profvjreddi\", ascii_art=None):\n", - " \"\"\"\n", - " Initialize developer profile.\n", - " \n", - " TODO: Store developer information with sensible defaults.\n", - " Students should be able to customize this with their own info and ASCII art.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def __str__(self):\n", - " \"\"\"\n", - " Return formatted developer information.\n", - " \n", - " TODO: Format developer info as a professional signature with optional ASCII art.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def get_signature(self):\n", - " \"\"\"\n", - " Get a short signature for code headers.\n", - " \n", - " TODO: Return a concise signature like \"Built by Name (@github)\"\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")\n", - " \n", - " def get_ascii_art(self):\n", - " \"\"\"\n", - " Get ASCII art for the profile.\n", - " \n", - " TODO: Return custom ASCII art or default flame loaded from file.\n", - " \"\"\"\n", - " raise NotImplementedError(\"Student implementation required\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "49a5cfed", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.109202Z", - "iopub.status.busy": "2025-07-10T23:28:59.109073Z", - "iopub.status.idle": "2025-07-10T23:28:59.113144Z", - "shell.execute_reply": "2025-07-10T23:28:59.112824Z" - }, - "lines_to_next_cell": 1 - }, - "outputs": [], - "source": [ - "#| hide\n", - "#| export\n", - "class DeveloperProfile:\n", - " \"\"\"Developer profile for personalizing TinyTorch experience.\"\"\"\n", - " \n", - " @staticmethod\n", - " def 
_load_default_flame():\n", - " \"\"\"Load the default TinyTorch flame ASCII art from file.\"\"\"\n", - " try:\n", - " # Try to load from the same directory as this module\n", - " try:\n", - " # Try to get the directory of the current file\n", - " current_dir = os.path.dirname(__file__)\n", - " except NameError:\n", - " # If __file__ is not defined (e.g., in notebook), use current directory\n", - " current_dir = os.getcwd()\n", - " \n", - " flame_path = os.path.join(current_dir, 'tinytorch_flame.txt')\n", - " \n", - " with open(flame_path, 'r', encoding='utf-8') as f:\n", - " flame_art = f.read()\n", - " \n", - " # Add the TinyπŸ”₯Torch text below the flame\n", - " return f\"\"\"{flame_art}\n", - " \n", - " TinyπŸ”₯Torch\n", - " Build ML Systems from Scratch!\n", - " \"\"\"\n", - " except (FileNotFoundError, IOError):\n", - " # Fallback to simple flame if file not found\n", - " return \"\"\"\n", - " πŸ”₯ TinyTorch Developer πŸ”₯\n", - " . . . . . .\n", - " . . . . . .\n", - " . . . . . . .\n", - " . . . . . . . .\n", - " . . . . . . . . .\n", - " . . . . . . . . . .\n", - " . . . . . . . . . . .\n", - " . . . . . . . . . . . .\n", - " . . . . . . . . . . . . .\n", - ". . . . . . . . . . . . . .\n", - " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ \\\\ / / / / / /\n", - " \\\\ \\\\ / / / / / /\n", - " \\\\ / / / / / /\n", - " \\\\/ / / / / /\n", - " \\\\/ / / / /\n", - " \\\\/ / / /\n", - " \\\\/ / /\n", - " \\\\/ /\n", - " \\\\/\n", - " \n", - " TinyπŸ”₯Torch\n", - " Build ML Systems from Scratch!\n", - " \"\"\"\n", - " \n", - " def __init__(self, name=\"Vijay Janapa Reddi\", affiliation=\"Harvard University\", \n", - " email=\"vj@eecs.harvard.edu\", github_username=\"profvjreddi\", ascii_art=None):\n", - " self.name = name\n", - " self.affiliation = affiliation\n", - " self.email = email\n", - " self.github_username = github_username\n", - " self.ascii_art = ascii_art or self._load_default_flame()\n", - " \n", - " def __str__(self):\n", - " return f\"πŸ‘¨β€πŸ’» {self.name} | {self.affiliation} | @{self.github_username}\"\n", - " \n", - " def get_signature(self):\n", - " \"\"\"Get a short signature for code headers.\"\"\"\n", - " return f\"Built by {self.name} (@{self.github_username})\"\n", - " \n", - " def get_ascii_art(self):\n", - " \"\"\"Get ASCII art for the profile.\"\"\"\n", - " return self.ascii_art\n", - " \n", - " def get_full_profile(self):\n", - " \"\"\"Get complete profile with ASCII art.\"\"\"\n", - " return f\"\"\"{self.ascii_art}\n", - " \n", - "πŸ‘¨β€πŸ’» Developer: {self.name}\n", - "πŸ›οΈ Affiliation: {self.affiliation}\n", - "πŸ“§ Email: {self.email}\n", - "πŸ™ GitHub: @{self.github_username}\n", - "πŸ”₯ Ready to build ML systems from scratch!\n", - "\"\"\"" - ] - }, - { - "cell_type": "markdown", - "id": "b848981d", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### πŸ§ͺ Test Your Developer Profile\n", - "\n", - "Customize your developer profile! 
Replace the default information with your own:" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "5d80e79c", - "metadata": { - "execution": { - "iopub.execute_input": "2025-07-10T23:28:59.114628Z", - "iopub.status.busy": "2025-07-10T23:28:59.114540Z", - "iopub.status.idle": "2025-07-10T23:28:59.118055Z", - "shell.execute_reply": "2025-07-10T23:28:59.117792Z" - } - }, - "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Testing DeveloperProfile (with defaults):\n", - "Profile: πŸ‘¨β€πŸ’» Vijay Janapa Reddi | Harvard University | @profvjreddi\n", - "Signature: Built by Vijay Janapa Reddi (@profvjreddi)\n", - "\n", - "🎨 ASCII Art Preview:\n", - ". . ... ....... .... ... . . .. .... . .. . . . . . .... \n", - ". . .. .++. .. . . . .. ... . . . .. ... .. \n", - " . . . .=++=.. . . . .. . . .. . ... .. . . \n", - ". .. ... .++++=. . . . . .. . .. .\n", - ". . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . \n", - " . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . \n", - " .. .. ..++++++++-.. . . ..##... -%#. . . . . . \n", - ". .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . \n", - " ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... \n", - " . ..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ...\n", - " ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... .\n", - ". .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . .\n", - "..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. \n", - ".:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . \n", - ".++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... ....\n", - ":+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=.\n", - ":++++++++======++++++++++++=.. ...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%.\n", - ".+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%.\n", - ".=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%.\n", - "..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%.\n", - "...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%.\n", - ". ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%.\n", - " . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===.\n", - " ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... .\n", - " .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . \n", - "\n", - "\n", - " TinyπŸ”₯Torch\n", - " Build ML Systems from Scratch!\n", - " \n", - "\n", - "πŸ”₯ Full Profile Display:\n", - ". . ... ....... .... ... . . .. .... . .. . . . . . .... \n", - ". . .. .++. .. . . . .. ... . . . .. ... .. \n", - " . . . .=++=.. . . . .. . . .. . ... .. . . \n", - ". .. ... .++++=. . . . . .. . .. .\n", - ". . . . ....-+++++.... ... .. . .... .. . . . . . . . . . . . . \n", - " . .. ...-++++++-...... .. . ..... ..-:.. .. . .... .. . . .. . .. . . . \n", - " .. .. ..++++++++-.. . . ..##... -%#. . . . . . \n", - ". .. .:+++++++++.... ... . ...:%%:............:-:. ..... ...... . . ....... .. . . \n", - " ..+++++++++++. ... . .. .=#%%##+.-##..#%####%%=.=%%. .*%+.. . . . ... \n", - " . 
..++++++++++++...-++..... . .%%... -##..##=...=%#..*%*..=%#.. . .. ... . . . . .. . ...\n", - " ..-+++++++++++++..=++++... .....%#.. -##..#%-.. -##. .%%=.%%.. . . . . . ... .\n", - ". .=++++++++++++++-+++++++.... . ...%%:...-##..#%-. .-%#. ..#%#%=.. . .. ... . . . .\n", - "..=+++++++++++++++++++++++-. . ..=%%%+.-%#..##-. .-%#....-%%*.. . .. . .. .. .. \n", - ".:+++++++++++=+++++++++++++. . ................ .......-%%... . .. . . .. . \n", - ".++++++++++===+++++++++++++: . .................... . ...%%%#:........ . .. ..... ......... ....\n", - ":+++++++++====+++++++++++++=.. ...-----------.....-+#*=:.....-------:.......:=*#+-.. ..--:.....--=.\n", - ":++++++++======++++++++++++=.. ...#%%%%%%%%%#..-#%%###%%#=...#%####%%%=...+%%%###%%#...#%+.. ..#%%.\n", - ".+++++++========+++++++++++- .. .#%%.. ..-%%+.. ..-%%+..#%*.. .*%%..*%%:. ..#%*..#%+... .#%%.\n", - ".=++++++==========+++++++++: . .#%%.....#%#.... .*%#..#%*...-%%*..#%+. ... . ..##%#####%%%.\n", - "..++++++===========+++++++-. . ...#%%. . .#%#. . .*%#..#%%%%%%#-. .#%+. . ....#%*-----#%%.\n", - "...+++++===========++++++=. . . . .#%%... -%%+.....=%%+..#%*..+%%-. .*%%-.....#%*..%%+.. ..%%%.\n", - ". ..-+++===========+++++.. . .. ..#%%. .:%%%###%%%=...#%*...+%%=...+%%####%%#...%%+.. ..%%%.\n", - " . ...-++==========+++:.... ... . .===. ... ..-+++=.. ..-=-....-==: ..:=+++-.. ..==-... .===.\n", - " ....-+=======+-...... .. . . ... . . .. ... . . .... . . . . ..... . ... ..... .\n", - " .... . ......:..... ... . .. . ... . . ... . . . ... . . . ... .. ..... . . \n", - "\n", - "\n", - " TinyπŸ”₯Torch\n", - " Build ML Systems from Scratch!\n", - " \n", - "\n", - "πŸ‘¨β€πŸ’» Developer: Vijay Janapa Reddi\n", - "πŸ›οΈ Affiliation: Harvard University\n", - "πŸ“§ Email: vj@eecs.harvard.edu\n", - "πŸ™ GitHub: @profvjreddi\n", - "πŸ”₯ Ready to build ML systems from scratch!\n", - "\n", - "\n", - "🎯 YOUR TURN: Create your own profile!\n", - "Uncomment and modify the lines below:\n", - "# my_profile = DeveloperProfile(\n", - "# name='Your Name',\n", - "# affiliation='Your University/Company',\n", - "# email='your.email@example.com',\n", - "# github_username='yourgithub',\n", - "# ascii_art='''\n", - "# Your Custom ASCII Art Here!\n", - "# Maybe your initials, a logo, or something fun!\n", - "# '''\n", - "# )\n", - "# print(f'My Profile: {my_profile}')\n", - "# print(f'My Signature: {my_profile.get_signature()}')\n", - "# print(my_profile.get_full_profile())\n" - ] + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 1: Understanding the Module \u2192 Package Structure\n", + "\n", + "**\ud83c\udf93 Teaching vs. 
\ud83d\udd27 Building**: This course has two sides:\n", + "- **Teaching side**: You work in `modules/setup/setup_dev.ipynb` (learning-focused)\n", + "- **Building side**: Your code exports to `tinytorch/core/utils.py` (production package)\n", + "\n", + "**Key Concept**: The `#| default_exp core.utils` directive at the top tells nbdev to export all `#| export` cells to `tinytorch/core/utils.py`.\n", + "\n", + "This separation allows us to:\n", + "- Organize learning by **concepts** (modules) \n", + "- Organize code by **function** (package structure)\n", + "- Build a real ML framework while learning systematically\n", + "\n", + "Let's write a simple \"Hello World\" function with the `#| export` directive:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def hello_tinytorch():\n", + " \"\"\"\n", + " A simple hello world function for TinyTorch.\n", + " \n", + " TODO: Implement this function to display TinyTorch ASCII art and welcome message.\n", + " Load the flame art from tinytorch_flame.txt file with graceful fallback.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + "\n", + "def add_numbers(a, b):\n", + " \"\"\"\n", + " Add two numbers together.\n", + " \n", + " TODO: Implement addition of two numbers.\n", + " This is the foundation of all mathematical operations in ML.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "def hello_tinytorch():\n", + " \"\"\"Display the TinyTorch ASCII art and welcome message.\"\"\"\n", + " try:\n", + " # Get the directory containing this file\n", + " current_dir = Path(__file__).parent\n", + " art_file = current_dir / \"tinytorch_flame.txt\"\n", + " \n", + " if art_file.exists():\n", + " with open(art_file, 'r') as f:\n", + " ascii_art = f.read()\n", + " print(ascii_art)\n", + " print(\"Tiny\ud83d\udd25Torch\")\n", + " print(\"Build ML Systems from Scratch!\")\n", + " else:\n", + " print(\"\ud83d\udd25 TinyTorch \ud83d\udd25\")\n", + " print(\"Build ML Systems from Scratch!\")\n", + " except NameError:\n", + " # Handle case when running in notebook where __file__ is not defined\n", + " try:\n", + " art_file = Path(os.getcwd()) / \"tinytorch_flame.txt\"\n", + " if art_file.exists():\n", + " with open(art_file, 'r') as f:\n", + " ascii_art = f.read()\n", + " print(ascii_art)\n", + " print(\"Tiny\ud83d\udd25Torch\")\n", + " print(\"Build ML Systems from Scratch!\")\n", + " else:\n", + " print(\"\ud83d\udd25 TinyTorch \ud83d\udd25\")\n", + " print(\"Build ML Systems from Scratch!\")\n", + " except:\n", + " print(\"\ud83d\udd25 TinyTorch \ud83d\udd25\")\n", + " print(\"Build ML Systems from Scratch!\")\n", + "\n", + "def add_numbers(a, b):\n", + " \"\"\"Add two numbers together.\"\"\"\n", + " return a + b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Implementation\n", + "\n", + "Once you implement the functions above, run this cell to test them:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the functions in the notebook (will fail until implemented)\n", + "try:\n", + " print(\"Testing hello_tinytorch():\")\n", + " hello_tinytorch()\n", + " 
print()\n", + " print(\"Testing add_numbers():\")\n", + " print(f\"2 + 3 = {add_numbers(2, 3)}\")\n", + "except NotImplementedError as e:\n", + " print(f\"\u26a0\ufe0f {e}\")\n", + " print(\"Implement the functions above first!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 2: A Simple Class\n", + "\n", + "Let's create a simple class that will help us understand system information. This is still basic, but shows how to structure classes in TinyTorch.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class SystemInfo:\n", + " \"\"\"\n", + " Simple system information class.\n", + " \n", + " TODO: Implement this class to collect and display system information.\n", + " \"\"\"\n", + " \n", + " def __init__(self):\n", + " \"\"\"\n", + " Initialize system information collection.\n", + " \n", + " TODO: Collect Python version, platform, and machine information.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __str__(self):\n", + " \"\"\"\n", + " Return human-readable system information.\n", + " \n", + " TODO: Format system info as a readable string.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def is_compatible(self):\n", + " \"\"\"\n", + " Check if system meets minimum requirements.\n", + " \n", + " TODO: Check if Python version is >= 3.8\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class SystemInfo:\n", + " \"\"\"Simple system information class.\"\"\"\n", + " \n", + " def __init__(self):\n", + " self.python_version = sys.version_info\n", + " self.platform = platform.system()\n", + " self.machine = platform.machine()\n", + " \n", + " def __str__(self):\n", + " return f\"Python {self.python_version.major}.{self.python_version.minor} on {self.platform} ({self.machine})\"\n", + " \n", + " def is_compatible(self):\n", + " \"\"\"Check if system meets minimum requirements.\"\"\"\n", + " return self.python_version >= (3, 8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your SystemInfo Class\n", + "\n", + "Once you implement the SystemInfo class above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the SystemInfo class (will fail until implemented)\n", + "try:\n", + " print(\"Testing SystemInfo class:\")\n", + " info = SystemInfo()\n", + " print(f\"System: {info}\")\n", + " print(f\"Compatible: {info.is_compatible()}\")\n", + "except NotImplementedError as e:\n", + " print(f\"\u26a0\ufe0f {e}\")\n", + " print(\"Implement the SystemInfo class above first!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 3: Developer Personalization\n", + "\n", + "Let's make TinyTorch yours! 
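> A note on the reference `is_compatible` above: it works because `sys.version_info` is a named tuple, so comparing it against a plain tuple is an ordinary element-by-element comparison. A minimal standalone sketch (standard library only, nothing TinyTorch-specific assumed):

```python
import sys
import platform

# sys.version_info behaves like (major, minor, micro, ...), so comparing it
# to a plain tuple is lexicographic: (3, 11, 4) >= (3, 8) is True.
print(sys.version_info >= (3, 8))  # True on any Python 3.8+ interpreter
print(f"Python {sys.version_info.major}.{sys.version_info.minor} "
      f"on {platform.system()} ({platform.machine()})")
```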
Create a developer profile that will identify you throughout your ML systems journey.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class DeveloperProfile:\n", + " \"\"\"\n", + " Developer profile for personalizing TinyTorch experience.\n", + " \n", + " TODO: Implement this class to store and display developer information.\n", + " Default to course instructor but allow students to personalize.\n", + " \"\"\"\n", + " \n", + " @staticmethod\n", + " def _load_default_flame():\n", + " \"\"\"\n", + " Load the default TinyTorch flame ASCII art from file.\n", + " \n", + " TODO: Implement file loading for tinytorch_flame.txt with fallback.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __init__(self, name=\"Vijay Janapa Reddi\", affiliation=\"Harvard University\", \n", + " email=\"vj@eecs.harvard.edu\", github_username=\"profvjreddi\", ascii_art=None):\n", + " \"\"\"\n", + " Initialize developer profile.\n", + " \n", + " TODO: Store developer information with sensible defaults.\n", + " Students should be able to customize this with their own info and ASCII art.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __str__(self):\n", + " \"\"\"\n", + " Return formatted developer information.\n", + " \n", + " TODO: Format developer info as a professional signature with optional ASCII art.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def get_signature(self):\n", + " \"\"\"\n", + " Get a short signature for code headers.\n", + " \n", + " TODO: Return a concise signature like \"Built by Name (@github)\"\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def get_ascii_art(self):\n", + " \"\"\"\n", + " Get ASCII art for the profile.\n", + " \n", + " TODO: Return custom ASCII art or default flame loaded from file.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class DeveloperProfile:\n", + " \"\"\"Developer profile for personalizing TinyTorch experience.\"\"\"\n", + " \n", + " @staticmethod\n", + " def _load_default_flame():\n", + " \"\"\"Load the default TinyTorch flame ASCII art from file.\"\"\"\n", + " try:\n", + " # Try to load from the same directory as this module\n", + " try:\n", + " # Try to get the directory of the current file\n", + " current_dir = os.path.dirname(__file__)\n", + " except NameError:\n", + " # If __file__ is not defined (e.g., in notebook), use current directory\n", + " current_dir = os.getcwd()\n", + " \n", + " flame_path = os.path.join(current_dir, 'tinytorch_flame.txt')\n", + " \n", + " with open(flame_path, 'r', encoding='utf-8') as f:\n", + " flame_art = f.read()\n", + " \n", + " # Add the Tiny\ud83d\udd25Torch text below the flame\n", + " return f\"\"\"{flame_art}\n", + " \n", + " Tiny\ud83d\udd25Torch\n", + " Build ML Systems from Scratch!\n", + " \"\"\"\n", + " except (FileNotFoundError, IOError):\n", + " # Fallback to simple flame if file not found\n", + " return \"\"\"\n", + " \ud83d\udd25 TinyTorch Developer \ud83d\udd25\n", + " . . . . . .\n", + " . . . . . .\n", + " . . . . . . .\n", + " . . . . . . . .\n", + " . . . . . . . . .\n", + " . . . . . . . . . .\n", + " . 
. . . . . . . . . .\n", + " . . . . . . . . . . . .\n", + " . . . . . . . . . . . . .\n", + ". . . . . . . . . . . . . .\n", + " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ \\\\ / / / / / /\n", + " \\\\ \\\\ / / / / / /\n", + " \\\\ / / / / / /\n", + " \\\\/ / / / / /\n", + " \\\\/ / / / /\n", + " \\\\/ / / /\n", + " \\\\/ / /\n", + " \\\\/ /\n", + " \\\\/\n", + " \n", + " Tiny\ud83d\udd25Torch\n", + " Build ML Systems from Scratch!\n", + " \"\"\"\n", + " \n", + " def __init__(self, name=\"Vijay Janapa Reddi\", affiliation=\"Harvard University\", \n", + " email=\"vj@eecs.harvard.edu\", github_username=\"profvjreddi\", ascii_art=None):\n", + " self.name = name\n", + " self.affiliation = affiliation\n", + " self.email = email\n", + " self.github_username = github_username\n", + " self.ascii_art = ascii_art or self._load_default_flame()\n", + " \n", + " def __str__(self):\n", + " return f\"\ud83d\udc68\u200d\ud83d\udcbb {self.name} | {self.affiliation} | @{self.github_username}\"\n", + " \n", + " def get_signature(self):\n", + " \"\"\"Get a short signature for code headers.\"\"\"\n", + " return f\"Built by {self.name} (@{self.github_username})\"\n", + " \n", + " def get_ascii_art(self):\n", + " \"\"\"Get ASCII art for the profile.\"\"\"\n", + " return self.ascii_art\n", + " \n", + " def get_full_profile(self):\n", + " \"\"\"Get complete profile with ASCII art.\"\"\"\n", + " return f\"\"\"{self.ascii_art}\n", + " \n", + "\ud83d\udc68\u200d\ud83d\udcbb Developer: {self.name}\n", + "\ud83c\udfdb\ufe0f Affiliation: {self.affiliation}\n", + "\ud83d\udce7 Email: {self.email}\n", + "\ud83d\udc19 GitHub: @{self.github_username}\n", + "\ud83d\udd25 Ready to build ML systems from scratch!\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Developer Profile\n", + "\n", + "Customize your developer profile! 
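> With the reference implementation above, a personalized profile takes only a constructor call. A short sketch; every value below is a placeholder, not part of the course materials:

```python
# Hypothetical example values - substitute your own details.
my_profile = DeveloperProfile(
    name="Ada Lovelace",
    affiliation="Example University",
    email="ada@example.com",
    github_username="adalovelace",
)
print(my_profile)                  # πŸ‘¨β€πŸ’» Ada Lovelace | Example University | @adalovelace
print(my_profile.get_signature())  # Built by Ada Lovelace (@adalovelace)
```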
Replace the default information with your own:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the DeveloperProfile class\n", + "try:\n", + " print(\"Testing DeveloperProfile (with defaults):\")\n", + " # Default profile (instructor)\n", + " default_profile = DeveloperProfile()\n", + " print(f\"Profile: {default_profile}\")\n", + " print(f\"Signature: {default_profile.get_signature()}\")\n", + " print()\n", + " \n", + " print(\"\ud83c\udfa8 ASCII Art Preview:\")\n", + " print(default_profile.get_ascii_art())\n", + " print()\n", + " \n", + " print(\"\ud83d\udd25 Full Profile Display:\")\n", + " print(default_profile.get_full_profile())\n", + " print()\n", + " \n", + " # TODO: Students should customize this with their own information!\n", + " print(\"\ud83c\udfaf YOUR TURN: Create your own profile!\")\n", + " print(\"Uncomment and modify the lines below:\")\n", + " print(\"# my_profile = DeveloperProfile(\")\n", + " print(\"# name='Your Name',\")\n", + " print(\"# affiliation='Your University/Company',\")\n", + " print(\"# email='your.email@example.com',\")\n", + " print(\"# github_username='yourgithub',\")\n", + " print(\"# ascii_art='''\")\n", + " print(\"# Your Custom ASCII Art Here!\")\n", + " print(\"# Maybe your initials, a logo, or something fun!\")\n", + " print(\"# '''\")\n", + " print(\"# )\")\n", + " print(\"# print(f'My Profile: {my_profile}')\")\n", + " print(\"# print(f'My Signature: {my_profile.get_signature()}')\")\n", + " print(\"# print(my_profile.get_full_profile())\")\n", + " \n", + "except NotImplementedError as e:\n", + " print(f\"\u26a0\ufe0f {e}\")\n", + " print(\"Implement the DeveloperProfile class above first!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83c\udfa8 Personalization Challenge\n", + "\n", + "**For Students**: Make TinyTorch truly yours by:\n", + "\n", + "1. **Update your profile** in the cell above with your real information\n", + "2. **Create custom ASCII art** - your initials, a simple logo, or something that represents you\n", + "3. **Customize the flame file** - edit `tinytorch_flame.txt` to create your own default art\n", + "4. **Add your signature** to code you write throughout the course\n", + "5. **Show off your full profile** with the `get_full_profile()` method\n", + "\n", + "This isn't just about customization - it's about taking ownership of your learning journey in ML systems!\n", + "\n", + "**ASCII Art Customization Options:**\n", + "\n", + "**Option 1: Custom ASCII Art Parameter**\n", + "```python\n", + "my_profile = DeveloperProfile(\n", + " name=\"Your Name\",\n", + " ascii_art='''\n", + " Your Custom ASCII Art Here!\n", + " Maybe your initials, a logo, or something fun!\n", + " '''\n", + ")\n", + "```\n", + "\n", + "**Option 2: Edit the Default Flame File**\n", + "- Edit `tinytorch_flame.txt` in this directory\n", + "- Replace with your own ASCII art design\n", + "- All students using defaults will see your custom art!\n", + "\n", + "**ASCII Art Ideas:**\n", + "- Your initials in block letters\n", + "- A simple logo or symbol that represents you\n", + "- Your university mascot in ASCII\n", + "- A coding-themed design\n", + "- Something that motivates you!\n", + "\n", + "**Pro Tip**: The `tinytorch_flame.txt` file contains the beautiful default flame art. 
You can:\n", + "- Edit it directly for a personalized default\n", + "- Create your own `.txt` file and modify the code to load it\n", + "- Use online ASCII art generators for inspiration\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 4: Try the Export Process\n", + "\n", + "Now let's export our code! In your terminal, run:\n", + "\n", + "```bash\n", + "python bin/tito.py sync --module setup\n", + "```\n", + "\n", + "This will export the code marked with `#| export` to `tinytorch/core/utils.py`.\n", + "\n", + "**What happens during export:**\n", + "1. nbdev scans this notebook for `#| export` cells\n", + "2. Extracts the Python code \n", + "3. Writes it to `tinytorch/core/utils.py` (because of `#| default_exp core.utils`)\n", + "4. Handles imports and dependencies automatically\n", + "\n", + "**\ud83d\udd0d Verification**: After export, check `tinytorch/core/utils.py` - you'll see your functions there with auto-generated headers pointing back to this notebook!\n", + "\n", + "**Note**: The export process will use the instructor solutions (from `#|hide` cells) so the package will have working implementations even if you haven't completed the exercises yet.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 5: Run Tests\n", + "\n", + "After exporting, run the tests:\n", + "\n", + "```bash\n", + "python bin/tito.py test --module setup\n", + "```\n", + "\n", + "This will run all tests for the setup module and verify your implementation works correctly.\n", + "\n", + "## Step 6: Check Your Progress\n", + "\n", + "See your overall progress:\n", + "\n", + "```bash\n", + "python bin/tito.py info\n", + "```\n", + "\n", + "This shows which modules are complete and which are pending.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## \ud83c\udf89 Congratulations!\n", + "\n", + "You've learned the TinyTorch development workflow:\n", + "\n", + "1. \u2705 Write code in notebooks with `#| export`\n", + "2. \u2705 Export with `tito sync --module setup` \n", + "3. \u2705 Test with `tito test --module setup`\n", + "4. \u2705 Check progress with `tito info`\n", + "\n", + "**This is the rhythm you'll use for every module in TinyTorch.**\n", + "\n", + "### Next Steps\n", + "\n", + "Ready for the real work? Head to **Module 1: Tensor** where you'll build the core data structures that power everything else in TinyTorch.\n", + "\n", + "**Development Tips:**\n", + "- Always test your code in the notebook first\n", + "- Export frequently to catch issues early \n", + "- Read error messages carefully - they're designed to help\n", + "- When stuck, check if your code exports cleanly first\n", + "\n", + "Happy building! 
\ud83d\udd25\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.0" } - ], - "source": [ - "# Test the DeveloperProfile class\n", - "try:\n", - " print(\"Testing DeveloperProfile (with defaults):\")\n", - " # Default profile (instructor)\n", - " default_profile = DeveloperProfile()\n", - " print(f\"Profile: {default_profile}\")\n", - " print(f\"Signature: {default_profile.get_signature()}\")\n", - " print()\n", - " \n", - " print(\"🎨 ASCII Art Preview:\")\n", - " print(default_profile.get_ascii_art())\n", - " print()\n", - " \n", - " print(\"πŸ”₯ Full Profile Display:\")\n", - " print(default_profile.get_full_profile())\n", - " print()\n", - " \n", - " # TODO: Students should customize this with their own information!\n", - " print(\"🎯 YOUR TURN: Create your own profile!\")\n", - " print(\"Uncomment and modify the lines below:\")\n", - " print(\"# my_profile = DeveloperProfile(\")\n", - " print(\"# name='Your Name',\")\n", - " print(\"# affiliation='Your University/Company',\")\n", - " print(\"# email='your.email@example.com',\")\n", - " print(\"# github_username='yourgithub',\")\n", - " print(\"# ascii_art='''\")\n", - " print(\"# Your Custom ASCII Art Here!\")\n", - " print(\"# Maybe your initials, a logo, or something fun!\")\n", - " print(\"# '''\")\n", - " print(\"# )\")\n", - " print(\"# print(f'My Profile: {my_profile}')\")\n", - " print(\"# print(f'My Signature: {my_profile.get_signature()}')\")\n", - " print(\"# print(my_profile.get_full_profile())\")\n", - " \n", - "except NotImplementedError as e:\n", - " print(f\"⚠️ {e}\")\n", - " print(\"Implement the DeveloperProfile class above first!\")" - ] }, - { - "cell_type": "markdown", - "id": "4f117574", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "### 🎨 Personalization Challenge\n", - "\n", - "**For Students**: Make TinyTorch truly yours by:\n", - "\n", - "1. **Update your profile** in the cell above with your real information\n", - "2. **Create custom ASCII art** - your initials, a simple logo, or something that represents you\n", - "3. **Customize the flame file** - edit `tinytorch_flame.txt` to create your own default art\n", - "4. **Add your signature** to code you write throughout the course\n", - "5. **Show off your full profile** with the `get_full_profile()` method\n", - "\n", - "This isn't just about customization - it's about taking ownership of your learning journey in ML systems!\n", - "\n", - "**ASCII Art Customization Options:**\n", - "\n", - "**Option 1: Custom ASCII Art Parameter**\n", - "```python\n", - "my_profile = DeveloperProfile(\n", - " name=\"Your Name\",\n", - " ascii_art='''\n", - " Your Custom ASCII Art Here!\n", - " Maybe your initials, a logo, or something fun!\n", - " '''\n", - ")\n", - "```\n", - "\n", - "**Option 2: Edit the Default Flame File**\n", - "- Edit `tinytorch_flame.txt` in this directory\n", - "- Replace with your own ASCII art design\n", - "- All students using defaults will see your custom art!\n", - "\n", - "**ASCII Art Ideas:**\n", - "- Your initials in block letters\n", - "- A simple logo or symbol that represents you\n", - "- Your university mascot in ASCII\n", - "- A coding-themed design\n", - "- Something that motivates you!\n", - "\n", - "**Pro Tip**: The `tinytorch_flame.txt` file contains the beautiful default flame art. 
You can:\n", - "- Edit it directly for a personalized default\n", - "- Create your own `.txt` file and modify the code to load it\n", - "- Use online ASCII art generators for inspiration" - ] - }, - { - "cell_type": "markdown", - "id": "3e34c7fe", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 4: Try the Export Process\n", - "\n", - "Now let's export our code! In your terminal, run:\n", - "\n", - "```bash\n", - "python bin/tito.py sync --module setup\n", - "```\n", - "\n", - "This will export the code marked with `#| export` to `tinytorch/core/utils.py`.\n", - "\n", - "**What happens during export:**\n", - "1. nbdev scans this notebook for `#| export` cells\n", - "2. Extracts the Python code \n", - "3. Writes it to `tinytorch/core/utils.py` (because of `#| default_exp core.utils`)\n", - "4. Handles imports and dependencies automatically\n", - "\n", - "**πŸ” Verification**: After export, check `tinytorch/core/utils.py` - you'll see your functions there with auto-generated headers pointing back to this notebook!\n", - "\n", - "**Note**: The export process will use the instructor solutions (from `#|hide` cells) so the package will have working implementations even if you haven't completed the exercises yet." - ] - }, - { - "cell_type": "markdown", - "id": "641ad5d7", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## Step 5: Run Tests\n", - "\n", - "After exporting, run the tests:\n", - "\n", - "```bash\n", - "python bin/tito.py test --module setup\n", - "```\n", - "\n", - "This will run all tests for the setup module and verify your implementation works correctly.\n", - "\n", - "## Step 6: Check Your Progress\n", - "\n", - "See your overall progress:\n", - "\n", - "```bash\n", - "python bin/tito.py info\n", - "```\n", - "\n", - "This shows which modules are complete and which are pending." - ] - }, - { - "cell_type": "markdown", - "id": "7a09b00d", - "metadata": { - "cell_marker": "\"\"\"" - }, - "source": [ - "## πŸŽ‰ Congratulations!\n", - "\n", - "You've learned the TinyTorch development workflow:\n", - "\n", - "1. βœ… Write code in notebooks with `#| export`\n", - "2. βœ… Export with `tito sync --module setup` \n", - "3. βœ… Test with `tito test --module setup`\n", - "4. βœ… Check progress with `tito info`\n", - "\n", - "**This is the rhythm you'll use for every module in TinyTorch.**\n", - "\n", - "### Next Steps\n", - "\n", - "Ready for the real work? Head to **Module 1: Tensor** where you'll build the core data structures that power everything else in TinyTorch.\n", - "\n", - "**Development Tips:**\n", - "- Always test your code in the notebook first\n", - "- Export frequently to catch issues early \n", - "- Read error messages carefully - they're designed to help\n", - "- When stuck, check if your code exports cleanly first\n", - "\n", - "Happy building! 
πŸ”₯" - ] - } - ], - "metadata": { - "jupytext": { - "main_language": "python" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/modules/sidebar.yml b/modules/sidebar.yml new file mode 100644 index 00000000..ffbd6897 --- /dev/null +++ b/modules/sidebar.yml @@ -0,0 +1,9 @@ +website: + sidebar: + contents: + - section: layers + contents: + - layers/layers_dev.ipynb + - section: setup + contents: + - setup/setup_dev.ipynb diff --git a/modules/tensor/tensor_dev.ipynb b/modules/tensor/tensor_dev.ipynb new file mode 100644 index 00000000..85385a71 --- /dev/null +++ b/modules/tensor/tensor_dev.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "jupyter:\n", + " jupytext:\n", + " text_representation:\n", + " extension: .py\n", + " format_name: percent\n", + " format_version: '1.3'\n", + " jupytext_version: 1.17.1\n", + "---\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "# Module 1: Tensor - Core Data Structure\n", + "\n", + "Welcome to the Tensor module! This is where TinyTorch really begins. You'll implement the fundamental data structure that powers all ML systems.\n", + "\n", + "## Learning Goals\n", + "- Understand tensors as N-dimensional arrays with ML-specific operations\n", + "- Implement a complete Tensor class with arithmetic operations\n", + "- Handle shape management, data types, and memory layout\n", + "- Build the foundation for neural networks and automatic differentiation\n", + "\n", + "## Module \u2192 Package Structure\n", + "**\ud83c\udf93 Teaching vs. \ud83d\udd27 Building**: \n", + "- **Learning side**: Work in `modules/tensor/tensor_dev.py` \n", + "- **Building side**: Exports to `tinytorch/core/tensor.py`\n", + "\n", + "This module builds the core data structure that all other TinyTorch components will use.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp core.tensor\n", + "\n", + "# Setup and imports\n", + "import numpy as np\n", + "import sys\n", + "from typing import Union, List, Tuple, Optional, Any\n", + "\n", + "print(\"\ud83d\udd25 TinyTorch Tensor Module\")\n", + "print(f\"NumPy version: {np.__version__}\")\n", + "print(f\"Python version: {sys.version_info.major}.{sys.version_info.minor}\")\n", + "print(\"Ready to build tensors!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 1: What is a Tensor?\n", + "\n", + "A **tensor** is an N-dimensional array with ML-specific operations. Think of it as:\n", + "- **Scalar** (0D): A single number - `5.0`\n", + "- **Vector** (1D): A list of numbers - `[1, 2, 3]` \n", + "- **Matrix** (2D): A 2D array - `[[1, 2], [3, 4]]`\n", + "- **Higher dimensions**: 3D, 4D, etc. 
for images, video, batches\n", + "\n", + "**Why not just use NumPy?** We will use NumPy internally, but our Tensor class will add:\n", + "- ML-specific operations (later: gradients, GPU support)\n", + "- Consistent API for neural networks\n", + "- Type safety and error checking\n", + "- Integration with the rest of TinyTorch\n", + "\n", + "Let's start building!\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class Tensor:\n", + " \"\"\"\n", + " TinyTorch Tensor: N-dimensional array with ML operations.\n", + " \n", + " The fundamental data structure for all TinyTorch operations.\n", + " Wraps NumPy arrays with ML-specific functionality.\n", + " \n", + " TODO: Implement the core Tensor class with data handling and properties.\n", + " \"\"\"\n", + " \n", + " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", + " \"\"\"\n", + " Create a new tensor from data.\n", + " \n", + " Args:\n", + " data: Input data (scalar, list, or numpy array)\n", + " dtype: Data type ('float32', 'int32', etc.). Defaults to auto-detect.\n", + " \n", + " TODO: Implement tensor creation with proper type handling.\n", + " \"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " @property\n", + " def data(self) -> np.ndarray:\n", + " \"\"\"Access underlying numpy array.\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " @property\n", + " def shape(self) -> Tuple[int, ...]:\n", + " \"\"\"Get tensor shape.\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " @property\n", + " def size(self) -> int:\n", + " \"\"\"Get total number of elements.\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " @property\n", + " def dtype(self) -> np.dtype:\n", + " \"\"\"Get data type as numpy dtype.\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __repr__(self) -> str:\n", + " \"\"\"String representation.\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "#| export\n", + "class Tensor:\n", + " \"\"\"\n", + " TinyTorch Tensor: N-dimensional array with ML operations.\n", + " \n", + " The fundamental data structure for all TinyTorch operations.\n", + " Wraps NumPy arrays with ML-specific functionality.\n", + " \"\"\"\n", + " \n", + " def __init__(self, data: Union[int, float, List, np.ndarray], dtype: Optional[str] = None):\n", + " \"\"\"\n", + " Create a new tensor from data.\n", + " \n", + " Args:\n", + " data: Input data (scalar, list, or numpy array)\n", + " dtype: Data type ('float32', 'int32', etc.). 
Defaults to auto-detect.\n", + " \"\"\"\n", + " # Convert input to numpy array\n", + " if isinstance(data, (int, float, np.number)):\n", + " # Handle Python and NumPy scalars\n", + " if dtype is None:\n", + " # Auto-detect type: int for integers, float32 for floats\n", + " if isinstance(data, int) or (isinstance(data, np.number) and np.issubdtype(type(data), np.integer)):\n", + " dtype = 'int32'\n", + " else:\n", + " dtype = 'float32'\n", + " self._data = np.array(data, dtype=dtype)\n", + " elif isinstance(data, list):\n", + " # Let NumPy auto-detect type, then convert if needed\n", + " temp_array = np.array(data)\n", + " if dtype is None:\n", + " # Keep NumPy's auto-detected type, but prefer common ML types\n", + " if np.issubdtype(temp_array.dtype, np.integer):\n", + " dtype = 'int32'\n", + " elif np.issubdtype(temp_array.dtype, np.floating):\n", + " dtype = 'float32'\n", + " else:\n", + " dtype = temp_array.dtype\n", + " self._data = temp_array.astype(dtype)\n", + " elif isinstance(data, np.ndarray):\n", + " self._data = data.astype(dtype or data.dtype)\n", + " else:\n", + " raise TypeError(f\"Cannot create tensor from {type(data)}\")\n", + " \n", + " @property\n", + " def data(self) -> np.ndarray:\n", + " \"\"\"Access underlying numpy array.\"\"\"\n", + " return self._data\n", + " \n", + " @property\n", + " def shape(self) -> Tuple[int, ...]:\n", + " \"\"\"Get tensor shape.\"\"\"\n", + " return self._data.shape\n", + " \n", + " @property\n", + " def size(self) -> int:\n", + " \"\"\"Get total number of elements.\"\"\"\n", + " return self._data.size\n", + " \n", + " @property\n", + " def dtype(self) -> np.dtype:\n", + " \"\"\"Get data type as numpy dtype.\"\"\"\n", + " return self._data.dtype\n", + " \n", + " def __repr__(self) -> str:\n", + " \"\"\"String representation.\"\"\"\n", + " return f\"Tensor({self._data.tolist()}, shape={self.shape}, dtype={self.dtype})\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Tensor Class\n", + "\n", + "Once you implement the Tensor class above, run this cell to test it:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test the basic Tensor class\n", + "try:\n", + " print(\"=== Testing Tensor Creation ===\")\n", + " \n", + " # Scalar tensor\n", + " scalar = Tensor(5.0)\n", + " print(f\"Scalar: {scalar}\")\n", + " \n", + " # Vector tensor \n", + " vector = Tensor([1, 2, 3])\n", + " print(f\"Vector: {vector}\")\n", + " \n", + " # Matrix tensor\n", + " matrix = Tensor([[1, 2], [3, 4]])\n", + " print(f\"Matrix: {matrix}\")\n", + " \n", + " print(f\"\\nProperties:\")\n", + " print(f\"Matrix shape: {matrix.shape}\")\n", + " print(f\"Matrix size: {matrix.size}\")\n", + " print(f\"Matrix dtype: {matrix.dtype}\")\n", + " \n", + "except NotImplementedError as e:\n", + " print(f\"\u26a0\ufe0f {e}\")\n", + " print(\"Implement the Tensor class above first!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 2: Arithmetic Operations\n", + "\n", + "Now let's add the core arithmetic operations. 
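> Before moving on, a quick sanity check of the dtype auto-detection rules encoded in the reference `__init__` above (assuming that implementation is in scope):

```python
import numpy as np

# Python ints map to int32, Python floats to float32, lists follow NumPy's
# inferred kind (then normalized to int32/float32), and existing arrays keep
# their dtype unless one is passed explicitly.
print(Tensor(5).dtype)                                 # int32
print(Tensor(5.0).dtype)                               # float32
print(Tensor([1.5, 2.5]).dtype)                        # float32
print(Tensor(np.array([1, 2], dtype=np.int64)).dtype)  # int64
```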
These are essential for neural networks:\n", + "- **Addition**: `tensor + other` \n", + "- **Subtraction**: `tensor - other`\n", + "- **Multiplication**: `tensor * other`\n", + "- **Division**: `tensor / other`\n", + "\n", + "Each operation should handle both **tensor + tensor** and **tensor + scalar** cases.\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def _add_arithmetic_methods():\n", + " \"\"\"\n", + " Add arithmetic operations to Tensor class.\n", + " \n", + " TODO: Implement arithmetic methods (__add__, __sub__, __mul__, __truediv__)\n", + " and their reverse operations (__radd__, __rsub__, etc.)\n", + " \"\"\"\n", + " \n", + " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Addition: tensor + other\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Subtraction: tensor - other\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Multiplication: tensor * other\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Division: tensor / other\"\"\"\n", + " raise NotImplementedError(\"Student implementation required\")\n", + " \n", + " # Add methods to Tensor class\n", + " Tensor.__add__ = __add__\n", + " Tensor.__sub__ = __sub__\n", + " Tensor.__mul__ = __mul__\n", + " Tensor.__truediv__ = __truediv__" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| hide \n", + "#| export\n", + "def _add_arithmetic_methods():\n", + " \"\"\"Add arithmetic operations to Tensor class.\"\"\"\n", + " \n", + " def __add__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Addition: tensor + other\"\"\"\n", + " if isinstance(other, Tensor):\n", + " return Tensor(self._data + other._data)\n", + " else: # scalar\n", + " return Tensor(self._data + other)\n", + " \n", + " def __sub__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Subtraction: tensor - other\"\"\"\n", + " if isinstance(other, Tensor):\n", + " return Tensor(self._data - other._data)\n", + " else: # scalar\n", + " return Tensor(self._data - other)\n", + " \n", + " def __mul__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Multiplication: tensor * other\"\"\"\n", + " if isinstance(other, Tensor):\n", + " return Tensor(self._data * other._data)\n", + " else: # scalar\n", + " return Tensor(self._data * other)\n", + " \n", + " def __truediv__(self, other: Union['Tensor', int, float]) -> 'Tensor':\n", + " \"\"\"Division: tensor / other\"\"\"\n", + " if isinstance(other, Tensor):\n", + " return Tensor(self._data / other._data)\n", + " else: # scalar\n", + " return Tensor(self._data / other)\n", + " \n", + " def __radd__(self, other: Union[int, float]) -> 'Tensor':\n", + " \"\"\"Reverse addition: scalar + tensor\"\"\"\n", + " return Tensor(other + self._data)\n", + " \n", + " def __rsub__(self, other: Union[int, float]) -> 'Tensor':\n", + " \"\"\"Reverse subtraction: scalar - tensor\"\"\"\n", + " return Tensor(other - self._data)\n", + " \n", + " def __rmul__(self, other: Union[int, float]) -> 'Tensor':\n", + " \"\"\"Reverse 
multiplication: scalar * tensor\"\"\"\n", + " return Tensor(other * self._data)\n", + " \n", + " def __rtruediv__(self, other: Union[int, float]) -> 'Tensor':\n", + " \"\"\"Reverse division: scalar / tensor\"\"\"\n", + " return Tensor(other / self._data)\n", + " \n", + " # Add methods to Tensor class\n", + " Tensor.__add__ = __add__\n", + " Tensor.__sub__ = __sub__\n", + " Tensor.__mul__ = __mul__\n", + " Tensor.__truediv__ = __truediv__\n", + " Tensor.__radd__ = __radd__\n", + " Tensor.__rsub__ = __rsub__\n", + " Tensor.__rmul__ = __rmul__\n", + " Tensor.__rtruediv__ = __rtruediv__\n", + "\n", + "# Call the function to add arithmetic methods\n", + "_add_arithmetic_methods()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "### \ud83e\uddea Test Your Arithmetic Operations\n", + "\n", + "Once you implement the arithmetic methods above, run this cell to test them:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Test arithmetic operations\n", + "try:\n", + " print(\"=== Testing Arithmetic Operations ===\")\n", + " \n", + " a = Tensor([1, 2, 3])\n", + " b = Tensor([4, 5, 6])\n", + " \n", + " print(f\"a = {a}\")\n", + " print(f\"b = {b}\")\n", + " print()\n", + " \n", + " # Tensor + Tensor\n", + " print(f\"a + b = {a + b}\")\n", + " print(f\"a - b = {a - b}\")\n", + " print(f\"a * b = {a * b}\")\n", + " print(f\"a / b = {a / b}\")\n", + " print()\n", + " \n", + " # Tensor + Scalar\n", + " print(f\"a + 10 = {a + 10}\")\n", + " print(f\"a * 2 = {a * 2}\")\n", + " print()\n", + " \n", + " # Scalar + Tensor (reverse operations)\n", + " print(f\"10 + a = {10 + a}\")\n", + " print(f\"2 * a = {2 * a}\")\n", + " \n", + "except (NotImplementedError, AttributeError) as e:\n", + " print(f\"\u26a0\ufe0f {e}\")\n", + " print(\"Implement the arithmetic methods above first!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\"\"\"\n", + "## Step 3: Try the Export Process\n", + "\n", + "Now let's export our tensor code! In your terminal, run:\n", + "\n", + "```bash\n", + "python bin/tito.py sync --module tensor\n", + "```\n", + "\n", + "This will export the code marked with `#| export` to `tinytorch/core/tensor.py`.\n", + "\n", + "Then test it with:\n", + "\n", + "```bash\n", + "python bin/tito.py test --module tensor\n", + "```\n", + "\n", + "## Next Steps\n", + "\n", + "\ud83c\udf89 **Congratulations!** You've built the foundation of TinyTorch - the Tensor class. \n", + "\n", + "In the next modules, you'll add:\n", + "- **Automatic differentiation** (gradients)\n", + "- **Neural network layers**\n", + "- **Optimizers and training loops**\n", + "- **GPU acceleration**\n", + "\n", + "Each builds on this tensor foundation!\n", + "\"\"\"" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py index 6134438a..567b612a 100644 --- a/tinytorch/core/layers.py +++ b/tinytorch/core/layers.py @@ -1,7 +1,7 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/layers/layers_dev.ipynb. 
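# Note (illustrative aside): the reverse operators wired up in
# _add_arithmetic_methods above are what make scalar-first expressions work.
# For `2 * t`, Python first tries int.__mul__(2, t); that returns
# NotImplemented for a foreign type, so the interpreter falls back to
# Tensor.__rmul__(t, 2). Without the __r*__ methods, `t * 2` would succeed
# while `2 * t` raised a TypeError. For example:
#
#     >>> t = Tensor([1, 2, 3])
#     >>> (2 * t).data.tolist() == (t * 2).data.tolist()
#     True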
# %% auto 0 -__all__ = ['Dense'] +__all__ = ['Dense', 'ReLU', 'Sigmoid', 'Tanh'] # %% ../../modules/layers/layers_dev.ipynb 2 import numpy as np @@ -10,9 +10,6 @@ import sys from typing import Union, Optional, Callable from .tensor import Tensor -# Import activation functions from the activations module -from .activations import ReLU, Sigmoid, Tanh - # Import our Tensor class # sys.path.append('../../') # from modules.tensor.tensor_dev import Tensor @@ -112,3 +109,130 @@ class Dense: def __call__(self, x: Tensor) -> Tensor: """Make layer callable: layer(x) same as layer.forward(x)""" return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 9 +class ReLU: + """ + ReLU Activation: f(x) = max(0, x) + + The most popular activation function in deep learning. + Simple, effective, and computationally efficient. + + TODO: Implement ReLU activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply ReLU: f(x) = max(0, x) + + Args: + x: Input tensor + + Returns: + Output tensor with ReLU applied element-wise + + TODO: Implement element-wise max(0, x) operation + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + """Make activation callable: relu(x) same as relu.forward(x)""" + return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 10 +class ReLU: + """ReLU Activation: f(x) = max(0, x)""" + + def forward(self, x: Tensor) -> Tensor: + """Apply ReLU: f(x) = max(0, x)""" + return Tensor(np.maximum(0, x.data)) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 11 +class Sigmoid: + """ + Sigmoid Activation: f(x) = 1 / (1 + e^(-x)) + + Squashes input to range (0, 1). Often used for binary classification. + + TODO: Implement Sigmoid activation function. + """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Sigmoid: f(x) = 1 / (1 + e^(-x)) + + Args: + x: Input tensor + + Returns: + Output tensor with Sigmoid applied element-wise + + TODO: Implement sigmoid function (be careful with numerical stability!) + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 12 +class Sigmoid: + """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))""" + + def forward(self, x: Tensor) -> Tensor: + """Apply Sigmoid with numerical stability""" + # Use the numerically stable version to avoid overflow + # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x)) + # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x)) + x_data = x.data + result = np.zeros_like(x_data) + + # Stable computation + positive_mask = x_data >= 0 + result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask])) + result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask])) + + return Tensor(result) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 13 +class Tanh: + """ + Tanh Activation: f(x) = tanh(x) + + Squashes input to range (-1, 1). Zero-centered output. + + TODO: Implement Tanh activation function. 
+ """ + + def forward(self, x: Tensor) -> Tensor: + """ + Apply Tanh: f(x) = tanh(x) + + Args: + x: Input tensor + + Returns: + Output tensor with Tanh applied element-wise + + TODO: Implement tanh function + """ + raise NotImplementedError("Student implementation required") + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) + +# %% ../../modules/layers/layers_dev.ipynb 14 +class Tanh: + """Tanh Activation: f(x) = tanh(x)""" + + def forward(self, x: Tensor) -> Tensor: + """Apply Tanh""" + return Tensor(np.tanh(x.data)) + + def __call__(self, x: Tensor) -> Tensor: + return self.forward(x) diff --git a/tools/py_to_notebook.py b/tools/py_to_notebook.py old mode 100755 new mode 100644