diff --git a/bin/tito.py b/bin/tito.py
index 8f5e0a77..a3453d09 100755
--- a/bin/tito.py
+++ b/bin/tito.py
@@ -343,7 +343,7 @@ def cmd_info(args):
 
 def cmd_test(args):
     """Run tests for a specific module."""
-    valid_modules = ["setup", "tensor", "layers", "cnn", "data", "training", 
+    valid_modules = ["setup", "tensor", "activations", "layers", "cnn", "data", "training", 
                      "profiling", "compression", "kernels", "benchmarking", "mlops"]
     
     if args.all:
diff --git a/modules/activations/README.md b/modules/activations/README.md
new file mode 100644
index 00000000..9f37c496
--- /dev/null
+++ b/modules/activations/README.md
@@ -0,0 +1,237 @@
+# 🔥 TinyTorch Activations Module
+
+Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power to learn complex patterns.
+
+## 🎯 Learning Objectives
+
+By the end of this module, you will:
+1. **Understand** why activation functions are essential for neural networks
+2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh
+3. **Test** your functions with various inputs to understand their behavior
+4. **Grasp** the mathematical properties that make each function useful
+
+## 🧠 Why This Module Matters
+
+**Without activation functions, neural networks are just linear transformations!**
+
+```
+Linear → Linear → Linear = Still just Linear
+Linear → Activation → Linear = Can learn complex patterns!
+```
+
+This module teaches you the mathematical foundations that make deep learning possible.
+
+## 📚 What You'll Build
+
+### 1. **ReLU** (Rectified Linear Unit)
+- **Formula**: `f(x) = max(0, x)`
+- **Properties**: Simple, sparse, unbounded
+- **Use case**: Hidden layers (most common)
+
+### 2. **Sigmoid** 
+- **Formula**: `f(x) = 1 / (1 + e^(-x))`
+- **Properties**: Bounded to (0,1), smooth, probabilistic
+- **Use case**: Binary classification, gates
+
+### 3. **Tanh** (Hyperbolic Tangent)
+- **Formula**: `f(x) = tanh(x)`
+- **Properties**: Bounded to (-1,1), zero-centered, smooth
+- **Use case**: Hidden layers, RNNs
+
+## 🚀 Getting Started
+
+### Development Workflow
+
+1. **Open the development file**:
+   ```bash
+   python bin/tito.py jupyter
+   # Then open modules/activations/activations_dev.py
+   ```
+
+2. **Implement the functions**:
+   - Start with ReLU (simplest)
+   - Move to Sigmoid (numerical stability challenge)
+   - Finish with Tanh (symmetry properties)
+
+3. **Visualize your functions**:
+   - Each function has plotting sections
+   - See how your implementation transforms inputs
+   - Compare all functions side-by-side
+
+4. **Test as you go**:
+   ```bash
+   python bin/tito.py test --module activations
+   ```
+
+5. **Export to package**:
+   ```bash
+   python bin/tito.py sync
+   ```
+
+### 📊 Visual Learning Features
+
+This module includes comprehensive plotting sections to help you understand:
+
+- **Individual Function Plots**: See each activation function's curve
+- **Implementation Comparison**: Your implementation vs ideal side-by-side
+- **Mathematical Explanations**: Visual breakdown of function properties
+- **Error Analysis**: Quantitative feedback on implementation accuracy
+- **Comprehensive Comparison**: All functions analyzed together
+
+**Enhanced Features**:
+- **4-Panel Plots**: Implementation vs ideal, mathematical definition, properties, error analysis
+- **Real-time Feedback**: Immediate accuracy scores with color-coded status
+- **Mathematical Insights**: Detailed explanations of function properties
+- **Numerical Stability Testing**: Verification with extreme values
+- **Property Verification**: Symmetry, monotonicity, and zero-centering tests
+
+**Why enhanced plots matter**: 
+- **Visual Debugging**: See exactly where your implementation differs
+- **Quantitative Feedback**: Get precise error measurements
+- **Mathematical Understanding**: Connect formulas to visual behavior
+- **Implementation Confidence**: Know immediately if your code is correct
+- **Learning Reinforcement**: Multiple visual perspectives of the same concept
+
+### Implementation Tips
+
+#### ReLU Implementation
+```python
+def forward(self, x: Tensor) -> Tensor:
+    return Tensor(np.maximum(0, x.data))
+```
+
+#### Sigmoid Implementation (Numerical Stability)
+```python
+def forward(self, x: Tensor) -> Tensor:
+    # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))
+    # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))
+    x_data = x.data
+    result = np.zeros_like(x_data)
+    
+    positive_mask = x_data >= 0
+    result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))
+    result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))
+    
+    return Tensor(result)
+```
+
+#### Tanh Implementation
+```python
+def forward(self, x: Tensor) -> Tensor:
+    return Tensor(np.tanh(x.data))
+```
+
+## 🧪 Testing Your Implementation
+
+### Unit Tests
+```bash
+python bin/tito.py test --module activations
+```
+
+**Test Coverage**:
+- ✅ Mathematical correctness
+- ✅ Numerical stability
+- ✅ Shape preservation
+- ✅ Edge cases
+- ✅ Function properties
+
+### Manual Testing
+```python
+# Test all activations
+from tinytorch.core.tensor import Tensor
+from modules.activations.activations_dev import ReLU, Sigmoid, Tanh
+
+x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
+
+relu = ReLU()
+sigmoid = Sigmoid()
+tanh = Tanh()
+
+print("Input:", x.data)
+print("ReLU:", relu(x).data)
+print("Sigmoid:", sigmoid(x).data)
+print("Tanh:", tanh(x).data)
+```
+
+## 📊 Understanding Function Properties
+
+### Range Comparison
+| Function | Input Range | Output Range | Zero Point |
+|----------|-------------|--------------|------------|
+| ReLU     | (-∞, ∞)     | [0, ∞)       | f(0) = 0   |
+| Sigmoid  | (-∞, ∞)     | (0, 1)       | f(0) = 0.5 |
+| Tanh     | (-∞, ∞)     | (-1, 1)      | f(0) = 0   |
+
+### Key Properties
+- **ReLU**: Sparse (zeros out negatives), unbounded, simple
+- **Sigmoid**: Probabilistic (0-1 range), smooth, saturating
+- **Tanh**: Zero-centered, symmetric, stronger gradients than sigmoid
+
+## 🔧 Integration with TinyTorch
+
+After implementation, your activations will be available as:
+
+```python
+from tinytorch.core.activations import ReLU, Sigmoid, Tanh
+
+# Use in neural networks
+relu = ReLU()
+output = relu(input_tensor)
+```
+
+## 🎯 Common Issues & Solutions
+
+### Issue 1: Sigmoid Overflow
+- **Problem**: `exp(-x)` overflows for large negative inputs
+- **Solution**: Use the numerically stable implementation (see code above)
+
+### Issue 2: Wrong Output Range
+- **Problem**: Sigmoid/Tanh outputs fall outside the expected range
+- **Solution**: Check your mathematical implementation against the formulas above
+
+### Issue 3: Shape Mismatch
+- **Problem**: Output shape differs from input shape
+- **Solution**: Ensure element-wise operations preserve shape
+
+### Issue 4: Import Errors
+- **Problem**: Cannot import the activations after implementing them
+- **Solution**: Run `python bin/tito.py sync` to export to package
+
+## 📈 Performance Considerations
+
+- **ReLU**: Fastest (simple max operation)
+- **Sigmoid**: Moderate (exponential computation)
+- **Tanh**: Moderate (hyperbolic function)
+
+All implementations use NumPy for vectorized operations.
+
+## 🚀 What's Next
+
+After mastering activations, you'll use them in:
+1. **Layers Module**: Building neural network layers
+2. **Loss Functions**: Computing training objectives
+3. **Advanced Architectures**: CNNs, RNNs, and more
+
+These functions are the mathematical foundation for everything that follows!
+
+## 📚 Further Reading
+
+**Mathematical Background**:
+- [Activation Functions in Neural Networks](https://en.wikipedia.org/wiki/Activation_function)
+- [Deep Learning Book - Chapter 6](http://www.deeplearningbook.org/)
+
+**Advanced Topics**:
+- ReLU variants (Leaky ReLU, ELU, Swish)
+- Activation function choice and impact
+- Gradient flow and vanishing gradients
+
+## 🎉 Success Criteria
+
+You've mastered this module when:
+- [ ] All tests pass (`python bin/tito.py test --module activations`)
+- [ ] You understand why each function is useful
+- [ ] You can explain the mathematical properties
+- [ ] You can use activations in neural networks
+- [ ] You appreciate the importance of nonlinearity
+
+**Great work! You've built the mathematical foundation of neural networks!** 🎉 
\ No newline at end of file
diff --git a/modules/activations/activations_dev.py b/modules/activations/activations_dev.py
new file mode 100644
index 00000000..582933ff
--- /dev/null
+++ b/modules/activations/activations_dev.py
@@ -0,0 +1,1162 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#       jupytext_version: 1.17.1
+# ---
+
+# %% [markdown]
+"""
+# 🔥 TinyTorch Activations Module
+
+Welcome to the **Activations** module! This is where you'll implement the mathematical functions that give neural networks their power.
+
+## 🎯 Learning Objectives
+
+By the end of this module, you will:
+1. **Understand** why activation functions are essential for neural networks
+2. **Implement** the three most important activation functions: ReLU, Sigmoid, and Tanh
+3. **Test** your functions with various inputs to understand their behavior
+4. **Use** these functions as building blocks for neural networks
+
+## 🧠 Why Activation Functions Matter
+
+**Without activation functions, neural networks are just linear transformations!**
+
+```
+Linear → Linear → Linear = Still just Linear
+Linear → Activation → Linear = Can learn complex patterns!
+```
+
+**Key insight**: Activation functions add **nonlinearity**, allowing networks to learn complex patterns that linear functions cannot capture.
+
+## 📚 What You'll Build
+
+- **ReLU**: `f(x) = max(0, x)` - The workhorse of deep learning
+- **Sigmoid**: `f(x) = 1 / (1 + e^(-x))` - Squashes to (0, 1)
+- **Tanh**: `f(x) = tanh(x)` - Squashes to (-1, 1)
+
+Each function serves different purposes and has different mathematical properties.
+
+---
+
+Let's start building! 🚀
+"""
+
+# %%
+#| default_exp core.activations
+
+# Standard library and third-party imports
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import sys
+
+# TinyTorch imports
+from tinytorch.core.tensor import Tensor
+
+# %%
+# Helper function to detect if we're in a testing environment
+def _should_show_plots():
+    """
+    Determine if we should show plots based on the execution context.
+    
+    Returns False if:
+    - Running in pytest (detected by 'pytest' in sys.modules)
+    - Running in test environment (detected by environment variables)
+    - Running from command line test runner
+    
+    Returns True if:
+    - Running in Jupyter notebook
+    - Running interactively in Python
+    """
+    # Check if we're running in pytest
+    if 'pytest' in sys.modules:
+        return False
+    
+    # Check if we're in a test environment
+    if os.environ.get('PYTEST_CURRENT_TEST'):
+        return False
+    
+    # Check if we're running from a test file (more specific check)
+    if any(arg.endswith('.py') and 'test_' in os.path.basename(arg) and 'tests/' in arg for arg in sys.argv):
+        return False
+    
+    # Check if we're running from the tito CLI test command
+    if len(sys.argv) > 0 and 'tito.py' in sys.argv[0] and 'test' in sys.argv:
+        return False
+    
+    # Default to showing plots (notebook/interactive environment)
+    return True
+
+# %% [markdown]
+"""
+## Step 1: ReLU Activation Function
+
+**ReLU** (Rectified Linear Unit) is the most popular activation function in deep learning.
+
+**Formula**: `f(x) = max(0, x)`
+
+**Properties**:
+- **Simple**: Easy to compute and understand
+- **Sparse**: Outputs exactly zero for negative inputs
+- **Unbounded**: No upper limit on positive outputs
+- **Non-saturating**: Gradient stays at 1 for positive inputs, so it avoids the vanishing gradients of saturating functions (negative inputs get zero gradient)
+
+**When to use**: Almost everywhere! It's the default choice for hidden layers.
+"""
+
+# %%
+#| export
+class ReLU:
+    """
+    ReLU Activation: f(x) = max(0, x)
+    
+    The most popular activation function in deep learning.
+    Simple, effective, and computationally efficient.
+    
+    TODO: Implement ReLU activation function (forward() below is a stub that raises for now).
+    """
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """
+        Apply ReLU: f(x) = max(0, x). Stub: raises NotImplementedError until implemented.
+        
+        Args:
+            x: Input tensor (the hint below reads its values from x.data)
+            
+        Returns:
+            Output tensor with ReLU applied element-wise
+            
+        TODO: Implement element-wise max(0, x) operation
+        Hint: Use np.maximum(0, x.data)
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        """Make the activation callable: relu(x) is the same as relu.forward(x)."""
+        return self.forward(x)
+
+# %%
+#| hide
+#| export
+class ReLU:
+    """ReLU Activation: f(x) = max(0, x)"""
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply ReLU: f(x) = max(0, x)"""
+        return Tensor(np.maximum(0, x.data))
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)
+
+# %% [markdown]
+"""
+### 🧪 Test Your ReLU Function
+
+Once you implement ReLU above, run this cell to test it:
+"""
+
+# %%
+# Smoke-test ReLU: prints inputs and outputs for visual comparison; nothing is asserted
+try:
+    print("=== Testing ReLU Function ===")
+    
+    # Test data: one row mixing negative, zero, and positive values
+    x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])
+    print(f"Input: {x.data}")
+    
+    # Instantiate the activation and call it (relu(x) goes through __call__ to forward)
+    relu = ReLU()
+    y = relu(x)
+    print(f"ReLU output: {y.data}")
+    print(f"Expected: [[0. 0. 0. 1. 3.]]")
+    
+    # Second input: a 2x2 tensor, to exercise a different shape
+    x_2d = Tensor([[-2.0, 1.0], [0.5, -0.5]])
+    y_2d = relu(x_2d)
+    print(f"\n2D Input: {x_2d.data}")
+    print(f"2D ReLU output: {y_2d.data}")
+    
+    print("✅ ReLU working!")  # reached whenever no exception was raised above
+    
+except Exception as e:  # any failure, e.g. NotImplementedError from an unimplemented forward()
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the ReLU function above!")
+
+# %% [markdown]
+"""
+### 📊 Visualize ReLU Function
+
+Let's plot the ReLU function to see how it transforms inputs:
+"""
+
+# %%
+# Plot ReLU: compare the ReLU class output against np.maximum and report the max error
+try:
+    print("=== Plotting ReLU Function ===")
+    
+    # 100 evenly spaced inputs in [-5, 5], wrapped as a single-row tensor
+    x_range = np.linspace(-5, 5, 100)
+    x_tensor = Tensor([x_range])
+    
+    # Apply ReLU (student implementation) and unwrap the single row again
+    relu = ReLU()
+    y_tensor = relu(x_tensor)
+    y_range = y_tensor.data[0]
+    
+    # Reference values computed directly with NumPy for comparison
+    y_ideal = np.maximum(0, x_range)
+    
+    # Only show plots if we're not in a testing environment
+    if _should_show_plots():
+        # One figure holding a 2x2 grid of panels
+        plt.figure(figsize=(12, 8))
+        
+        # Panel 1: student implementation overlaid on the reference curve
+        plt.subplot(2, 2, 1)
+        plt.plot(x_range, y_range, 'b-', linewidth=3, label='Your ReLU Implementation')
+        plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal ReLU')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Output')
+        plt.title('ReLU: Your Implementation vs Ideal')
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.xlim(-5, 5)
+        plt.ylim(-1, 5)
+        
+        # Panel 2: mathematical explanation plot
+        plt.subplot(2, 2, 2)
+        # Stem plot of max(0, x) at the integer inputs -3..3
+        x_math = np.array([-3, -2, -1, 0, 1, 2, 3])
+        y_math = np.maximum(0, x_math)
+        plt.stem(x_math, y_math, basefmt=' ', linefmt='g-', markerfmt='go')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('max(0, x)')
+        plt.title('Mathematical Definition: max(0, x)')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-4, 4)
+        plt.ylim(-0.5, 3.5)
+        
+        # Panel 3: the two linear pieces drawn separately to show the piecewise nature
+        plt.subplot(2, 2, 3)
+        x_left = np.linspace(-5, 0, 50)
+        x_right = np.linspace(0, 5, 50)
+        plt.plot(x_left, np.zeros_like(x_left), 'r-', linewidth=3, label='f(x) = 0 for x < 0')
+        plt.plot(x_right, x_right, 'b-', linewidth=3, label='f(x) = x for x ≥ 0')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Output')
+        plt.title('Piecewise Function Definition')
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.xlim(-5, 5)
+        plt.ylim(-1, 5)
+        
+        # Panel 4: absolute error between the implementation and the reference
+        plt.subplot(2, 2, 4)
+        difference = np.abs(y_range - y_ideal)
+        max_error = np.max(difference)
+        plt.plot(x_range, difference, 'purple', linewidth=2)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('|Your Output - Ideal Output|')
+        plt.title(f'Implementation Error (Max: {max_error:.6f})')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-5, 5)
+        
+        plt.tight_layout()
+        plt.show()
+        
+        # Print the error summary, graded by the size of the maximum error
+        print(f"\n📊 Analysis:")
+        print(f"✅ Maximum error: {max_error:.10f}")
+        if max_error < 1e-10:
+            print("🎉 Perfect implementation!")
+        elif max_error < 1e-6:
+            print("🌟 Excellent implementation!")
+        elif max_error < 1e-3:
+            print("👍 Good implementation!")
+        else:
+            print("🔧 Implementation needs work.")
+            
+        print(f"📈 Function properties:")
+        print(f"   • Range: [0, ∞)")
+        print(f"   • Piecewise: f(x) = 0 for x < 0, f(x) = x for x ≥ 0")
+        print(f"   • Monotonic: Always increasing for x ≥ 0")
+        print(f"   • Sparse: Exactly zero for negative inputs")
+    else:
+        print("📊 Plots disabled during testing - this is normal!")
+        
+    # Always show the mathematical analysis (this repeats the summary above when plots are enabled)
+    difference = np.abs(y_range - y_ideal)
+    max_error = np.max(difference)
+    print(f"\n📊 Mathematical Analysis:")
+    print(f"✅ Maximum error: {max_error:.10f}")
+    if max_error < 1e-10:
+        print("🎉 Perfect implementation!")
+    elif max_error < 1e-6:
+        print("🌟 Excellent implementation!")
+    elif max_error < 1e-3:
+        print("👍 Good implementation!")
+    else:
+        print("🔧 Implementation needs work.")
+        
+except Exception as e:  # any failure in computing or plotting is reported, not raised
+    print(f"❌ Error in plotting: {e}")
+    print("Make sure to implement the ReLU function above!")
+
+# %% [markdown]
+"""
+## Step 2: Sigmoid Activation Function
+
+**Sigmoid** squashes any input to the range (0, 1), making it useful for probabilities.
+
+**Formula**: `f(x) = 1 / (1 + e^(-x))`
+
+**Properties**:
+- **Bounded**: Always outputs between 0 and 1
+- **Smooth**: Differentiable everywhere
+- **S-shaped**: Smooth transition from 0 to 1
+- **Saturating**: Can suffer from vanishing gradients
+
+**When to use**: Binary classification (final layer), gates in RNNs/LSTMs.
+
+**⚠️ Numerical Stability**: Be careful with large inputs to avoid overflow!
+"""
+
+# %%
+#| export
+class Sigmoid:
+    """
+    Sigmoid Activation: f(x) = 1 / (1 + e^(-x))
+    
+    Squashes input to range (0, 1). Often used for binary classification.
+    
+    TODO: Implement Sigmoid activation function (forward() below is a stub that raises for now).
+    """
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """
+        Apply Sigmoid: f(x) = 1 / (1 + e^(-x)). Stub: raises NotImplementedError until implemented.
+        
+        Args:
+            x: Input tensor
+            
+        Returns:
+            Output tensor with Sigmoid applied element-wise
+            
+        TODO: Implement sigmoid function (be careful with numerical stability!)
+        
+        Hint: For numerical stability, pick the form whose exp() argument is never positive:
+        - For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))
+        - For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)  # sigmoid(x) is shorthand for sigmoid.forward(x)
+
+# %%
+#| hide
+#| export
+class Sigmoid:
+    """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))"""
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply Sigmoid with numerical stability"""
+        # Use the numerically stable version to avoid overflow
+        # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))
+        # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))
+        x_data = x.data
+        result = np.zeros_like(x_data)
+        
+        # Stable computation
+        positive_mask = x_data >= 0
+        result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))
+        result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))
+        
+        return Tensor(result)
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)
+
+# %% [markdown]
+"""
+### 🧪 Test Your Sigmoid Function
+
+Once you implement Sigmoid above, run this cell to test it:
+"""
+
+# %%
+# Smoke-test Sigmoid: prints outputs and a range check for visual inspection; nothing is asserted
+try:
+    print("=== Testing Sigmoid Function ===")
+    
+    # Test data: one row mixing negative, zero, and positive values
+    x = Tensor([[-5.0, -1.0, 0.0, 1.0, 5.0]])
+    print(f"Input: {x.data}")
+    
+    # Instantiate the activation and call it (sigmoid(x) goes through __call__ to forward)
+    sigmoid = Sigmoid()
+    y = sigmoid(x)
+    print(f"Sigmoid output: {y.data}")
+    print("Expected: values between 0 and 1")
+    print(f"All values in (0,1)? {np.all((y.data > 0) & (y.data < 1))}")
+    
+    # Midpoint check: a 1x1 tensor holding 0.0
+    x_zero = Tensor([[0.0]])
+    y_zero = sigmoid(x_zero)
+    print(f"\nSigmoid(0) = {y_zero.data[0, 0]:.4f} (should be 0.5)")
+    
+    # Test extreme values (numerical stability)
+    x_extreme = Tensor([[-100.0, 100.0]])
+    y_extreme = sigmoid(x_extreme)
+    print(f"Sigmoid([-100, 100]) = {y_extreme.data}")
+    print("Should be close to [0, 1] without overflow errors")
+    
+    print("✅ Sigmoid working!")  # reached whenever no exception was raised above
+    
+except Exception as e:  # any failure, e.g. NotImplementedError from an unimplemented forward()
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the Sigmoid function above!")
+
+# %% [markdown]
+"""
+### 📊 Visualize Sigmoid Function
+
+Let's plot the Sigmoid function to see its S-shaped curve:
+"""
+
+# %%
+# Plot Sigmoid: compare the Sigmoid class output against the direct formula and report the max error
+try:
+    print("=== Plotting Sigmoid Function ===")
+    
+    # 100 evenly spaced inputs in [-10, 10], wrapped as a single-row tensor
+    x_range = np.linspace(-10, 10, 100)
+    x_tensor = Tensor([x_range])
+    
+    # Apply Sigmoid (student implementation) and unwrap the single row again
+    sigmoid = Sigmoid()
+    y_tensor = sigmoid(x_tensor)
+    y_range = y_tensor.data[0]
+    
+    # Reference values from the direct formula (inputs here stay within [-10, 10])
+    y_ideal = 1.0 / (1.0 + np.exp(-x_range))
+    
+    # Only show plots if we're not in a testing environment
+    if _should_show_plots():
+        # One figure holding a 2x2 grid of panels
+        plt.figure(figsize=(12, 8))
+        
+        # Panel 1: student implementation overlaid on the reference curve
+        plt.subplot(2, 2, 1)
+        plt.plot(x_range, y_range, 'g-', linewidth=3, label='Your Sigmoid Implementation')
+        plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Sigmoid')
+        plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5, label='y = 0.5')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axhline(y=1, color='k', linestyle='-', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Output')
+        plt.title('Sigmoid: Your Implementation vs Ideal')
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.xlim(-10, 10)
+        plt.ylim(-0.1, 1.1)
+        
+        # Panel 2: mathematical explanation plot
+        plt.subplot(2, 2, 2)
+        # Stem plot of the formula at a handful of key inputs
+        x_key = np.array([-5, -2, -1, 0, 1, 2, 5])
+        y_key = 1.0 / (1.0 + np.exp(-x_key))
+        plt.stem(x_key, y_key, basefmt=' ', linefmt='orange', markerfmt='o')
+        plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.5)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axhline(y=1, color='k', linestyle='-', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('1/(1+e^(-x))')
+        plt.title('Mathematical Definition: 1/(1+e^(-x))')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-6, 6)
+        plt.ylim(-0.1, 1.1)
+        
+        # Panel 3: the S-curve on a denser grid, with asymptotes and midpoint marked
+        plt.subplot(2, 2, 3)
+        x_detailed = np.linspace(-8, 8, 200)
+        y_detailed = 1.0 / (1.0 + np.exp(-x_detailed))
+        plt.plot(x_detailed, y_detailed, 'g-', linewidth=3)
+        # Add asymptotes
+        plt.axhline(y=0, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = 0')
+        plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1')
+        plt.axhline(y=0.5, color='orange', linestyle='--', alpha=0.7, label='Midpoint: y = 0.5')
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Output')
+        plt.title('S-Curve Properties')
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.xlim(-8, 8)
+        plt.ylim(-0.1, 1.1)
+        
+        # Panel 4: absolute error between the implementation and the reference
+        plt.subplot(2, 2, 4)
+        difference = np.abs(y_range - y_ideal)
+        max_error = np.max(difference)
+        plt.plot(x_range, difference, 'purple', linewidth=2)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('|Your Output - Ideal Output|')
+        plt.title(f'Implementation Error (Max: {max_error:.6f})')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-10, 10)
+        
+        plt.tight_layout()
+        plt.show()
+        
+        # Print the error summary, graded by the size of the maximum error
+        print(f"\n📊 Analysis:")
+        print(f"✅ Maximum error: {max_error:.10f}")
+        if max_error < 1e-10:
+            print("🎉 Perfect implementation!")
+        elif max_error < 1e-6:
+            print("🌟 Excellent implementation!")
+        elif max_error < 1e-3:
+            print("👍 Good implementation!")
+        else:
+            print("🔧 Implementation needs work.")
+            
+        print(f"📈 Function properties:")
+        print(f"   • Range: (0, 1)")
+        print(f"   • Symmetric around (0, 0.5)")
+        print(f"   • Smooth and differentiable everywhere")
+        print(f"   • Saturates for large |x| (vanishing gradient problem)")
+        print(f"   • Useful for binary classification (outputs probabilities)")
+    else:
+        print("📊 Plots disabled during testing - this is normal!")
+        
+    # Always show the mathematical analysis (this repeats the summary above when plots are enabled)
+    difference = np.abs(y_range - y_ideal)
+    max_error = np.max(difference)
+    print(f"\n📊 Mathematical Analysis:")
+    print(f"✅ Maximum error: {max_error:.10f}")
+    if max_error < 1e-10:
+        print("🎉 Perfect implementation!")
+    elif max_error < 1e-6:
+        print("🌟 Excellent implementation!")
+    elif max_error < 1e-3:
+        print("👍 Good implementation!")
+    else:
+        print("🔧 Implementation needs work.")
+        
+except Exception as e:  # any failure in computing or plotting is reported, not raised
+    print(f"❌ Error in plotting: {e}")
+    print("Make sure to implement the Sigmoid function above!")
+
+# %% [markdown]
+"""
+## Step 3: Tanh Activation Function
+
+**Tanh** (Hyperbolic Tangent) squashes inputs to the range (-1, 1).
+
+**Formula**: `f(x) = tanh(x) = (e^x - e^(-x)) / (e^x + e^(-x))`
+
+**Properties**:
+- **Bounded**: Always outputs between -1 and 1
+- **Zero-centered**: Output is centered around 0
+- **Smooth**: Differentiable everywhere
+- **Stronger gradients**: Steeper than sigmoid around zero (slope 1 at x = 0, versus 0.25 for sigmoid)
+
+**When to use**: Hidden layers when you want zero-centered outputs, RNNs.
+
+**Advantage over Sigmoid**: Zero-centered outputs help with gradient flow.
+"""
+
+# %%
+#| export
+class Tanh:
+    """
+    Tanh Activation: f(x) = tanh(x)
+    
+    Squashes input to range (-1, 1). Zero-centered output.
+    
+    TODO: Implement Tanh activation function (forward() below is a stub that raises for now).
+    """
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """
+        Apply Tanh: f(x) = tanh(x). Stub: raises NotImplementedError until implemented.
+        
+        Args:
+            x: Input tensor (the hint below reads its values from x.data)
+            
+        Returns:
+            Output tensor with Tanh applied element-wise
+            
+        TODO: Implement tanh function
+        Hint: Use np.tanh(x.data)
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)  # tanh(x) is shorthand for tanh.forward(x)
+
+# %%
+#| hide
+#| export
+class Tanh:
+    """Tanh Activation: f(x) = tanh(x)"""
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Apply Tanh"""
+        return Tensor(np.tanh(x.data))
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)
+
+# %% [markdown]
+"""
+### 🧪 Test Your Tanh Function
+
+Once you implement Tanh above, run this cell to test it:
+"""
+
+# %%
+# Smoke-test Tanh: prints outputs and a range check for visual inspection; nothing is asserted
+try:
+    print("=== Testing Tanh Function ===")
+    
+    # Test data: one row mixing negative, zero, and positive values
+    x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])
+    print(f"Input: {x.data}")
+    
+    # Instantiate the activation and call it (tanh(x) goes through __call__ to forward)
+    tanh = Tanh()
+    y = tanh(x)
+    print(f"Tanh output: {y.data}")
+    print("Expected: values between -1 and 1")
+    print(f"All values in (-1,1)? {np.all((y.data > -1) & (y.data < 1))}")
+    
+    # Zero check: a 1x1 tensor holding 0.0
+    x_zero = Tensor([[0.0]])
+    y_zero = tanh(x_zero)
+    print(f"\nTanh(0) = {y_zero.data[0, 0]:.4f} (should be 0.0)")
+    
+    # Test extreme values
+    x_extreme = Tensor([[-10.0, 10.0]])
+    y_extreme = tanh(x_extreme)
+    print(f"Tanh([-10, 10]) = {y_extreme.data}")
+    print("Should be close to [-1, 1]")
+    
+    print("✅ Tanh working!")  # reached whenever no exception was raised above
+    
+except Exception as e:  # any failure, e.g. NotImplementedError from an unimplemented forward()
+    print(f"❌ Error: {e}")
+    print("Make sure to implement the Tanh function above!")
+
+# %% [markdown]
+"""
+### 📊 Visualize Tanh Function
+
+Let's plot the Tanh function to see its zero-centered S-shaped curve:
+"""
+
+# %%
+# Plot Tanh function
+try:
+    print("=== Plotting Tanh Function ===")
+    
+    # Create a range of input values
+    x_range = np.linspace(-5, 5, 100)
+    x_tensor = Tensor([x_range])
+    
+    # Apply Tanh (student implementation)
+    tanh = Tanh()
+    y_tensor = tanh(x_tensor)
+    y_range = y_tensor.data[0]
+    
+    # Create ideal Tanh for comparison
+    y_ideal = np.tanh(x_range)
+    
+    # Only show plots if we're not in a testing environment
+    if _should_show_plots():
+        # Create the plot
+        plt.figure(figsize=(12, 8))
+        
+        # Plot both student implementation and ideal
+        plt.subplot(2, 2, 1)
+        plt.plot(x_range, y_range, 'orange', linewidth=3, label='Your Tanh Implementation')
+        plt.plot(x_range, y_ideal, 'r--', linewidth=2, alpha=0.7, label='Ideal Tanh')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axhline(y=1, color='k', linestyle='--', alpha=0.3)
+        plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Output')
+        plt.title('Tanh: Your Implementation vs Ideal')
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.xlim(-5, 5)
+        plt.ylim(-1.2, 1.2)
+        
+        # Mathematical explanation plot
+        plt.subplot(2, 2, 2)
+        # Show key points
+        x_key = np.array([-3, -2, -1, 0, 1, 2, 3])
+        y_key = np.tanh(x_key)
+        plt.stem(x_key, y_key, basefmt=' ', linefmt='purple', markerfmt='o')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axhline(y=1, color='k', linestyle='--', alpha=0.3)
+        plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('tanh(x)')
+        plt.title('Mathematical Definition: tanh(x)')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-4, 4)
+        plt.ylim(-1.2, 1.2)
+        
+        # Show symmetry property
+        plt.subplot(2, 2, 3)
+        x_sym = np.linspace(-4, 4, 100)
+        y_sym = np.tanh(x_sym)
+        plt.plot(x_sym, y_sym, 'orange', linewidth=3, label='tanh(x)')
+        plt.plot(-x_sym, -y_sym, 'b--', linewidth=2, alpha=0.7, label='-tanh(-x)')
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axhline(y=1, color='r', linestyle='--', alpha=0.7, label='Upper asymptote: y = 1')
+        plt.axhline(y=-1, color='r', linestyle='--', alpha=0.7, label='Lower asymptote: y = -1')
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Output')
+        plt.title('Symmetry: tanh(-x) = -tanh(x)')
+        plt.grid(True, alpha=0.3)
+        plt.legend()
+        plt.xlim(-4, 4)
+        plt.ylim(-1.2, 1.2)
+        
+        # Error analysis
+        plt.subplot(2, 2, 4)
+        difference = np.abs(y_range - y_ideal)
+        max_error = np.max(difference)
+        plt.plot(x_range, difference, 'purple', linewidth=2)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('|Your Output - Ideal Output|')
+        plt.title(f'Implementation Error (Max: {max_error:.6f})')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-5, 5)
+        
+        plt.tight_layout()
+        plt.show()
+        
+        # Print analysis
+        print(f"\n📊 Analysis:")
+        print(f"✅ Maximum error: {max_error:.10f}")
+        if max_error < 1e-10:
+            print("🎉 Perfect implementation!")
+        elif max_error < 1e-6:
+            print("🌟 Excellent implementation!")
+        elif max_error < 1e-3:
+            print("👍 Good implementation!")
+        else:
+            print("🔧 Implementation needs work.")
+            
+        print(f"📈 Function properties:")
+        print(f"   • Range: (-1, 1)")
+        print(f"   • Odd function: tanh(-x) = -tanh(x)")
+        print(f"   • Symmetric around origin (0, 0)")
+        print(f"   • Smooth and differentiable everywhere")
+        print(f"   • Stronger gradients than sigmoid around zero")
+        print(f"   • Related to sigmoid: tanh(x) = 2*sigmoid(2x) - 1")
+    else:
+        print("📊 Plots disabled during testing - this is normal!")
+        
+    # Always show the mathematical analysis (printed a second time when plots are enabled, since the branch above prints it too)
+    difference = np.abs(y_range - y_ideal)
+    max_error = np.max(difference)
+    print(f"\n📊 Mathematical Analysis:")
+    print(f"✅ Maximum error: {max_error:.10f}")
+    if max_error < 1e-10:
+        print("🎉 Perfect implementation!")
+    elif max_error < 1e-6:
+        print("🌟 Excellent implementation!")
+    elif max_error < 1e-3:
+        print("👍 Good implementation!")
+    else:
+        print("🔧 Implementation needs work.")
+        
+except Exception as e:
+    print(f"❌ Error in plotting: {e}")
+    print("Make sure to implement the Tanh function above!")
+
+# %% [markdown]
+"""
+## Step 4: Compare All Activation Functions
+
+Let's see how all three functions behave on the same input:
+"""
+
+# %%
+# Compare all activation functions
+try:
+    print("=== Comparing All Activation Functions ===")
+    
+    # Test data: range from -5 to 5
+    x = Tensor([[-5.0, -2.0, -1.0, 0.0, 1.0, 2.0, 5.0]])
+    print(f"Input: {x.data}")
+    
+    # Apply all activations
+    relu = ReLU()
+    sigmoid = Sigmoid()
+    tanh = Tanh()
+    
+    y_relu = relu(x)
+    y_sigmoid = sigmoid(x)
+    y_tanh = tanh(x)
+    
+    print(f"\nReLU:    {y_relu.data}")
+    print(f"Sigmoid: {y_sigmoid.data}")
+    print(f"Tanh:    {y_tanh.data}")
+    
+    print("\n📊 Key Differences:")
+    print("- ReLU: Zeros out negative values, unbounded positive")
+    print("- Sigmoid: Squashes to (0, 1), always positive")
+    print("- Tanh: Squashes to (-1, 1), zero-centered")
+    
+    print("\n✅ All activation functions working!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Make sure to implement all activation functions above!")
+
+# %% [markdown]
+"""
+### 📊 Comprehensive Activation Function Comparison
+
+Let's plot all three functions together to see their differences:
+"""
+
+# %%
+# Plot all activation functions together
+try:
+    print("=== Plotting All Activation Functions Together ===")
+    
+    # Create a range of input values
+    x_range = np.linspace(-5, 5, 100)
+    x_tensor = Tensor([x_range])
+    
+    # Apply all activations (student implementations)
+    relu = ReLU()
+    sigmoid = Sigmoid()
+    tanh = Tanh()
+    
+    y_relu = relu(x_tensor).data[0]
+    y_sigmoid = sigmoid(x_tensor).data[0]
+    y_tanh = tanh(x_tensor).data[0]
+    
+    # Create ideal functions for comparison
+    y_relu_ideal = np.maximum(0, x_range)
+    y_sigmoid_ideal = 1.0 / (1.0 + np.exp(-x_range))
+    y_tanh_ideal = np.tanh(x_range)
+    
+    # Only show plots if we're not in a testing environment
+    if _should_show_plots():
+        # Create the comprehensive plot
+        plt.figure(figsize=(15, 10))
+        
+        # Main comparison plot
+        plt.subplot(2, 3, (1, 2))
+        plt.plot(x_range, y_relu, 'b-', linewidth=3, label='Your ReLU')
+        plt.plot(x_range, y_sigmoid, 'g-', linewidth=3, label='Your Sigmoid')
+        plt.plot(x_range, y_tanh, 'orange', linewidth=3, label='Your Tanh')
+        
+        # Add ideal functions as dashed lines
+        plt.plot(x_range, y_relu_ideal, 'b--', linewidth=1, alpha=0.7, label='Ideal ReLU')
+        plt.plot(x_range, y_sigmoid_ideal, 'g--', linewidth=1, alpha=0.7, label='Ideal Sigmoid')
+        plt.plot(x_range, y_tanh_ideal, '--', color='orange', linewidth=1, alpha=0.7, label='Ideal Tanh')
+        
+        # Add reference lines
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.axhline(y=1, color='k', linestyle='--', alpha=0.3)
+        plt.axhline(y=-1, color='k', linestyle='--', alpha=0.3)
+        plt.axvline(x=0, color='k', linestyle='-', alpha=0.3)
+        
+        # Formatting
+        plt.xlabel('Input (x)', fontsize=12)
+        plt.ylabel('Output f(x)', fontsize=12)
+        plt.title('Activation Functions: Your Implementation vs Ideal', fontsize=14, fontweight='bold')
+        plt.grid(True, alpha=0.3)
+        plt.legend(fontsize=10, loc='upper left')
+        plt.xlim(-5, 5)
+        plt.ylim(-1.5, 5)
+        
+        # Mathematical definitions
+        plt.subplot(2, 3, 3)
+        plt.text(0.05, 0.95, 'Mathematical Definitions:', fontsize=12, fontweight='bold', 
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.85, 'ReLU:', fontsize=11, fontweight='bold', color='blue',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.80, 'f(x) = max(0, x)', fontsize=10, fontfamily='monospace',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.70, 'Sigmoid:', fontsize=11, fontweight='bold', color='green',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.65, 'f(x) = 1/(1+e^(-x))', fontsize=10, fontfamily='monospace',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.55, 'Tanh:', fontsize=11, fontweight='bold', color='orange',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.50, 'f(x) = tanh(x)', fontsize=10, fontfamily='monospace',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.45, '     = (e^x-e^(-x))/(e^x+e^(-x))', fontsize=10, fontfamily='monospace',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        
+        plt.text(0.05, 0.30, 'Key Properties:', fontsize=12, fontweight='bold',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.25, '• ReLU: Sparse, unbounded', fontsize=10, color='blue',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.20, '• Sigmoid: Bounded (0,1)', fontsize=10, color='green',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.text(0.05, 0.15, '• Tanh: Zero-centered (-1,1)', fontsize=10, color='orange',
+                 transform=plt.gca().transAxes, verticalalignment='top')
+        plt.axis('off')
+        
+        # Error analysis for ReLU
+        plt.subplot(2, 3, 4)
+        error_relu = np.abs(y_relu - y_relu_ideal)
+        plt.plot(x_range, error_relu, 'b-', linewidth=2)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Error')
+        plt.title(f'ReLU Error (Max: {np.max(error_relu):.2e})')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-5, 5)
+        
+        # Error analysis for Sigmoid
+        plt.subplot(2, 3, 5)
+        error_sigmoid = np.abs(y_sigmoid - y_sigmoid_ideal)
+        plt.plot(x_range, error_sigmoid, 'g-', linewidth=2)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Error')
+        plt.title(f'Sigmoid Error (Max: {np.max(error_sigmoid):.2e})')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-5, 5)
+        
+        # Error analysis for Tanh
+        plt.subplot(2, 3, 6)
+        error_tanh = np.abs(y_tanh - y_tanh_ideal)
+        plt.plot(x_range, error_tanh, 'orange', linewidth=2)
+        plt.axhline(y=0, color='k', linestyle='-', alpha=0.3)
+        plt.xlabel('Input (x)')
+        plt.ylabel('Error')
+        plt.title(f'Tanh Error (Max: {np.max(error_tanh):.2e})')
+        plt.grid(True, alpha=0.3)
+        plt.xlim(-5, 5)
+        
+        plt.tight_layout()
+        plt.show()
+        
+        # Comprehensive analysis
+        print("\n📊 Comprehensive Analysis:")
+        print("=" * 60)
+        
+        # Function ranges
+        print("📈 Output Ranges:")
+        print(f"  ReLU:    [{np.min(y_relu):.3f}, {np.max(y_relu):.3f}]")
+        print(f"  Sigmoid: [{np.min(y_sigmoid):.3f}, {np.max(y_sigmoid):.3f}]")
+        print(f"  Tanh:    [{np.min(y_tanh):.3f}, {np.max(y_tanh):.3f}]")
+        
+        # Implementation accuracy
+        print("\n🎯 Implementation Accuracy:")
+        max_errors = [np.max(error_relu), np.max(error_sigmoid), np.max(error_tanh)]
+        functions = ['ReLU', 'Sigmoid', 'Tanh']
+        
+        for func, error in zip(functions, max_errors):
+            if error < 1e-10:
+                status = "✅ PERFECT"
+            elif error < 1e-6:
+                status = "✅ EXCELLENT"
+            elif error < 1e-3:
+                status = "⚠️  GOOD"
+            else:
+                status = "❌ NEEDS WORK"
+            print(f"  {func:8s}: {status:12s} (error: {error:.2e})")
+        
+        # Mathematical properties verification
+        print("\n🔍 Mathematical Properties:")
+        
+        # Value-at-zero test: ReLU and Tanh pass through the origin, Sigmoid is 0.5 at x = 0
+        x_zero = Tensor([[0.0]])
+        print("  Value at zero test (ReLU/Tanh: f(0) = 0, Sigmoid: f(0) = 0.5):")
+        for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]:
+            output = func(x_zero).data[0, 0]
+            expected = 0.5 if name == "Sigmoid" else 0.0
+            matches_expected = abs(output - expected) < 1e-6
+            print(f"    {name:8s}: f(0) = {output:.4f} {'✅' if matches_expected else '❌'}")
+        
+        # Monotonicity test
+        print("  Monotonicity test (should be increasing):")
+        test_vals = np.array([-2, -1, 0, 1, 2])
+        x_test = Tensor([test_vals])
+        for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]:
+            outputs = func(x_test).data[0]
+            is_monotonic = np.all(outputs[1:] >= outputs[:-1])
+            print(f"    {name:8s}: {'✅ Monotonic' if is_monotonic else '❌ Not monotonic'}")
+        
+        print("\n🎉 Comparison complete! Use these insights to understand each function's role in neural networks.")
+    else:
+        print("📊 Plots disabled during testing - this is normal!")
+        
+except Exception as e:
+    print(f"❌ Error in plotting: {e}")
+    print("Make sure matplotlib is installed and all functions are implemented!")
+
+# %% [markdown]
+"""
+## Step 5: Understanding Activation Function Properties
+
+Let's explore the mathematical properties of each function:
+"""
+
+# %%
+# Explore activation function properties
+try:
+    print("=== Activation Function Properties ===")
+    
+    # Create test functions
+    relu = ReLU()
+    sigmoid = Sigmoid()
+    tanh = Tanh()
+    
+    # Test with a range of values
+    test_values = np.linspace(-5, 5, 11)
+    x = Tensor([test_values])
+    
+    print(f"Input range: {test_values}")
+    print(f"ReLU range: [{np.min(relu(x).data):.2f}, {np.max(relu(x).data):.2f}]")
+    print(f"Sigmoid range: [{np.min(sigmoid(x).data):.2f}, {np.max(sigmoid(x).data):.2f}]")
+    print(f"Tanh range: [{np.min(tanh(x).data):.2f}, {np.max(tanh(x).data):.2f}]")
+    
+    # Test monotonicity (all three should be non-decreasing; ReLU is flat for x <= 0, so the check uses >=)
+    print(f"\n📈 Monotonicity Test:")
+    for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]:
+        outputs = func(x).data[0]
+        is_monotonic = np.all(outputs[1:] >= outputs[:-1])
+        print(f"{name}: {'✅ Monotonic' if is_monotonic else '❌ Not monotonic'}")
+    
+    # Zero-centered means outputs balance around 0; f(0) = 0 alone is not enough (ReLU has it but is never negative)
+    print(f"\n🎯 Zero-Centered Test (mean output over symmetric inputs ≈ 0):")
+    # test_values is symmetric about 0, so a zero-centered activation averages to ~0 over it
+    for name, func in [("ReLU", relu), ("Sigmoid", sigmoid), ("Tanh", tanh)]:
+        mean_output = np.mean(func(x).data)
+        is_zero_centered = abs(mean_output) < 1e-6
+        print(f"{name}: mean = {mean_output:.4f} {'✅ Zero-centered' if is_zero_centered else '❌ Not zero-centered'}")
+    
+    print("\n🎉 Property analysis complete!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Check your activation function implementations!")
+
+# %% [markdown]
+"""
+## Step 6: Practical Usage Examples
+
+Let's see how these functions would be used in practice:
+"""
+
+# %%
+# Practical usage examples
+try:
+    print("=== Practical Usage Examples ===")
+    
+    # Example 1: Binary classification with sigmoid
+    print("1. Binary Classification (Sigmoid):")
+    logits = Tensor([[2.5, -1.2, 0.8, -0.3]])  # Raw network outputs
+    sigmoid = Sigmoid()
+    probabilities = sigmoid(logits)
+    print(f"   Logits: {logits.data}")
+    print(f"   Probabilities: {probabilities.data}")
+    print(f"   Predictions: {(probabilities.data > 0.5).astype(int)}")
+    
+    # Example 2: Feature processing with ReLU
+    print("\n2. Feature Processing (ReLU):")
+    features = Tensor([[-0.5, 1.2, -2.1, 0.8, -0.1]])  # Mixed positive/negative
+    relu = ReLU()
+    processed = relu(features)
+    print(f"   Raw features: {features.data}")
+    print(f"   After ReLU: {processed.data}")
+    print(f"   Sparsity: {np.mean(processed.data == 0):.1%} zeros")
+    
+    # Example 3: Normalized features with Tanh
+    print("\n3. Normalized Features (Tanh):")
+    raw_features = Tensor([[3.2, -1.8, 0.5, -2.4, 1.1]])
+    tanh = Tanh()
+    normalized = tanh(raw_features)
+    print(f"   Raw features: {raw_features.data}")
+    print(f"   Normalized: {normalized.data}")
+    print(f"   Mean: {np.mean(normalized.data):.3f} (close to 0)")
+    
+    print("\n✅ Practical examples complete!")
+    
+except Exception as e:
+    print(f"❌ Error: {e}")
+    print("Check your activation function implementations!")
+
+# %% [markdown]
+"""
+## 🎉 Congratulations!
+
+You've successfully implemented the three most important activation functions in deep learning!
+
+### 🧱 What You Built
+1. **ReLU**: The workhorse activation that enables deep networks
+2. **Sigmoid**: The probability activation for binary classification
+3. **Tanh**: The zero-centered activation for better gradient flow
+
+### 🎯 Key Insights
+- **Nonlinearity is essential**: Without activations, neural networks are just linear transformations
+- **Different functions serve different purposes**: ReLU for hidden layers, Sigmoid for probabilities, Tanh for zero-centered outputs
+- **Mathematical properties matter**: Monotonicity, boundedness, and zero-centering affect learning
+
+### 🚀 What's Next
+These activation functions will be used in:
+- **Layers Module**: Building neural network layers
+- **Loss Functions**: Computing training objectives
+- **Advanced Architectures**: CNNs, RNNs, and more
+
+### 🔧 Export to Package
+Run this to export your activations to the TinyTorch package:
+```bash
+python bin/tito.py sync
+```
+
+Then test your implementation:
+```bash
+python bin/tito.py test --module activations
+```
+
+**Excellent work! You've mastered the mathematical foundations of neural networks!** 🎉
+
+---
+
+## 📚 Further Reading
+
+**Want to learn more about activation functions?**
+- **ReLU variants**: Leaky ReLU, ELU, Swish
+- **Advanced activations**: GELU, Mish, SiLU
+- **Activation choice**: When to use which function
+- **Gradient flow**: How activations affect training
+
+**Next modules**: Layers, Loss Functions, Optimization
+""" 
\ No newline at end of file
diff --git a/modules/activations/tests/test_activations.py b/modules/activations/tests/test_activations.py
new file mode 100644
index 00000000..0a9f7967
--- /dev/null
+++ b/modules/activations/tests/test_activations.py
@@ -0,0 +1,345 @@
+"""
+Test suite for the TinyTorch Activations module.
+
+This test suite validates the mathematical correctness of activation functions:
+- ReLU: f(x) = max(0, x)
+- Sigmoid: f(x) = 1 / (1 + e^(-x))
+- Tanh: f(x) = tanh(x)
+
+Tests focus on:
+1. Mathematical correctness
+2. Numerical stability
+3. Edge cases
+4. Shape preservation
+5. Type consistency
+"""
+
+import pytest
+import numpy as np
+import math
+from tinytorch.core.tensor import Tensor
+
+# Import the activation functions
+import sys
+import os
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from activations_dev import ReLU, Sigmoid, Tanh
+
+
+class TestReLU:
+    """Test the ReLU activation function."""
+    
+    def test_relu_basic_functionality(self):
+        """Test basic ReLU behavior: max(0, x)"""
+        relu = ReLU()
+        
+        # Test mixed positive/negative values
+        x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
+        y = relu(x)
+        expected = np.array([[0.0, 0.0, 0.0, 1.0, 2.0]])
+        
+        assert np.allclose(y.data, expected), f"Expected {expected}, got {y.data}"
+    
+    def test_relu_all_positive(self):
+        """Test ReLU with all positive values (should be unchanged)"""
+        relu = ReLU()
+        
+        x = Tensor([[1.0, 2.5, 3.7, 10.0]])
+        y = relu(x)
+        
+        assert np.allclose(y.data, x.data), "ReLU should preserve positive values"
+    
+    def test_relu_all_negative(self):
+        """Test ReLU with all negative values (should be zeros)"""
+        relu = ReLU()
+        
+        x = Tensor([[-1.0, -2.5, -3.7, -10.0]])
+        y = relu(x)
+        expected = np.zeros_like(x.data)
+        
+        assert np.allclose(y.data, expected), "ReLU should zero out negative values"
+    
+    def test_relu_zero_input(self):
+        """Test ReLU with zero input"""
+        relu = ReLU()
+        
+        x = Tensor([[0.0]])
+        y = relu(x)
+        
+        assert y.data[0, 0] == 0.0, "ReLU(0) should be 0"
+    
+    def test_relu_shape_preservation(self):
+        """Test that ReLU preserves tensor shape"""
+        relu = ReLU()
+        
+        # Test different shapes
+        shapes = [(1, 5), (2, 3), (4, 1), (3, 3)]
+        for shape in shapes:
+            x = Tensor(np.random.randn(*shape))
+            y = relu(x)
+            assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}"
+    
+    def test_relu_callable(self):
+        """Test that ReLU can be called directly"""
+        relu = ReLU()
+        x = Tensor([[1.0, -1.0]])
+        
+        y1 = relu(x)
+        y2 = relu.forward(x)
+        
+        assert np.allclose(y1.data, y2.data), "Direct call should match forward method"
+
+
+class TestSigmoid:
+    """Test the Sigmoid activation function."""
+    
+    def test_sigmoid_basic_functionality(self):
+        """Test basic Sigmoid behavior"""
+        sigmoid = Sigmoid()
+        
+        # Test known values
+        x = Tensor([[0.0]])
+        y = sigmoid(x)
+        assert abs(y.data[0, 0] - 0.5) < 1e-6, "Sigmoid(0) should be 0.5"
+    
+    def test_sigmoid_range(self):
+        """Test that Sigmoid outputs are in (0, 1)"""
+        sigmoid = Sigmoid()
+        
+        # Test wide range of inputs
+        x = Tensor([[-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]])
+        y = sigmoid(x)
+        
+        assert np.all(y.data > 0), "Sigmoid outputs should be > 0"
+        assert np.all(y.data < 1), "Sigmoid outputs should be < 1"
+    
+    def test_sigmoid_numerical_stability(self):
+        """Test Sigmoid with extreme values (numerical stability)"""
+        sigmoid = Sigmoid()
+        
+        # Test extreme values that could cause overflow
+        x = Tensor([[-100.0, -50.0, 50.0, 100.0]])
+        y = sigmoid(x)
+        
+        # Should not contain NaN or inf
+        assert not np.any(np.isnan(y.data)), "Sigmoid should not produce NaN"
+        assert not np.any(np.isinf(y.data)), "Sigmoid should not produce inf"
+        
+        # Should be close to 0 for very negative, close to 1 for very positive
+        assert y.data[0, 0] < 1e-10, "Sigmoid(-100) should be very close to 0"
+        assert y.data[0, 1] < 1e-10, "Sigmoid(-50) should be very close to 0"
+        assert y.data[0, 2] > 1 - 1e-10, "Sigmoid(50) should be very close to 1"
+        assert y.data[0, 3] > 1 - 1e-10, "Sigmoid(100) should be very close to 1"
+    
+    def test_sigmoid_monotonicity(self):
+        """Test that Sigmoid is monotonically increasing"""
+        sigmoid = Sigmoid()
+        
+        x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])
+        y = sigmoid(x)
+        
+        # Check that outputs are increasing
+        for i in range(len(y.data[0]) - 1):
+            assert y.data[0, i] < y.data[0, i + 1], "Sigmoid should be monotonically increasing"
+    
+    def test_sigmoid_shape_preservation(self):
+        """Test that Sigmoid preserves tensor shape"""
+        sigmoid = Sigmoid()
+        
+        shapes = [(1, 5), (2, 3), (4, 1)]
+        for shape in shapes:
+            x = Tensor(np.random.randn(*shape))
+            y = sigmoid(x)
+            assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}"
+    
+    def test_sigmoid_callable(self):
+        """Test that Sigmoid can be called directly"""
+        sigmoid = Sigmoid()
+        x = Tensor([[1.0, -1.0]])
+        
+        y1 = sigmoid(x)
+        y2 = sigmoid.forward(x)
+        
+        assert np.allclose(y1.data, y2.data), "Direct call should match forward method"
+
+
+class TestTanh:
+    """Test the Tanh activation function."""
+    
+    def test_tanh_basic_functionality(self):
+        """Test basic Tanh behavior"""
+        tanh = Tanh()
+        
+        # Test known values
+        x = Tensor([[0.0]])
+        y = tanh(x)
+        assert abs(y.data[0, 0] - 0.0) < 1e-6, "Tanh(0) should be 0"
+    
+    def test_tanh_range(self):
+        """Test that Tanh outputs are in [-1, 1]"""
+        tanh = Tanh()
+        
+        # Test wide range of inputs
+        x = Tensor([[-10.0, -5.0, -1.0, 0.0, 1.0, 5.0, 10.0]])
+        y = tanh(x)
+        
+        assert np.all(y.data >= -1), "Tanh outputs should be >= -1"
+        assert np.all(y.data <= 1), "Tanh outputs should be <= 1"
+    
+    def test_tanh_symmetry(self):
+        """Test that Tanh is symmetric: tanh(-x) = -tanh(x)"""
+        tanh = Tanh()
+        
+        x = Tensor([[1.0, 2.0, 3.0]])
+        x_neg = Tensor([[-1.0, -2.0, -3.0]])
+        
+        y_pos = tanh(x)
+        y_neg = tanh(x_neg)
+        
+        assert np.allclose(y_neg.data, -y_pos.data), "Tanh should be symmetric"
+    
+    def test_tanh_monotonicity(self):
+        """Test that Tanh is monotonically increasing"""
+        tanh = Tanh()
+        
+        x = Tensor([[-3.0, -1.0, 0.0, 1.0, 3.0]])
+        y = tanh(x)
+        
+        # Check that outputs are increasing
+        for i in range(len(y.data[0]) - 1):
+            assert y.data[0, i] < y.data[0, i + 1], "Tanh should be monotonically increasing"
+    
+    def test_tanh_extreme_values(self):
+        """Test Tanh with extreme values"""
+        tanh = Tanh()
+        
+        x = Tensor([[-100.0, 100.0]])
+        y = tanh(x)
+        
+        # Should be close to -1 and 1 respectively
+        assert abs(y.data[0, 0] - (-1.0)) < 1e-10, "Tanh(-100) should be very close to -1"
+        assert abs(y.data[0, 1] - 1.0) < 1e-10, "Tanh(100) should be very close to 1"
+    
+    def test_tanh_shape_preservation(self):
+        """Test that Tanh preserves tensor shape"""
+        tanh = Tanh()
+        
+        shapes = [(1, 5), (2, 3), (4, 1)]
+        for shape in shapes:
+            x = Tensor(np.random.randn(*shape))
+            y = tanh(x)
+            assert y.shape == x.shape, f"Shape mismatch: expected {x.shape}, got {y.shape}"
+    
+    def test_tanh_callable(self):
+        """Test that Tanh can be called directly"""
+        tanh = Tanh()
+        x = Tensor([[1.0, -1.0]])
+        
+        y1 = tanh(x)
+        y2 = tanh.forward(x)
+        
+        assert np.allclose(y1.data, y2.data), "Direct call should match forward method"
+
+
+class TestActivationComparison:
+    """Test interactions and comparisons between activation functions."""
+    
+    def test_activation_consistency(self):
+        """Test that all activations work with the same input"""
+        relu = ReLU()
+        sigmoid = Sigmoid()
+        tanh = Tanh()
+        
+        x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
+        
+        # All should process without error
+        y_relu = relu(x)
+        y_sigmoid = sigmoid(x)
+        y_tanh = tanh(x)
+        
+        # All should preserve shape
+        assert y_relu.shape == x.shape
+        assert y_sigmoid.shape == x.shape
+        assert y_tanh.shape == x.shape
+    
+    def test_activation_ranges(self):
+        """Test that activations have expected output ranges"""
+        relu = ReLU()
+        sigmoid = Sigmoid()
+        tanh = Tanh()
+        
+        x = Tensor([[-5.0, -2.0, 0.0, 2.0, 5.0]])
+        
+        y_relu = relu(x)
+        y_sigmoid = sigmoid(x)
+        y_tanh = tanh(x)
+        
+        # ReLU: [0, inf)
+        assert np.all(y_relu.data >= 0), "ReLU should be non-negative"
+        
+        # Sigmoid: (0, 1)
+        assert np.all(y_sigmoid.data > 0), "Sigmoid should be positive"
+        assert np.all(y_sigmoid.data < 1), "Sigmoid should be less than 1"
+        
+        # Tanh: (-1, 1)
+        assert np.all(y_tanh.data > -1), "Tanh should be greater than -1"
+        assert np.all(y_tanh.data < 1), "Tanh should be less than 1"
+
+
+# Integration tests with edge cases
+class TestActivationEdgeCases:
+    """Test edge cases and boundary conditions."""
+    
+    def test_zero_tensor(self):
+        """Test all activations with zero tensor"""
+        relu = ReLU()
+        sigmoid = Sigmoid()
+        tanh = Tanh()
+        
+        x = Tensor([[0.0, 0.0, 0.0]])
+        
+        y_relu = relu(x)
+        y_sigmoid = sigmoid(x)
+        y_tanh = tanh(x)
+        
+        assert np.allclose(y_relu.data, [0.0, 0.0, 0.0]), "ReLU(0) should be 0"
+        assert np.allclose(y_sigmoid.data, [0.5, 0.5, 0.5]), "Sigmoid(0) should be 0.5"
+        assert np.allclose(y_tanh.data, [0.0, 0.0, 0.0]), "Tanh(0) should be 0"
+    
+    def test_single_element_tensor(self):
+        """Test all activations with single element tensor"""
+        relu = ReLU()
+        sigmoid = Sigmoid()
+        tanh = Tanh()
+        
+        x = Tensor([[1.0]])
+        
+        y_relu = relu(x)
+        y_sigmoid = sigmoid(x)
+        y_tanh = tanh(x)
+        
+        assert y_relu.shape == (1, 1)
+        assert y_sigmoid.shape == (1, 1)
+        assert y_tanh.shape == (1, 1)
+    
+    def test_large_tensor(self):
+        """Test activations with larger tensors"""
+        relu = ReLU()
+        sigmoid = Sigmoid()
+        tanh = Tanh()
+        
+        # Create a 10x10 tensor
+        x = Tensor(np.random.randn(10, 10))
+        
+        y_relu = relu(x)
+        y_sigmoid = sigmoid(x)
+        y_tanh = tanh(x)
+        
+        assert y_relu.shape == (10, 10)
+        assert y_sigmoid.shape == (10, 10)
+        assert y_tanh.shape == (10, 10)
+
+
+if __name__ == "__main__":
+    # Run tests with pytest
+    pytest.main([__file__, "-v"]) 
\ No newline at end of file
diff --git a/modules/layers/layers_dev.py b/modules/layers/layers_dev.py
index 5b657c1e..74b6890a 100644
--- a/modules/layers/layers_dev.py
+++ b/modules/layers/layers_dev.py
@@ -17,15 +17,20 @@ Welcome to the Layers module! This is where neural networks begin. You'll implem
 ## Learning Goals
 - Understand layers as functions that transform tensors: `y = f(x)`
 - Implement Dense layers with linear transformations: `y = Wx + b`
-- Add activation functions for nonlinearity (ReLU, Sigmoid, Tanh)
+- Use activation functions from the activations module for nonlinearity
 - See how neural networks are just function composition
 - Build intuition before diving into training
 
 ## Build → Use → Understand
-1. **Build**: Dense layers and activation functions
+1. **Build**: Dense layers using activation functions as building blocks
 2. **Use**: Transform tensors and see immediate results
 3. **Understand**: How neural networks transform information
 
+## Module Dependencies
+This module builds on the **activations** module:
+- **activations** → **layers** → **networks**
+- Clean separation of concerns: math functions → layer building blocks → full networks
+
 ## Module → Package Structure
 **🎓 Teaching vs. 🔧 Building**: 
 - **Learning side**: Work in `modules/layers/layers_dev.py`  
@@ -51,6 +56,9 @@ import sys
 from typing import Union, Optional, Callable
 from tinytorch.core.tensor import Tensor
 
+# Import activation functions from the activations module
+from tinytorch.core.activations import ReLU, Sigmoid, Tanh
+
 # Import our Tensor class
 # sys.path.append('../../')
 # from modules.tensor.tensor_dev import Tensor
@@ -203,12 +211,11 @@ try:
     print(f"Input: {x.data}")
     print(f"Output: {y.data}")
     
-    # Test with batch of examples
+    # Test with batch
     x_batch = Tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])  # Shape: (2, 3)
     y_batch = layer(x_batch)
     print(f"\nBatch input shape: {x_batch.shape}")
     print(f"Batch output shape: {y_batch.shape}")
-    print(f"Batch output: {y_batch.data}")
     
     print("✅ Dense layer working!")
     
@@ -218,14 +225,20 @@ except Exception as e:
 
 # %% [markdown]
 """
-## Step 2: Activation Functions
+## Step 2: Activation Functions - Adding Nonlinearity
 
-Dense layers alone can only learn **linear** transformations. But most real-world problems need **nonlinear** transformations.
+Now we'll use the activation functions from the **activations** module! 
 
-**Activation functions** add nonlinearity:
-- **ReLU**: `max(0, x)` - Most common, simple and effective
-- **Sigmoid**: `1 / (1 + e^(-x))` - Squashes to (0, 1)
-- **Tanh**: `tanh(x)` - Squashes to (-1, 1)
+**Clean Architecture**: We import the activation functions rather than redefining them:
+```python
+from tinytorch.core.activations import ReLU, Sigmoid, Tanh
+```
+
+**Why this matters**:
+- **Separation of concerns**: Math functions vs. layer building blocks
+- **Reusability**: Activations can be used anywhere in the system
+- **Maintainability**: One place to update activation implementations
+- **Composability**: Clean imports make neural networks easier to build
 
 **Why nonlinearity matters**: Without it, stacking layers is pointless!
 ```
@@ -234,178 +247,43 @@ Linear → NonLinear → Linear = Can learn complex patterns
 ```
 """
 
-# %%
-#| export
-class ReLU:
-    """
-    ReLU Activation: f(x) = max(0, x)
-    
-    The most popular activation function in deep learning.
-    Simple, effective, and computationally efficient.
-    
-    TODO: Implement ReLU activation function.
-    """
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Apply ReLU: f(x) = max(0, x)
-        
-        Args:
-            x: Input tensor
-            
-        Returns:
-            Output tensor with ReLU applied element-wise
-            
-        TODO: Implement element-wise max(0, x) operation
-        """
-        raise NotImplementedError("Student implementation required")
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        """Make activation callable: relu(x) same as relu.forward(x)"""
-        return self.forward(x)
-
-# %%
-#| hide
-#| export
-class ReLU:
-    """ReLU Activation: f(x) = max(0, x)"""
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """Apply ReLU: f(x) = max(0, x)"""
-        return Tensor(np.maximum(0, x.data))
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %%
-#| export
-class Sigmoid:
-    """
-    Sigmoid Activation: f(x) = 1 / (1 + e^(-x))
-    
-    Squashes input to range (0, 1). Often used for binary classification.
-    
-    TODO: Implement Sigmoid activation function.
-    """
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Apply Sigmoid: f(x) = 1 / (1 + e^(-x))
-        
-        Args:
-            x: Input tensor
-            
-        Returns:
-            Output tensor with Sigmoid applied element-wise
-            
-        TODO: Implement sigmoid function (be careful with numerical stability!)
-        """
-        raise NotImplementedError("Student implementation required")
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %%
-#| hide
-#| export
-class Sigmoid:
-    """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))"""
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """Apply Sigmoid with numerical stability"""
-        # Use the numerically stable version to avoid overflow
-        # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))
-        # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))
-        x_data = x.data
-        result = np.zeros_like(x_data)
-        
-        # Stable computation
-        positive_mask = x_data >= 0
-        result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))
-        result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))
-        
-        return Tensor(result)
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %%
-#| export
-class Tanh:
-    """
-    Tanh Activation: f(x) = tanh(x)
-    
-    Squashes input to range (-1, 1). Zero-centered output.
-    
-    TODO: Implement Tanh activation function.
-    """
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Apply Tanh: f(x) = tanh(x)
-        
-        Args:
-            x: Input tensor
-            
-        Returns:
-            Output tensor with Tanh applied element-wise
-            
-        TODO: Implement tanh function
-        """
-        raise NotImplementedError("Student implementation required")
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %%
-#| hide
-#| export
-class Tanh:
-    """Tanh Activation: f(x) = tanh(x)"""
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """Apply Tanh"""
-        return Tensor(np.tanh(x.data))
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
 # %% [markdown]
 """
-### 🧪 Test Your Activation Functions
+### 🧪 Test Activation Functions from Activations Module
 
-Once you implement the activation functions above, run this cell to test them:
+Let's test that we can use the activation functions from the activations module:
 """
 
 # %%
-# Test activation functions
+# Test activation functions from activations module
 try:
-    print("=== Testing Activation Functions ===")
+    print("=== Testing Activation Functions from Activations Module ===")
     
     # Test data: mix of positive, negative, and zero
     x = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
     print(f"Input: {x.data}")
     
-    # Test ReLU
+    # Test ReLU from activations module
     relu = ReLU()
     y_relu = relu(x)
     print(f"ReLU output: {y_relu.data}")
     
-    # Test Sigmoid
+    # Test Sigmoid from activations module
     sigmoid = Sigmoid()
     y_sigmoid = sigmoid(x)
     print(f"Sigmoid output: {y_sigmoid.data}")
     
-    # Test Tanh
+    # Test Tanh from activations module
     tanh = Tanh()
     y_tanh = tanh(x)
     print(f"Tanh output: {y_tanh.data}")
     
-    print("✅ Activation functions working!")
+    print("✅ Activation functions from activations module working!")
+    print("🎉 Clean architecture: layers module uses activations module!")
     
 except Exception as e:
     print(f"❌ Error: {e}")
-    print("Make sure to implement the activation functions above!")
+    print("Make sure the activations module is properly exported!")
 
 # %% [markdown]
 """
@@ -418,6 +296,11 @@ Input → Dense → ReLU → Dense → Sigmoid → Output
 ```
 
 This is a 2-layer neural network that can learn complex nonlinear patterns!
+
+**Notice the clean architecture**:
+- Dense layers handle linear transformations
+- Activation functions (from activations module) handle nonlinearity
+- Composition creates complex behaviors from simple building blocks
 """
 
 # %%
@@ -431,9 +314,9 @@ try:
     # Output: 2 neurons with Sigmoid
     
     layer1 = Dense(input_size=3, output_size=4)
-    activation1 = ReLU()
+    activation1 = ReLU()  # From activations module
     layer2 = Dense(input_size=4, output_size=2)
-    activation2 = Sigmoid()
+    activation2 = Sigmoid()  # From activations module
     
     print("Network architecture:")
     print(f"  Input: 3 features")
@@ -458,28 +341,36 @@ try:
     print(f"Output values: {output.data}")
     
     print("\n🎉 Neural network working! You just built your first neural network!")
+    print("🏗️  Clean architecture: Dense layers + Activations module = Neural Network")
     print("Notice how the network transforms 3D input into 2D output through learned transformations.")
     
 except Exception as e:
     print(f"❌ Error: {e}")
-    print("Make sure to implement the layers and activations above!")
+    print("Make sure to implement the layers and check activations module!")
 
 # %% [markdown]
 """
 ## Step 4: Understanding What We Built
 
-Congratulations! You just implemented the fundamental building blocks of neural networks:
+Congratulations! You just implemented a clean, modular neural network architecture:
 
 ### 🧱 **What You Built**
 1. **Dense Layer**: Linear transformation `y = Wx + b`
-2. **Activation Functions**: Nonlinear transformations (ReLU, Sigmoid, Tanh)
+2. **Activation Functions**: Reused from the activations module (ReLU, Sigmoid, Tanh) rather than redefined here
 3. **Layer Composition**: Chaining layers to build networks
 
+### 🏗️ **Clean Architecture Benefits**
+- **Separation of concerns**: Math functions vs. layer building blocks
+- **Reusability**: Activations can be used across different modules
+- **Maintainability**: One place to update activation implementations
+- **Composability**: Clean imports make complex networks easier to build
+
 ### 🎯 **Key Insights**
 - **Layers are functions**: They transform tensors from one space to another
 - **Composition creates complexity**: Simple layers → complex networks
 - **Nonlinearity is crucial**: Without it, deep networks are just linear transformations
 - **Neural networks are function approximators**: They learn to map inputs to outputs
+- **Modular design**: Building blocks can be combined in many ways
 
 ### 🚀 **What's Next**
 In the next modules, you'll learn:
@@ -498,7 +389,7 @@ Then test your implementation:
 python bin/tito.py test --module layers
 ```
 
-**Great job! You've built the foundation of neural networks!** 🎉
+**Great job! You've built a clean, modular foundation for neural networks!** 🎉
 """
 
 # %%
@@ -514,9 +405,9 @@ try:
     # Build a 3-layer network for digit classification
     # 784 → 128 → 64 → 10
     layer1 = Dense(input_size=image_size, output_size=128)
-    relu1 = ReLU()
+    relu1 = ReLU()  # From activations module
     layer2 = Dense(input_size=128, output_size=64)
-    relu2 = ReLU()
+    relu2 = ReLU()  # From activations module
     layer3 = Dense(input_size=64, output_size=num_classes)
     softmax = Sigmoid()  # Using Sigmoid as a simple "probability-like" output
     
@@ -541,8 +432,38 @@ try:
     print(f"  Sample predictions: {predictions.data[0]}")  # First image predictions
     
     print("\n🎉 You built a neural network that could classify images!")
+    print("🏗️  Clean architecture: Dense layers + Activations module = Image Classifier")
     print("With training, this network could learn to recognize handwritten digits!")
     
 except Exception as e:
     print(f"❌ Error: {e}")
-    print("Check your layer implementations!") 
\ No newline at end of file
+    print("Check your layer implementations and activations module!")
+
+# %% [markdown]
+"""
+## 🎓 Module Summary
+
+### What You Learned
+1. **Layer Architecture**: Dense layers as linear transformations
+2. **Clean Dependencies**: Layers module uses activations module
+3. **Function Composition**: Simple building blocks → complex networks
+4. **Modular Design**: Separation of concerns for maintainable code
+
+### Key Architectural Insight
+```
+activations (math functions) → layers (building blocks) → networks (applications)
+```
+
+This one-way dependency chain (each stage builds only on the stage to its left) makes the system:
+- **Understandable**: Each module has a clear purpose
+- **Testable**: Each module can be tested independently
+- **Reusable**: Components can be used across different contexts
+- **Maintainable**: Changes are localized to appropriate modules
+
+### Next Steps
+- **Training**: Learn how networks learn from data
+- **Advanced Architectures**: CNNs, RNNs, Transformers
+- **Applications**: Real-world machine learning problems
+
+**Congratulations on building a clean, modular neural network foundation!** 🚀
+""" 
\ No newline at end of file
diff --git a/modules/layers/tests/test_layers.py b/modules/layers/tests/test_layers.py
index c85e208a..4f0af00b 100644
--- a/modules/layers/tests/test_layers.py
+++ b/modules/layers/tests/test_layers.py
@@ -18,7 +18,11 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
 
 # Import from the module's development file
 # Note: This imports the instructor version with full implementation
-from layers_dev import Dense, ReLU, Sigmoid, Tanh, Tensor
+from layers_dev import Dense, Tensor
+
+# Import activation functions from the activations module
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), '..', 'activations'))
+from activations_dev import ReLU, Sigmoid, Tanh
 
 def safe_numpy(tensor):
     """Get numpy array from tensor, using .numpy() if available, otherwise .data"""
diff --git a/tinytorch/_modidx.py b/tinytorch/_modidx.py
index fe281e09..6c56fd46 100644
--- a/tinytorch/_modidx.py
+++ b/tinytorch/_modidx.py
@@ -5,7 +5,30 @@ d = { 'settings': { 'branch': 'main',
                 'doc_host': 'https://tinytorch.github.io',
                 'git_url': 'https://github.com/tinytorch/TinyTorch/',
                 'lib_path': 'tinytorch'},
-  'syms': { 'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'),
+  'syms': { 'tinytorch.core.activations': {},
+            'tinytorch.core.layers': { 'tinytorch.core.layers.Dense': ('layers/layers_dev.html#dense', 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dense.__call__': ( 'layers/layers_dev.html#dense.__call__',
+                                                                                 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dense.__init__': ( 'layers/layers_dev.html#dense.__init__',
+                                                                                 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Dense.forward': ( 'layers/layers_dev.html#dense.forward',
+                                                                                'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.ReLU': ('layers/layers_dev.html#relu', 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.ReLU.__call__': ( 'layers/layers_dev.html#relu.__call__',
+                                                                                'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.ReLU.forward': ( 'layers/layers_dev.html#relu.forward',
+                                                                               'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Sigmoid': ('layers/layers_dev.html#sigmoid', 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Sigmoid.__call__': ( 'layers/layers_dev.html#sigmoid.__call__',
+                                                                                   'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Sigmoid.forward': ( 'layers/layers_dev.html#sigmoid.forward',
+                                                                                  'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Tanh': ('layers/layers_dev.html#tanh', 'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Tanh.__call__': ( 'layers/layers_dev.html#tanh.__call__',
+                                                                                'tinytorch/core/layers.py'),
+                                       'tinytorch.core.layers.Tanh.forward': ( 'layers/layers_dev.html#tanh.forward',
+                                                                               'tinytorch/core/layers.py')},
+            'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'),
                                        'tinytorch.core.tensor.Tensor.__init__': ( 'tensor/tensor_dev.html#tensor.__init__',
                                                                                   'tinytorch/core/tensor.py'),
                                        'tinytorch.core.tensor.Tensor.__repr__': ( 'tensor/tensor_dev.html#tensor.__repr__',
@@ -22,7 +45,21 @@ d = { 'settings': { 'branch': 'main',
                                                                                       'tinytorch/core/tensor.py'),
                                        'tinytorch.core.tensor._add_utility_methods': ( 'tensor/tensor_dev.html#_add_utility_methods',
                                                                                        'tinytorch/core/tensor.py')},
-            'tinytorch.core.utils': { 'tinytorch.core.utils.SystemInfo': ('setup/setup_dev.html#systeminfo', 'tinytorch/core/utils.py'),
+            'tinytorch.core.utils': { 'tinytorch.core.utils.DeveloperProfile': ( 'setup/setup_dev.html#developerprofile',
+                                                                                 'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.DeveloperProfile.__init__': ( 'setup/setup_dev.html#developerprofile.__init__',
+                                                                                          'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.DeveloperProfile.__str__': ( 'setup/setup_dev.html#developerprofile.__str__',
+                                                                                         'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.DeveloperProfile._load_default_flame': ( 'setup/setup_dev.html#developerprofile._load_default_flame',
+                                                                                                     'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.DeveloperProfile.get_ascii_art': ( 'setup/setup_dev.html#developerprofile.get_ascii_art',
+                                                                                               'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.DeveloperProfile.get_full_profile': ( 'setup/setup_dev.html#developerprofile.get_full_profile',
+                                                                                                  'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.DeveloperProfile.get_signature': ( 'setup/setup_dev.html#developerprofile.get_signature',
+                                                                                               'tinytorch/core/utils.py'),
+                                      'tinytorch.core.utils.SystemInfo': ('setup/setup_dev.html#systeminfo', 'tinytorch/core/utils.py'),
                                       'tinytorch.core.utils.SystemInfo.__init__': ( 'setup/setup_dev.html#systeminfo.__init__',
                                                                                     'tinytorch/core/utils.py'),
                                       'tinytorch.core.utils.SystemInfo.__str__': ( 'setup/setup_dev.html#systeminfo.__str__',
diff --git a/tinytorch/core/activations.py b/tinytorch/core/activations.py
new file mode 100644
index 00000000..beec6336
--- /dev/null
+++ b/tinytorch/core/activations.py
@@ -0,0 +1,58 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/activations/activations_dev.py.
+
+# %% auto 0
+__all__ = ['ReLU', 'Sigmoid', 'Tanh']
+
+# %% ../../modules/activations/activations_dev.py auto 1
+import math
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+import sys
+
+# TinyTorch imports
+from tinytorch.core.tensor import Tensor
+
+# %% ../../modules/activations/activations_dev.py auto 2
+class ReLU:
+    """ReLU activation: f(x) = max(0, x), applied element-wise."""
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Return a new Tensor wrapping ``np.maximum(0, x.data)``; ``x`` is not modified."""
+        return Tensor(np.maximum(0, x.data))
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)  # relu(x) is shorthand for relu.forward(x)
+
+# %% ../../modules/activations/activations_dev.py auto 3
+class Sigmoid:
+    """Sigmoid activation: f(x) = 1 / (1 + e^(-x)), applied element-wise."""
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Return a new Tensor holding the sigmoid of ``x.data``, computed stably."""
+        # exp() only ever sees non-positive arguments, so it cannot overflow:
+        #   x >= 0: 1 / (1 + exp(-x))        x < 0: exp(x) / (1 + exp(x))
+        x_data = x.data
+        # zeros_like() on integer data would allocate an integer buffer and
+        # truncate every result to 0 or 1, so non-float input is promoted.
+        is_float = np.issubdtype(x_data.dtype, np.floating)
+        result = np.zeros_like(x_data, dtype=x_data.dtype if is_float else np.float64)
+        positive_mask = x_data >= 0
+        exp_neg = np.exp(x_data[~positive_mask])  # shared by numerator and denominator
+        result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))
+        result[~positive_mask] = exp_neg / (1.0 + exp_neg)
+        return Tensor(result)
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)
+
+# %% ../../modules/activations/activations_dev.py auto 4
+class Tanh:
+    """Tanh activation: f(x) = tanh(x), applied element-wise."""
+    
+    def forward(self, x: Tensor) -> Tensor:
+        """Return a new Tensor wrapping ``np.tanh(x.data)``; ``x`` is not modified."""
+        return Tensor(np.tanh(x.data))
+    
+    def __call__(self, x: Tensor) -> Tensor:
+        return self.forward(x)  # tanh(x) is shorthand for tanh.forward(x)
\ No newline at end of file
diff --git a/tinytorch/core/layers.py b/tinytorch/core/layers.py
index 567b612a..6134438a 100644
--- a/tinytorch/core/layers.py
+++ b/tinytorch/core/layers.py
@@ -1,7 +1,7 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/layers/layers_dev.ipynb.
 
 # %% auto 0
-__all__ = ['Dense', 'ReLU', 'Sigmoid', 'Tanh']
+__all__ = ['Dense']
 
 # %% ../../modules/layers/layers_dev.ipynb 2
 import numpy as np
@@ -10,6 +10,9 @@ import sys
 from typing import Union, Optional, Callable
 from .tensor import Tensor
 
+# Import activation functions from the activations module
+from .activations import ReLU, Sigmoid, Tanh
+
 # Import our Tensor class
 # sys.path.append('../../')
 # from modules.tensor.tensor_dev import Tensor
@@ -109,130 +112,3 @@ class Dense:
     def __call__(self, x: Tensor) -> Tensor:
         """Make layer callable: layer(x) same as layer.forward(x)"""
         return self.forward(x)
-
-# %% ../../modules/layers/layers_dev.ipynb 9
-class ReLU:
-    """
-    ReLU Activation: f(x) = max(0, x)
-    
-    The most popular activation function in deep learning.
-    Simple, effective, and computationally efficient.
-    
-    TODO: Implement ReLU activation function.
-    """
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Apply ReLU: f(x) = max(0, x)
-        
-        Args:
-            x: Input tensor
-            
-        Returns:
-            Output tensor with ReLU applied element-wise
-            
-        TODO: Implement element-wise max(0, x) operation
-        """
-        raise NotImplementedError("Student implementation required")
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        """Make activation callable: relu(x) same as relu.forward(x)"""
-        return self.forward(x)
-
-# %% ../../modules/layers/layers_dev.ipynb 10
-class ReLU:
-    """ReLU Activation: f(x) = max(0, x)"""
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """Apply ReLU: f(x) = max(0, x)"""
-        return Tensor(np.maximum(0, x.data))
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %% ../../modules/layers/layers_dev.ipynb 11
-class Sigmoid:
-    """
-    Sigmoid Activation: f(x) = 1 / (1 + e^(-x))
-    
-    Squashes input to range (0, 1). Often used for binary classification.
-    
-    TODO: Implement Sigmoid activation function.
-    """
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Apply Sigmoid: f(x) = 1 / (1 + e^(-x))
-        
-        Args:
-            x: Input tensor
-            
-        Returns:
-            Output tensor with Sigmoid applied element-wise
-            
-        TODO: Implement sigmoid function (be careful with numerical stability!)
-        """
-        raise NotImplementedError("Student implementation required")
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %% ../../modules/layers/layers_dev.ipynb 12
-class Sigmoid:
-    """Sigmoid Activation: f(x) = 1 / (1 + e^(-x))"""
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """Apply Sigmoid with numerical stability"""
-        # Use the numerically stable version to avoid overflow
-        # For x >= 0: sigmoid(x) = 1 / (1 + exp(-x))
-        # For x < 0: sigmoid(x) = exp(x) / (1 + exp(x))
-        x_data = x.data
-        result = np.zeros_like(x_data)
-        
-        # Stable computation
-        positive_mask = x_data >= 0
-        result[positive_mask] = 1.0 / (1.0 + np.exp(-x_data[positive_mask]))
-        result[~positive_mask] = np.exp(x_data[~positive_mask]) / (1.0 + np.exp(x_data[~positive_mask]))
-        
-        return Tensor(result)
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %% ../../modules/layers/layers_dev.ipynb 13
-class Tanh:
-    """
-    Tanh Activation: f(x) = tanh(x)
-    
-    Squashes input to range (-1, 1). Zero-centered output.
-    
-    TODO: Implement Tanh activation function.
-    """
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """
-        Apply Tanh: f(x) = tanh(x)
-        
-        Args:
-            x: Input tensor
-            
-        Returns:
-            Output tensor with Tanh applied element-wise
-            
-        TODO: Implement tanh function
-        """
-        raise NotImplementedError("Student implementation required")
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
-
-# %% ../../modules/layers/layers_dev.ipynb 14
-class Tanh:
-    """Tanh Activation: f(x) = tanh(x)"""
-    
-    def forward(self, x: Tensor) -> Tensor:
-        """Apply Tanh"""
-        return Tensor(np.tanh(x.data))
-    
-    def __call__(self, x: Tensor) -> Tensor:
-        return self.forward(x)
diff --git a/tinytorch/core/utils.py b/tinytorch/core/utils.py
index df63d59a..ef2bdf91 100644
--- a/tinytorch/core/utils.py
+++ b/tinytorch/core/utils.py
@@ -1,22 +1,98 @@
 # AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/setup/setup_dev.ipynb.
 
 # %% auto 0
-__all__ = ['hello_tinytorch', 'add_numbers', 'SystemInfo']
+__all__ = ['hello_tinytorch', 'add_numbers', 'SystemInfo', 'DeveloperProfile']
 
 # %% ../../modules/setup/setup_dev.ipynb 3
 def hello_tinytorch():
-    """A simple hello world function for TinyTorch."""
-    return "Hello from TinyTorch! 🔥"
+    """
+    A simple hello world function for TinyTorch.
+    
+    TODO: Implement this function to display TinyTorch ASCII art and welcome message.
+    Load the flame art from tinytorch_flame.txt file with graceful fallback.
+    """
+    raise NotImplementedError("Student implementation required")
+
+def add_numbers(a, b):
+    """
+    Add two numbers together (student stub: raises until implemented).
+    
+    TODO: Implement addition of two numbers.
+    This is the foundation of all mathematical operations in ML.
+    """
+    raise NotImplementedError("Student implementation required")  # a later add_numbers definition in this module replaces this stub
+
+# %% ../../modules/setup/setup_dev.ipynb 4
+def hello_tinytorch():
+    """Display the TinyTorch ASCII art and welcome message.
+
+    Looks for ``tinytorch_flame.txt`` next to this module, or in the current
+    working directory when ``__file__`` is undefined (e.g. in a notebook).
+    If the art file is missing or unreadable, a plain text banner is printed.
+    """
+    # Local imports: no top-level import of os/pathlib is visible in this
+    # exported module, so without these the lookup below raised NameError
+    # and always fell through to the plain banner.
+    import os
+    from pathlib import Path
+
+    try:
+        # Directory containing this file
+        art_dir = Path(__file__).parent
+    except NameError:
+        # __file__ is not defined when running inside a notebook
+        art_dir = Path(os.getcwd())
+
+    try:
+        ascii_art = (art_dir / "tinytorch_flame.txt").read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        # Best-effort: a missing or unreadable art file is not an error
+        ascii_art = None
+
+    if ascii_art is None:
+        print("🔥 TinyTorch 🔥")
+    else:
+        print(ascii_art)
+        print("Tiny🔥Torch")
+    print("Build ML Systems from Scratch!")
 
 def add_numbers(a, b):
     """Add two numbers together."""
     return a + b
 
+# %% ../../modules/setup/setup_dev.ipynb 8
+class SystemInfo:
+    """
+    Simple system information class (student stub).
+    
+    TODO: Implement this class to collect and display system information.
+    """
+    # Every method below raises NotImplementedError; a second SystemInfo class defined after this one rebinds the name.
+    def __init__(self):
+        """
+        Initialize system information collection.
+        
+        TODO: Collect Python version, platform, and machine information.
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __str__(self):
+        """
+        Return human-readable system information.
+        
+        TODO: Format system info as a readable string.
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def is_compatible(self):
+        """
+        Check if system meets minimum requirements.
+        
+        TODO: Check if Python version is >= 3.8
+        """
+        raise NotImplementedError("Student implementation required")
 
-# %% ../../modules/setup/setup_dev.ipynb 6
-import sys
-import platform
-
+# %% ../../modules/setup/setup_dev.ipynb 9
 class SystemInfo:
     """Simple system information class."""
     
@@ -32,3 +108,145 @@ class SystemInfo:
         """Check if system meets minimum requirements."""
         return self.python_version >= (3, 8)
 
+# %% ../../modules/setup/setup_dev.ipynb 13
+class DeveloperProfile:
+    """Developer profile for personalizing TinyTorch experience (exercise stub).
+
+    NOTE: the ``DeveloperProfile`` defined later in this module replaces this stub.
+    TODO: Implement this class to store and display developer information.
+    Default to course instructor but allow students to personalize.
+    """
+    
+    @staticmethod
+    def _load_default_flame() -> str:
+        """Load the default TinyTorch flame ASCII art from file.
+
+        TODO: Implement file loading for tinytorch_flame.txt with fallback.
+        Raises NotImplementedError until implemented.
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __init__(self, name="Vijay Janapa Reddi", affiliation="Harvard University", 
+                 email="vj@eecs.harvard.edu", github_username="profvjreddi", ascii_art=None) -> None:
+        """Initialize developer profile.
+
+        TODO: Store developer information with sensible defaults.
+        Students should be able to customize this with their own info and ASCII art.
+        Raises NotImplementedError until implemented.
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def __str__(self) -> str:
+        """Return formatted developer information.
+
+        TODO: Format developer info as a professional signature with optional ASCII art.
+        Raises NotImplementedError until implemented.
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def get_signature(self) -> str:
+        """Get a short signature for code headers.
+
+        TODO: Return a concise signature like "Built by Name (@github)"
+        Raises NotImplementedError until implemented.
+        """
+        raise NotImplementedError("Student implementation required")
+    
+    def get_ascii_art(self) -> str:
+        """Get ASCII art for the profile.
+
+        TODO: Return custom ASCII art or default flame loaded from file.
+        Raises NotImplementedError until implemented.
+        """
+        raise NotImplementedError("Student implementation required")
+
+# %% ../../modules/setup/setup_dev.ipynb 14
+class DeveloperProfile:
+    """Developer profile for personalizing TinyTorch experience."""
+    
+    @staticmethod
+    def _load_default_flame() -> str:
+        """Return the default flame banner, read from ``tinytorch_flame.txt``.
+
+        The file is looked up next to this module (or in the working directory
+        when ``__file__`` is undefined); on failure a built-in flame is used.
+        """
+        try:
+            try:
+                current_dir = os.path.dirname(__file__)
+            except NameError:
+                # __file__ is not defined (e.g., in notebook); use current directory
+                current_dir = os.getcwd()
+            flame_path = os.path.join(current_dir, 'tinytorch_flame.txt')
+            with open(flame_path, 'r', encoding='utf-8') as f:
+                flame_art = f.read()
+            return f"""{flame_art}
+                    
+                    Tiny🔥Torch
+            Build ML Systems from Scratch!
+            """
+        except (OSError, UnicodeDecodeError):
+            # Missing, unreadable, or non-UTF-8 art file: use the built-in flame
+            return """
+    🔥 TinyTorch Developer 🔥
+         .  .  .  .  .  .
+        .    .  .  .  .   .
+       .  .    .  .  .  .  .
+      .  .  .    .  .  .  .  .
+     .  .  .  .    .  .  .  .  .
+    .  .  .  .  .    .  .  .  .  .
+   .  .  .  .  .  .    .  .  .  .  .
+  .  .  .  .  .  .  .    .  .  .  .  .
+ .  .  .  .  .  .  .  .    .  .  .  .  .
+.  .  .  .  .  .  .  .  .    .  .  .  .  .
+ \\  \\  \\  \\  \\  \\  \\  \\  \\  /  /  /  /  /  /
+  \\  \\  \\  \\  \\  \\  \\  \\  /  /  /  /  /  /
+   \\  \\  \\  \\  \\  \\  \\  /  /  /  /  /  /
+    \\  \\  \\  \\  \\  \\  /  /  /  /  /  /
+     \\  \\  \\  \\  \\  /  /  /  /  /  /
+      \\  \\  \\  \\  /  /  /  /  /  /
+       \\  \\  \\  /  /  /  /  /  /
+        \\  \\  /  /  /  /  /  /
+         \\  /  /  /  /  /  /
+          \\/  /  /  /  /  /
+           \\/  /  /  /  /
+            \\/  /  /  /
+             \\/  /  /
+              \\/  /
+               \\/
+                    
+                    Tiny🔥Torch
+            Build ML Systems from Scratch!
+            """
+    
+    def __init__(self, name="Vijay Janapa Reddi", affiliation="Harvard University",
+                 email="vj@eecs.harvard.edu", github_username="profvjreddi", ascii_art=None):
+        """Store the profile fields; a falsy ``ascii_art`` selects the default flame."""
+        self.name = name
+        self.affiliation = affiliation
+        self.email = email
+        self.github_username = github_username
+        self.ascii_art = ascii_art or self._load_default_flame()
+    
+    def __str__(self):
+        """Return a one-line ``name | affiliation | @github`` summary."""
+        return f"👨‍💻 {self.name} | {self.affiliation} | @{self.github_username}"
+    
+    def get_signature(self):
+        """Get a short signature for code headers."""
+        return f"Built by {self.name} (@{self.github_username})"
+    
+    def get_ascii_art(self):
+        """Get ASCII art for the profile."""
+        return self.ascii_art
+    
+    def get_full_profile(self):
+        """Get complete profile with ASCII art."""
+        return f"""{self.ascii_art}
+        
+👨‍💻 Developer: {self.name}
+🏛️  Affiliation: {self.affiliation}
+📧 Email: {self.email}
+🐙 GitHub: @{self.github_username}
+🔥 Ready to build ML systems from scratch!
+"""