mirror of https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-10 16:38:39 -05:00
🎯 Issues Fixed:
1. MLP Architecture: Convert from function to proper class with .network, .input_size attributes
2. Polymorphic Layers: Updated Dense and Activations in exported package to preserve input types
3. Design Decision: Remove default output activation from MLP (test expects 3 layers, not 4)

✅ Impact: 04_networks external tests now pass 25/25 (was 18/25)

🔧 Technical Changes:
- Convert MLP function → MLP class with attributes and .network property
- Fix tinytorch.core.layers.Dense to use type(x)(result) instead of Tensor(result)
- Fix tinytorch.core.activations (ReLU/Sigmoid/Tanh/Softmax) for polymorphic behavior
- Set output_activation=None default for general-purpose MLP
- All layers/activations now work with MockTensor for better testability

This makes the networks module fully compatible with external testing frameworks and provides proper OOP design for MLP.
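A minimal sketch (not from the repository) of the polymorphic return pattern described above; MockTensor and relu_like are hypothetical names used only for illustration:

    import numpy as np

    class MockTensor:  # hypothetical test double that wraps a NumPy array like Tensor does
        def __init__(self, data):
            self.data = np.array(data)

    def relu_like(x):
        # type(x)(result) re-wraps the output in whatever class came in,
        # so a MockTensor used in tests stays a MockTensor instead of becoming a Tensor
        return type(x)(np.maximum(0, x.data))

    out = relu_like(MockTensor([[-1.0, 2.0]]))
    print(type(out).__name__)  # -> MockTensor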
247 lines
8.2 KiB
Python
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.

# %% auto 0
__all__ = ['visualize_activation_function', 'visualize_activation_on_data', 'ReLU', 'Sigmoid', 'Tanh', 'Softmax']

# %% ../../modules/source/02_activations/activations_dev.ipynb 1
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List

# Import our Tensor class - try from package first, then from local module
try:
    from tinytorch.core.tensor import Tensor
except ImportError:
    # For development, import from local tensor module
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    from tensor_dev import Tensor

# %% ../../modules/source/02_activations/activations_dev.ipynb 2
def _should_show_plots():
    """Check if we should show plots (disable during testing)"""
    # Check multiple conditions that indicate we're in test mode
    is_pytest = (
        'pytest' in sys.modules or
        'test' in sys.argv or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg for arg in sys.argv) or
        any('pytest' in arg for arg in sys.argv)
    )

    # Show plots in development mode (when not in test mode)
    return not is_pytest

# %% ../../modules/source/02_activations/activations_dev.ipynb 3
def visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100):
    """Visualize an activation function's behavior"""
    if not _should_show_plots():
        return

    try:
        # Generate input values
        x_vals = np.linspace(x_range[0], x_range[1], num_points)

        # Apply activation function
        y_vals = []
        for x in x_vals:
            input_tensor = Tensor([[x]])
            output = activation_fn(input_tensor)
            y_vals.append(output.data.item())

        # Create plot
        plt.figure(figsize=(10, 6))
        plt.plot(x_vals, y_vals, 'b-', linewidth=2, label=f'{name} Activation')
        plt.grid(True, alpha=0.3)
        plt.xlabel('Input (x)')
        plt.ylabel(f'{name}(x)')
        plt.title(f'{name} Activation Function')
        plt.legend()
        plt.show()

    except ImportError:
        print(" 📊 Matplotlib not available - skipping visualization")
    except Exception as e:
        print(f" ⚠️ Visualization error: {e}")

def visualize_activation_on_data(activation_fn, name: str, data: Tensor):
    """Show activation function applied to sample data"""
    if not _should_show_plots():
        return

    try:
        output = activation_fn(data)
        print(f" 📊 {name} Example:")
        print(f" Input: {data.data.flatten()}")
        print(f" Output: {output.data.flatten()}")
        print(f" Range: [{output.data.min():.3f}, {output.data.max():.3f}]")

    except Exception as e:
        print(f" ⚠️ Data visualization error: {e}")

# %% ../../modules/source/02_activations/activations_dev.ipynb 8
class ReLU:
    """
    ReLU Activation Function: f(x) = max(0, x)

    The most popular activation function in deep learning.
    Simple, fast, and effective for most applications.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply ReLU activation: f(x) = max(0, x)

        TODO: Implement ReLU activation

        APPROACH:
        1. For each element in the input tensor, apply max(0, element)
        2. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[-1, 0, 1, 2, -3]])
        Expected: Tensor([[0, 0, 1, 2, 0]])

        HINTS:
        - Use np.maximum(0, x.data) for element-wise max
        - Remember to return a new Tensor object
        - The shape should remain the same as input
        """
        ### BEGIN SOLUTION
        result = np.maximum(0, x.data)
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: relu(x) instead of relu.forward(x)"""
        return self.forward(x)
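
# Illustrative usage (a sketch, not part of the notebook source); assumes the
# Tensor class above accepts a nested list and exposes a NumPy array as `.data`:
#   relu = ReLU()
#   relu(Tensor([[-1, 0, 1, 2, -3]])).data   # -> array([[0, 0, 1, 2, 0]])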

# %% ../../modules/source/02_activations/activations_dev.ipynb 12
class Sigmoid:
    """
    Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))

    Smooth S-shaped function that squashes inputs to (0, 1).
    Useful for binary classification and probabilistic outputs.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))

        TODO: Implement Sigmoid activation with numerical stability

        APPROACH:
        1. Clip input values to prevent overflow (e.g., between -500 and 500)
        2. Apply the sigmoid formula: 1 / (1 + exp(-x))
        3. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[-2, 0, 2]])
        Expected: Tensor([[0.119, 0.5, 0.881]]) (approximately)

        HINTS:
        - Use np.clip(x.data, -500, 500) for numerical stability
        - Use np.exp() for the exponential function
        - Be careful with very large/small inputs to avoid overflow
        """
        ### BEGIN SOLUTION
        # Clip for numerical stability
        clipped = np.clip(x.data, -500, 500)
        result = 1 / (1 + np.exp(-clipped))
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)"""
        return self.forward(x)
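
# Illustrative usage (a sketch, not part of the notebook source):
#   sigmoid = Sigmoid()
#   sigmoid(Tensor([[-2, 0, 2]])).data   # -> approx array([[0.119, 0.5, 0.881]])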

# %% ../../modules/source/02_activations/activations_dev.ipynb 16
class Tanh:
    """
    Tanh Activation Function: f(x) = tanh(x)

    Zero-centered S-shaped function that squashes inputs to (-1, 1).
    Better than sigmoid for hidden layers due to zero-centered outputs.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Tanh activation: f(x) = tanh(x)

        TODO: Implement Tanh activation

        APPROACH:
        1. Use NumPy's tanh function for numerical stability
        2. Apply to the tensor data
        3. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[-2, 0, 2]])
        Expected: Tensor([[-0.964, 0.0, 0.964]]) (approximately)

        HINTS:
        - Use np.tanh(x.data) - NumPy handles the math
        - Much simpler than implementing the formula manually
        - NumPy's tanh is numerically stable
        """
        ### BEGIN SOLUTION
        result = np.tanh(x.data)
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: tanh(x) instead of tanh.forward(x)"""
        return self.forward(x)
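
# Illustrative usage (a sketch, not part of the notebook source):
#   tanh = Tanh()
#   tanh(Tensor([[-2, 0, 2]])).data   # -> approx array([[-0.964, 0.0, 0.964]])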

# %% ../../modules/source/02_activations/activations_dev.ipynb 20
class Softmax:
    """
    Softmax Activation Function: f(x_i) = e^(x_i) / Σ(e^(x_j))

    Converts a vector of numbers into a probability distribution.
    Essential for multi-class classification and attention mechanisms.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))

        TODO: Implement Softmax activation with numerical stability

        APPROACH:
        1. Subtract max value from inputs for numerical stability
        2. Compute exponentials: e^(x_i - max)
        3. Divide by sum of exponentials
        4. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[1, 2, 3]])
        Expected: Tensor([[0.09, 0.24, 0.67]]) (approximately, sums to 1)

        HINTS:
        - Use np.max(x.data, axis=-1, keepdims=True) for stability
        - Use np.exp() for exponentials
        - Use np.sum() for the denominator
        - Make sure the result sums to 1 along the last axis
        """
        ### BEGIN SOLUTION
        # Subtract max for numerical stability
        x_max = np.max(x.data, axis=-1, keepdims=True)
        x_shifted = x.data - x_max

        # Compute softmax
        exp_x = np.exp(x_shifted)
        sum_exp = np.sum(exp_x, axis=-1, keepdims=True)
        result = exp_x / sum_exp

        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: softmax(x) instead of softmax.forward(x)"""
        return self.forward(x)
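
# Illustrative usage (a sketch, not part of the notebook source):
#   softmax = Softmax()
#   softmax(Tensor([[1, 2, 3]])).data   # -> approx array([[0.09, 0.24, 0.67]]), rows sum to 1

# Minimal smoke-test demo, appended here as a hedged sketch rather than autogenerated
# content. It assumes the Tensor class imported above accepts a nested list and exposes
# its NumPy data as `.data`, exactly as the functions above already rely on.
if __name__ == "__main__":
    sample = Tensor([[-2.0, -1.0, 0.0, 1.0, 2.0]])
    for name, activation in [("ReLU", ReLU()), ("Sigmoid", Sigmoid()),
                             ("Tanh", Tanh()), ("Softmax", Softmax())]:
        # Prints the input, output, and output range for each activation
        visualize_activation_on_data(activation, name, sample)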