# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.

# %% auto 0
__all__ = ['visualize_activation_function', 'visualize_activation_on_data', 'ReLU', 'Sigmoid', 'Tanh', 'Softmax']

# %% ../../modules/source/02_activations/activations_dev.ipynb 1
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List

# Import our Tensor class - try from package first, then from local module
try:
    from tinytorch.core.tensor import Tensor
except ImportError:
    # For development, import from local tensor module
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    from tensor_dev import Tensor

# %% ../../modules/source/02_activations/activations_dev.ipynb 2
def _should_show_plots():
    """Check if we should show plots (disable during testing)"""
    # Check multiple conditions that indicate we're in test mode
    is_pytest = (
        'pytest' in sys.modules or
        'test' in sys.argv or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg for arg in sys.argv) or
        any('pytest' in arg for arg in sys.argv)
    )

    # Show plots in development mode (when not in test mode)
    return not is_pytest
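
# --- Illustrative check (editor's sketch, not generated from the notebook) ---
# Running this file directly should enable plots; under pytest, PYTEST_CURRENT_TEST is set
# (and 'pytest' appears in sys.modules), so plots are suppressed.
if __name__ == "__main__":
    print(f"Show plots? {_should_show_plots()}")  # expected: True when run as a plain script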

# %% ../../modules/source/02_activations/activations_dev.ipynb 3
def visualize_activation_function(activation_fn, name: str, x_range: tuple = (-5, 5), num_points: int = 100):
    """Visualize an activation function's behavior"""
    if not _should_show_plots():
        return

    try:
        # Generate input values
        x_vals = np.linspace(x_range[0], x_range[1], num_points)

        # Apply activation function
        y_vals = []
        for x in x_vals:
            input_tensor = Tensor([[x]])
            output = activation_fn(input_tensor)
            y_vals.append(output.data.item())

        # Create plot
        plt.figure(figsize=(10, 6))
        plt.plot(x_vals, y_vals, 'b-', linewidth=2, label=f'{name} Activation')
        plt.grid(True, alpha=0.3)
        plt.xlabel('Input (x)')
        plt.ylabel(f'{name}(x)')
        plt.title(f'{name} Activation Function')
        plt.legend()
        plt.show()

    except ImportError:
        print(" 📊 Matplotlib not available - skipping visualization")
    except Exception as e:
        print(f" ⚠️ Visualization error: {e}")


def visualize_activation_on_data(activation_fn, name: str, data: Tensor):
    """Show activation function applied to sample data"""
    if not _should_show_plots():
        return

    try:
        output = activation_fn(data)
        print(f" 📊 {name} Example:")
        print(f" Input: {data.data.flatten()}")
        print(f" Output: {output.data.flatten()}")
        print(f" Range: [{output.data.min():.3f}, {output.data.max():.3f}]")

    except Exception as e:
        print(f" ⚠️ Data visualization error: {e}")

# %% ../../modules/source/02_activations/activations_dev.ipynb 6
class ReLU:
    """
    ReLU Activation Function: f(x) = max(0, x)

    The most popular activation function in deep learning.
    Simple, fast, and effective for most applications.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply ReLU activation: f(x) = max(0, x)

        TODO: Implement ReLU activation

        APPROACH:
        1. For each element in the input tensor, apply max(0, element)
        2. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[-1, 0, 1, 2, -3]])
        Expected: Tensor([[0, 0, 1, 2, 0]])

        HINTS:
        - Use np.maximum(0, x.data) for element-wise max
        - Remember to return a new Tensor object
        - The shape should remain the same as input
        """
        ### BEGIN SOLUTION
        result = np.maximum(0, x.data)
        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: relu(x) instead of relu.forward(x)"""
        return self.forward(x)
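
# --- Illustrative usage sketch (editor's addition, not generated from the notebook) ---
# Mirrors the EXAMPLE in the docstring above: negative entries are clamped to zero,
# positive entries pass through unchanged.
if __name__ == "__main__":
    relu = ReLU()
    print(relu(Tensor([[-1, 0, 1, 2, -3]])).data)  # expected: [[0 0 1 2 0]]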

# %% ../../modules/source/02_activations/activations_dev.ipynb 8
class Sigmoid:
    """
    Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))

    Smooth S-shaped function that squashes inputs to (0, 1).
    Useful for binary classification and probabilistic outputs.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))

        TODO: Implement Sigmoid activation with numerical stability

        APPROACH:
        1. Clip input values to prevent overflow (e.g., between -500 and 500)
        2. Apply the sigmoid formula: 1 / (1 + exp(-x))
        3. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[-2, 0, 2]])
        Expected: Tensor([[0.119, 0.5, 0.881]]) (approximately)

        HINTS:
        - Use np.clip(x.data, -500, 500) for numerical stability
        - Use np.exp() for the exponential function
        - Be careful with very large/small inputs to avoid overflow
        """
        ### BEGIN SOLUTION
        # Clip for numerical stability
        clipped = np.clip(x.data, -500, 500)
        result = 1 / (1 + np.exp(-clipped))
        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)"""
        return self.forward(x)
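
# --- Illustrative usage sketch (editor's addition, not generated from the notebook) ---
# Mirrors the EXAMPLE in the docstring above; every output lies strictly inside (0, 1).
if __name__ == "__main__":
    sigmoid = Sigmoid()
    print(sigmoid(Tensor([[-2.0, 0.0, 2.0]])).data)  # expected approx: [[0.119 0.5 0.881]]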

# %% ../../modules/source/02_activations/activations_dev.ipynb 10
class Tanh:
    """
    Tanh Activation Function: f(x) = tanh(x)

    Zero-centered S-shaped function that squashes inputs to (-1, 1).
    Better than sigmoid for hidden layers due to zero-centered outputs.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Tanh activation: f(x) = tanh(x)

        TODO: Implement Tanh activation

        APPROACH:
        1. Use NumPy's tanh function for numerical stability
        2. Apply to the tensor data
        3. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[-2, 0, 2]])
        Expected: Tensor([[-0.964, 0.0, 0.964]]) (approximately)

        HINTS:
        - Use np.tanh(x.data) - NumPy handles the math
        - Much simpler than implementing the formula manually
        - NumPy's tanh is numerically stable
        """
        ### BEGIN SOLUTION
        result = np.tanh(x.data)
        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: tanh(x) instead of tanh.forward(x)"""
        return self.forward(x)
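
# --- Illustrative usage sketch (editor's addition, not generated from the notebook) ---
# Mirrors the EXAMPLE in the docstring above; outputs are zero-centered in (-1, 1).
if __name__ == "__main__":
    tanh = Tanh()
    print(tanh(Tensor([[-2.0, 0.0, 2.0]])).data)  # expected approx: [[-0.964 0. 0.964]]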

# %% ../../modules/source/02_activations/activations_dev.ipynb 12
class Softmax:
    """
    Softmax Activation Function: f(x_i) = e^(x_i) / Σ(e^(x_j))

    Converts a vector of numbers into a probability distribution.
    Essential for multi-class classification and attention mechanisms.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))

        TODO: Implement Softmax activation with numerical stability

        APPROACH:
        1. Subtract max value from inputs for numerical stability
        2. Compute exponentials: e^(x_i - max)
        3. Divide by sum of exponentials
        4. Return a new Tensor with the results

        EXAMPLE:
        Input: Tensor([[1, 2, 3]])
        Expected: Tensor([[0.09, 0.24, 0.67]]) (approximately, sums to 1)

        HINTS:
        - Use np.max(x.data, axis=-1, keepdims=True) for stability
        - Use np.exp() for exponentials
        - Use np.sum() for the denominator
        - Make sure the result sums to 1 along the last axis
        """
        ### BEGIN SOLUTION
        # Subtract max for numerical stability
        x_max = np.max(x.data, axis=-1, keepdims=True)
        x_shifted = x.data - x_max

        # Compute softmax
        exp_x = np.exp(x_shifted)
        sum_exp = np.sum(exp_x, axis=-1, keepdims=True)
        result = exp_x / sum_exp

        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: softmax(x) instead of softmax.forward(x)"""
        return self.forward(x)
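
# --- Illustrative usage sketch (editor's addition, not generated from the notebook) ---
# Mirrors the EXAMPLE in the docstring above; each row becomes a probability
# distribution that sums to 1 along the last axis.
if __name__ == "__main__":
    softmax = Softmax()
    probs = softmax(Tensor([[1.0, 2.0, 3.0]]))
    print(probs.data)               # expected approx: [[0.090 0.245 0.665]]
    print(probs.data.sum(axis=-1))  # expected approx: [1.]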