TinyTorch/tinytorch/core/activations.py
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/02_activations/activations_dev.ipynb.

# %% auto 0
__all__ = ['Sigmoid', 'ReLU', 'Tanh', 'GELU', 'Softmax']

# %% ../../modules/source/02_activations/activations_dev.ipynb 3
import numpy as np
from typing import Optional
import sys
import os

# Import will be in export cell

# %% ../../modules/source/02_activations/activations_dev.ipynb 8
from .tensor import Tensor

# SigmoidBackward is defined alongside the autograd machinery (Module 05);
# guard the import so this module still works before autograd exists.
# (The .autograd location is an assumption based on the package layout.)
try:
    from .autograd import SigmoidBackward
except ImportError:
    SigmoidBackward = None

class Sigmoid:
    """
    Sigmoid activation: σ(x) = 1/(1 + e^(-x))

    Maps any real number to the (0, 1) range.
    Well suited to probabilities and binary classification.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply sigmoid activation element-wise.

        TODO: Implement sigmoid function

        APPROACH:
        1. Apply sigmoid formula: 1 / (1 + exp(-x))
        2. Use np.exp for the exponential
        3. Return result wrapped in a new Tensor

        EXAMPLE:
        >>> sigmoid = Sigmoid()
        >>> x = Tensor([-2, 0, 2])
        >>> result = sigmoid(x)
        >>> print(result.data)
        [0.119, 0.5, 0.881]  # All values between 0 and 1

        HINT: Use np.exp(-x.data) for the exponential
        """
        ### BEGIN SOLUTION
        # Apply sigmoid: 1 / (1 + exp(-x))
        result_data = 1.0 / (1.0 + np.exp(-x.data))
        result = Tensor(result_data)
        # Track gradients if autograd is available and the input requires_grad
        if SigmoidBackward is not None and x.requires_grad:
            result.requires_grad = True
            result._grad_fn = SigmoidBackward(x, result)
        return result
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Allows the activation to be called like a function."""
        return self.forward(x)

    def backward(self, grad: Tensor) -> Tensor:
        """Compute gradient (implemented in Module 05)."""
        pass  # Will implement backward pass in Module 05
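
# A minimal sketch (illustrative, not part of the exported notebook): the
# naive 1/(1 + exp(-x)) triggers overflow warnings in np.exp for large
# negative x. Branching on the sign keeps every exponent non-positive.
def _stable_sigmoid_demo(x: np.ndarray) -> np.ndarray:
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x))
    out = np.empty_like(x, dtype=np.float64)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    exp_x = np.exp(x[~pos])
    out[~pos] = exp_x / (1.0 + exp_x)
    return out

# _stable_sigmoid_demo(np.array([-1000.0, 0.0, 1000.0])) -> [0., 0.5, 1.]
# with no overflow warning, unlike the naive formula at x = -1000.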
# %% ../../modules/source/02_activations/activations_dev.ipynb 12
class ReLU:
    """
    ReLU activation: f(x) = max(0, x)

    Sets negative values to zero, keeps positive values unchanged.
    Most popular activation for hidden layers.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply ReLU activation element-wise.

        TODO: Implement ReLU function

        APPROACH:
        1. Use np.maximum(0, x.data) for element-wise max with zero
        2. Return result wrapped in new Tensor

        EXAMPLE:
        >>> relu = ReLU()
        >>> x = Tensor([-2, -1, 0, 1, 2])
        >>> result = relu(x)
        >>> print(result.data)
        [0, 0, 0, 1, 2]  # Negative values become 0, positive unchanged

        HINT: np.maximum handles element-wise maximum automatically
        """
        ### BEGIN SOLUTION
        # Apply ReLU: max(0, x)
        result = np.maximum(0, x.data)
        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Allows the activation to be called like a function."""
        return self.forward(x)

    def backward(self, grad: Tensor) -> Tensor:
        """Compute gradient (implemented in Module 05)."""
        pass  # Will implement backward pass in Module 05
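
# A small sketch (illustrative only): ReLU is equivalent to a multiplicative
# mask, the same kind of mask a ReLU backward pass typically uses to zero
# out gradients wherever the input was negative.
def _relu_mask_demo() -> bool:
    x = np.array([-2.0, -1.0, 0.0, 1.0, 2.0])
    return bool(np.allclose(np.maximum(0, x), x * (x > 0)))  # returns True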
# %% ../../modules/source/02_activations/activations_dev.ipynb 16
class Tanh:
    """
    Tanh activation: f(x) = (e^x - e^(-x))/(e^x + e^(-x))

    Maps any real number to (-1, 1) range.
    Zero-centered alternative to sigmoid.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply tanh activation element-wise.

        TODO: Implement tanh function

        APPROACH:
        1. Use np.tanh(x.data) for hyperbolic tangent
        2. Return result wrapped in new Tensor

        EXAMPLE:
        >>> tanh = Tanh()
        >>> x = Tensor([-2, 0, 2])
        >>> result = tanh(x)
        >>> print(result.data)
        [-0.964, 0.0, 0.964]  # Range (-1, 1), symmetric around 0

        HINT: NumPy provides np.tanh function
        """
        ### BEGIN SOLUTION
        # Apply tanh using NumPy
        result = np.tanh(x.data)
        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Allows the activation to be called like a function."""
        return self.forward(x)

    def backward(self, grad: Tensor) -> Tensor:
        """Compute gradient (implemented in Module 05)."""
        pass  # Will implement backward pass in Module 05
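
# A quick sketch (illustrative only): tanh is a rescaled, shifted sigmoid,
# tanh(x) = 2 * sigmoid(2x) - 1, which is why it acts as the zero-centered
# alternative mentioned in the docstring above.
def _tanh_identity_demo() -> bool:
    x = np.linspace(-3.0, 3.0, 7)
    sigmoid_2x = 1.0 / (1.0 + np.exp(-2.0 * x))
    return bool(np.allclose(np.tanh(x), 2.0 * sigmoid_2x - 1.0))  # returns True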
# %% ../../modules/source/02_activations/activations_dev.ipynb 20
class GELU:
    """
    GELU activation: f(x) = x * Φ(x) ≈ x * sigmoid(1.702 * x)

    Φ(x) is the cumulative distribution function of the standard normal.
    Smooth approximation to ReLU, used in modern transformers.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply GELU activation element-wise.

        TODO: Implement GELU approximation

        APPROACH:
        1. Use approximation: x * sigmoid(1.702 * x)
        2. Compute sigmoid part: 1 / (1 + exp(-1.702 * x))
        3. Multiply by x element-wise
        4. Return result wrapped in new Tensor

        EXAMPLE:
        >>> gelu = GELU()
        >>> x = Tensor([-1, 0, 1])
        >>> result = gelu(x)
        >>> print(result.data)
        [-0.154, 0.0, 0.846]  # Smooth, like ReLU but differentiable everywhere
                              # (exact GELU gives [-0.159, 0.0, 0.841])

        HINT: 1.702 is the best-fit constant for approximating Φ(x) with a
        sigmoid; the separate tanh-based approximation uses √(2/π) ≈ 0.798.
        """
        ### BEGIN SOLUTION
        # GELU approximation: x * sigmoid(1.702 * x)
        # First compute the sigmoid part
        sigmoid_part = 1.0 / (1.0 + np.exp(-1.702 * x.data))
        # Then multiply by x
        result = x.data * sigmoid_part
        return Tensor(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Allows the activation to be called like a function."""
        return self.forward(x)

    def backward(self, grad: Tensor) -> Tensor:
        """Compute gradient (implemented in Module 05)."""
        pass  # Will implement backward pass in Module 05
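
# A short sketch (illustrative only) comparing the sigmoid approximation used
# above with the exact GELU, x * Φ(x) where Φ(x) = 0.5 * (1 + erf(x / √2)).
from math import erf, sqrt

def _gelu_compare_demo(x: float) -> "tuple[float, float]":
    exact = x * 0.5 * (1.0 + erf(x / sqrt(2.0)))
    approx = x * (1.0 / (1.0 + np.exp(-1.702 * x)))
    return exact, approx

# _gelu_compare_demo(1.0) -> (0.841..., 0.846...): the approximation tracks
# the exact curve closely while avoiding the erf call.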
# %% ../../modules/source/02_activations/activations_dev.ipynb 24
class Softmax:
    """
    Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))

    Converts any vector to a probability distribution.
    Sum of all outputs equals 1.0.
    """

    def forward(self, x: Tensor, dim: int = -1) -> Tensor:
        """
        Apply softmax activation along specified dimension.

        TODO: Implement numerically stable softmax

        APPROACH:
        1. Subtract max for numerical stability: x - max(x)
        2. Compute exponentials: exp(x - max(x))
        3. Sum along dimension: sum(exp_values)
        4. Divide: exp_values / sum
        5. Return result wrapped in new Tensor

        EXAMPLE:
        >>> softmax = Softmax()
        >>> x = Tensor([1, 2, 3])
        >>> result = softmax(x)
        >>> print(result.data)
        [0.090, 0.245, 0.665]  # Sums to 1.0, larger inputs get higher probability

        HINTS:
        - Use np.max(x.data, axis=dim, keepdims=True) for max
        - Use np.sum(exp_values, axis=dim, keepdims=True) for sum
        - The max subtraction prevents overflow in exponentials
        """
        ### BEGIN SOLUTION
        # Numerical stability: subtract max to prevent overflow
        # Use Tensor operations to preserve gradient flow!
        x_max_data = np.max(x.data, axis=dim, keepdims=True)
        x_max = Tensor(x_max_data, requires_grad=False)  # max is not differentiable in this context
        x_shifted = x - x_max  # Tensor subtraction!
        # Compute exponentials (NumPy operation, but wrapped in Tensor)
        exp_values = Tensor(np.exp(x_shifted.data), requires_grad=x_shifted.requires_grad)
        # Sum along dimension (Tensor operation)
        exp_sum_data = np.sum(exp_values.data, axis=dim, keepdims=True)
        exp_sum = Tensor(exp_sum_data, requires_grad=exp_values.requires_grad)
        # Normalize to get probabilities (Tensor division!)
        result = exp_values / exp_sum
        return result
        ### END SOLUTION

    def __call__(self, x: Tensor, dim: int = -1) -> Tensor:
        """Allows the activation to be called like a function."""
        return self.forward(x, dim)

    def backward(self, grad: Tensor) -> Tensor:
        """Compute gradient (implemented in Module 05)."""
        pass  # Will implement backward pass in Module 05
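
# A final sketch (illustrative only): softmax is invariant to adding a
# constant to every input, which is exactly why subtracting the max in
# forward() changes nothing but prevents exp() overflow.
def _softmax_shift_demo() -> bool:
    def softmax_np(v: np.ndarray) -> np.ndarray:
        e = np.exp(v - np.max(v))
        return e / np.sum(e)
    x = np.array([1.0, 2.0, 3.0])
    return bool(np.allclose(softmax_np(x), softmax_np(x + 100.0)))  # returns True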