TinyTorch/tinytorch/core/activations.py
Vijay Janapa Reddi d4d6277604 🔧 Complete module restructuring and integration fixes
📦 Module File Organization:
- Renamed networks_dev.py → dense_dev.py in 05_dense module
- Renamed cnn_dev.py → spatial_dev.py in 06_spatial module
- Added new 07_attention module with attention_dev.py
- Updated module.yaml files to reference correct filenames
- Updated #| default_exp directives for proper package exports

🔄 Core Package Updates:
- Added tinytorch.core.dense (Sequential, MLP architectures)
- Added tinytorch.core.spatial (Conv2D, pooling operations)
- Added tinytorch.core.attention (self-attention mechanisms)
- Updated all core modules with latest implementations
- Fixed tensor assignment issues in compression module

🧪 Test Integration Fixes:
- Updated integration tests to use correct module imports
- Fixed tensor activation tests for new module structure
- Ensured compatibility with renamed components
- Maintained 100% individual module test success rate

Result: Complete 14-module TinyTorch framework with proper organization,
working integrations, and comprehensive test coverage ready for production use.
2025-07-18 02:10:49 -04:00
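As a minimal sketch of the reorganized core package described in this commit (the exported names `Sequential`, `MLP`, and `Conv2D` are taken from the commit message and assumed, not verified against the package):

```python
# Hypothetical imports reflecting the restructuring described in this commit.
from tinytorch.core.dense import Sequential, MLP   # assumed exports of the new dense module
from tinytorch.core.spatial import Conv2D          # assumed export of the new spatial module
from tinytorch.core import attention               # new self-attention module
```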


# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_activations/activations_dev.ipynb.

# %% auto 0
__all__ = ['ReLU', 'Sigmoid', 'Tanh', 'Softmax']

# %% ../../modules/source/03_activations/activations_dev.ipynb 1
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List

# Import our Tensor class - try from package first, then from local module
try:
    from tinytorch.core.tensor import Tensor
except ImportError:
    # For development, import from local tensor module
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    from tensor_dev import Tensor

# %% ../../modules/source/03_activations/activations_dev.ipynb 2
def _should_show_plots():
    """Check if we should show plots (disable during testing)"""
    # Check multiple conditions that indicate we're in test mode
    is_pytest = (
        'pytest' in sys.modules or
        'test' in sys.argv or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg for arg in sys.argv) or
        any('pytest' in arg for arg in sys.argv)
    )
    # Show plots in development mode (when not in test mode)
    return not is_pytest

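# Illustrative sketch (not part of the generated module): one way the helper above
# could gate plotting during development. `plot_activation`, its arguments, and the
# styling choices are hypothetical and not exports of this file.
def plot_activation(activation, xs):
    """Plot `activation` over the values in `xs`, skipping the plot under pytest."""
    if not _should_show_plots():
        return
    ys = activation(Tensor(xs)).data
    plt.plot(xs, ys)
    plt.title(type(activation).__name__)
    plt.xlabel("input")
    plt.ylabel("output")
    plt.show()
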
# %% ../../modules/source/03_activations/activations_dev.ipynb 7
class ReLU:
    """
    ReLU Activation Function: f(x) = max(0, x)

    The most popular activation function in deep learning.
    Simple, fast, and effective for most applications.
    """

    def forward(self, x):
        """
        Apply ReLU activation: f(x) = max(0, x)

        TODO: Implement ReLU activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. For each element in the input tensor, apply max(0, element)
        2. Use NumPy's maximum function for efficient element-wise operation
        3. Return a new tensor of the same type with the results
        4. Preserve the input tensor's shape

        EXAMPLE USAGE:
        ```python
        relu = ReLU()
        input_tensor = Tensor([[-2, -1, 0, 1, 2]])
        output = relu(input_tensor)
        print(output.data)  # [[0, 0, 0, 1, 2]]
        ```

        IMPLEMENTATION HINTS:
        - Use np.maximum(0, x.data) for element-wise max with 0
        - Return the same type as input: return type(x)(result)
        - The shape should remain the same as input
        - Don't modify the input tensor (immutable operations)

        LEARNING CONNECTIONS:
        - This is like torch.nn.ReLU() in PyTorch
        - Used in virtually every modern neural network
        - Enables deep networks by preventing vanishing gradients
        - Creates sparse representations (many zeros)
        """
        ### BEGIN SOLUTION
        result = np.maximum(0, x.data)
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make the class callable: relu(x) instead of relu.forward(x)"""
        return self.forward(x)

# %% ../../modules/source/03_activations/activations_dev.ipynb 11
class Sigmoid:
    """
    Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))

    Maps any real number to the range (0, 1).
    Useful for binary classification and probability outputs.
    """

    def forward(self, x):
        """
        Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))

        TODO: Implement Sigmoid activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Compute the negative of input: -x.data
        2. Compute the exponential: np.exp(-x.data)
        3. Add 1 to the exponential: 1 + np.exp(-x.data)
        4. Take the reciprocal: 1 / (1 + np.exp(-x.data))
        5. Return as new Tensor

        EXAMPLE USAGE:
        ```python
        sigmoid = Sigmoid()
        input_tensor = Tensor([[-2, -1, 0, 1, 2]])
        output = sigmoid(input_tensor)
        print(output.data)  # [[0.119, 0.269, 0.5, 0.731, 0.881]]
        ```

        IMPLEMENTATION HINTS:
        - Use np.exp() for exponential function
        - Formula: 1 / (1 + np.exp(-x.data))
        - Handle potential overflow with np.clip(-x.data, -500, 500)
        - Return Tensor(result)

        LEARNING CONNECTIONS:
        - This is like torch.nn.Sigmoid() in PyTorch
        - Used in binary classification output layers
        - Key component in LSTM and GRU gating mechanisms
        - Historically important for early neural networks
        """
        ### BEGIN SOLUTION
        # Clip to prevent overflow
        clipped_input = np.clip(-x.data, -500, 500)
        result = 1 / (1 + np.exp(clipped_input))
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)"""
        return self.forward(x)

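# Illustrative check (not part of the generated module): the clipping in the solution
# above keeps np.exp from overflowing on extreme inputs. Assumes Tensor exposes its
# values via `.data`, as it does throughout this file.
if __name__ == "__main__":
    extreme = Tensor([[-1000.0, 0.0, 1000.0]])
    print(Sigmoid()(extreme).data)  # approximately [[0.  0.5  1.]], with no overflow warning
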
# %% ../../modules/source/03_activations/activations_dev.ipynb 15
class Tanh:
    """
    Tanh Activation Function: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))

    Zero-centered activation function with range (-1, 1).
    Better gradient properties than sigmoid.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Tanh activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))

        TODO: Implement Tanh activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Use NumPy's built-in tanh function: np.tanh(x.data)
        2. Alternatively, implement manually:
           - Compute e^x and e^(-x)
           - Calculate (e^x - e^(-x)) / (e^x + e^(-x))
        3. Return as new Tensor

        EXAMPLE USAGE:
        ```python
        tanh = Tanh()
        input_tensor = Tensor([[-2, -1, 0, 1, 2]])
        output = tanh(input_tensor)
        print(output.data)  # [[-0.964, -0.762, 0, 0.762, 0.964]]
        ```

        IMPLEMENTATION HINTS:
        - Use np.tanh(x.data) for simplicity
        - Manual implementation: (np.exp(x.data) - np.exp(-x.data)) / (np.exp(x.data) + np.exp(-x.data))
        - Handle overflow by clipping inputs: np.clip(x.data, -500, 500)
        - Return Tensor(result)

        LEARNING CONNECTIONS:
        - This is like torch.nn.Tanh() in PyTorch
        - Used in RNN, LSTM, and GRU cells
        - Better than sigmoid for hidden layers
        - Zero-centered outputs help with gradient flow
        """
        ### BEGIN SOLUTION
        # Use NumPy's built-in tanh function
        result = np.tanh(x.data)
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: tanh(x) instead of tanh.forward(x)"""
        return self.forward(x)

# %% ../../modules/source/03_activations/activations_dev.ipynb 19
class Softmax:
    """
    Softmax Activation Function: f(x_i) = e^(x_i) / Σ(e^(x_j))

    Converts a vector of real numbers into a probability distribution.
    Essential for multi-class classification.
    """

    def forward(self, x):
        """
        Apply Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))

        TODO: Implement Softmax activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Handle empty input case
        2. Subtract max value for numerical stability: x - max(x)
        3. Compute exponentials: np.exp(x - max(x))
        4. Compute sum of exponentials: np.sum(exp_values)
        5. Divide each exponential by the sum: exp_values / sum
        6. Return as same tensor type as input

        EXAMPLE USAGE:
        ```python
        softmax = Softmax()
        input_tensor = Tensor([[1, 2, 3]])
        output = softmax(input_tensor)
        print(output.data)          # [[0.09, 0.24, 0.67]]
        print(np.sum(output.data))  # 1.0
        ```

        IMPLEMENTATION HINTS:
        - Handle empty case: if x.data.size == 0: return type(x)(x.data.copy())
        - Subtract max for numerical stability: x_shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
        - Compute exponentials: exp_values = np.exp(x_shifted)
        - Sum along last axis: sum_exp = np.sum(exp_values, axis=-1, keepdims=True)
        - Divide: result = exp_values / sum_exp
        - Return same type as input: return type(x)(result)

        LEARNING CONNECTIONS:
        - This is like torch.nn.Softmax() in PyTorch
        - Used in classification output layers
        - Key component in attention mechanisms
        - Enables probability-based decision making
        """
        ### BEGIN SOLUTION
        # Handle empty input
        if x.data.size == 0:
            return type(x)(x.data.copy())
        # Subtract max for numerical stability
        x_shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
        # Compute exponentials
        exp_values = np.exp(x_shifted)
        # Sum along last axis
        sum_exp = np.sum(exp_values, axis=-1, keepdims=True)
        # Divide to get probabilities
        result = exp_values / sum_exp
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make the class callable: softmax(x) instead of softmax.forward(x)"""
        return self.forward(x)
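
# Illustrative usage sketch (not part of the generated module): chaining the
# activations defined above on a small batch. Assumes only the Tensor class
# imported at the top of this file.
if __name__ == "__main__":
    logits = Tensor([[1.0, -2.0, 3.0], [0.5, 0.5, 0.5]])

    hidden = ReLU()(logits)        # negative entries become 0
    squashed = Tanh()(hidden)      # values squashed into (-1, 1)
    probs = Softmax()(squashed)    # each row becomes a probability distribution

    print(probs.data)
    print(np.sum(probs.data, axis=-1))  # each row sums to 1.0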