# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/03_activations/activations_dev.ipynb.

# %% auto 0
__all__ = ['ReLU', 'Sigmoid', 'Tanh', 'Softmax']

# %% ../../modules/source/03_activations/activations_dev.ipynb 1
import math
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from typing import Union, List

# Import our Tensor class - try from package first, then from local module
try:
    from tinytorch.core.tensor import Tensor
except ImportError:
    # For development, import from local tensor module
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    from tensor_dev import Tensor

# %% ../../modules/source/03_activations/activations_dev.ipynb 2
def _should_show_plots():
    """Check if we should show plots (disabled during testing)."""
    # Check multiple conditions that indicate we're in test mode
    is_pytest = (
        'pytest' in sys.modules or
        'test' in sys.argv or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg for arg in sys.argv) or
        any('pytest' in arg for arg in sys.argv)
    )

    # Show plots in development mode (when not in test mode)
    return not is_pytest

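# Illustrative sketch (added here; not part of the original notebook export):
# how a development cell might gate optional matplotlib output on
# _should_show_plots() so imports and test runs stay headless. The specific
# curves and labels below are assumptions chosen only to demonstrate the pattern.
if __name__ == "__main__" and _should_show_plots():
    _xs = np.linspace(-5.0, 5.0, 200)
    plt.plot(_xs, np.maximum(0, _xs), label="ReLU")
    plt.plot(_xs, 1 / (1 + np.exp(-_xs)), label="Sigmoid")
    plt.plot(_xs, np.tanh(_xs), label="Tanh")
    plt.legend()
    plt.title("Common activation functions")
    plt.show()
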
# %% ../../modules/source/03_activations/activations_dev.ipynb 7
class ReLU:
    """
    ReLU Activation Function: f(x) = max(0, x)

    The most popular activation function in deep learning.
    Simple, fast, and effective for most applications.
    """

    def forward(self, x):
        """
        Apply ReLU activation: f(x) = max(0, x)

        TODO: Implement ReLU activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. For each element in the input tensor, apply max(0, element)
        2. Use NumPy's maximum function for an efficient element-wise operation
        3. Return a new tensor of the same type with the results
        4. Preserve the input tensor's shape

        EXAMPLE USAGE:
        ```python
        relu = ReLU()
        input_tensor = Tensor([[-2, -1, 0, 1, 2]])
        output = relu(input_tensor)
        print(output.data)  # [[0, 0, 0, 1, 2]]
        ```

        IMPLEMENTATION HINTS:
        - Use np.maximum(0, x.data) for element-wise max with 0
        - Return the same type as input: return type(x)(result)
        - The shape should remain the same as the input
        - Don't modify the input tensor (immutable operations)

        LEARNING CONNECTIONS:
        - This is like torch.nn.ReLU() in PyTorch
        - Used in virtually every modern neural network
        - Enables deep networks by preventing vanishing gradients
        - Creates sparse representations (many zeros)
        """
        ### BEGIN SOLUTION
        result = np.maximum(0, x.data)
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make the class callable: relu(x) instead of relu.forward(x)"""
        return self.forward(x)

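# Sanity-check sketch (added for illustration; not exported by nbdev): mirrors
# the docstring example above. Assumes the Tensor class exposes its values as a
# NumPy array via `.data`, as the fallback tensor_dev.Tensor does.
if __name__ == "__main__":
    _relu = ReLU()
    _relu_out = _relu(Tensor([[-2, -1, 0, 1, 2]]))
    assert np.array_equal(_relu_out.data, np.array([[0, 0, 0, 1, 2]]))
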
# %% ../../modules/source/03_activations/activations_dev.ipynb 11
class Sigmoid:
    """
    Sigmoid Activation Function: f(x) = 1 / (1 + e^(-x))

    Maps any real number to the range (0, 1).
    Useful for binary classification and probability outputs.
    """

    def forward(self, x):
        """
        Apply Sigmoid activation: f(x) = 1 / (1 + e^(-x))

        TODO: Implement Sigmoid activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Compute the negative of the input: -x.data
        2. Compute the exponential: np.exp(-x.data)
        3. Add 1 to the exponential: 1 + np.exp(-x.data)
        4. Take the reciprocal: 1 / (1 + np.exp(-x.data))
        5. Return as a new Tensor

        EXAMPLE USAGE:
        ```python
        sigmoid = Sigmoid()
        input_tensor = Tensor([[-2, -1, 0, 1, 2]])
        output = sigmoid(input_tensor)
        print(output.data)  # [[0.119, 0.269, 0.5, 0.731, 0.881]]
        ```

        IMPLEMENTATION HINTS:
        - Use np.exp() for the exponential function
        - Formula: 1 / (1 + np.exp(-x.data))
        - Handle potential overflow with np.clip(-x.data, -500, 500)
        - Return the same type as input: return type(x)(result)

        LEARNING CONNECTIONS:
        - This is like torch.nn.Sigmoid() in PyTorch
        - Used in binary classification output layers
        - Key component in LSTM and GRU gating mechanisms
        - Historically important for early neural networks
        """
        ### BEGIN SOLUTION
        # Clip the negated input to prevent overflow in np.exp
        clipped_input = np.clip(-x.data, -500, 500)
        result = 1 / (1 + np.exp(clipped_input))
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make the class callable: sigmoid(x) instead of sigmoid.forward(x)"""
        return self.forward(x)

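# Sanity-check sketch (added for illustration; not exported by nbdev): sigmoid(0)
# should be exactly 0.5, and extreme inputs should stay inside [0, 1] thanks to
# the clipping in forward(). Assumes Tensor exposes `.data` as a NumPy array.
if __name__ == "__main__":
    _sigmoid = Sigmoid()
    _sig_out = _sigmoid(Tensor([[-1000.0, 0.0, 1000.0]]))
    assert np.isclose(_sig_out.data[0, 1], 0.5)
    assert np.all(_sig_out.data >= 0.0) and np.all(_sig_out.data <= 1.0)
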
# %% ../../modules/source/03_activations/activations_dev.ipynb 15
class Tanh:
    """
    Tanh Activation Function: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))

    Zero-centered activation function with range (-1, 1).
    Better gradient properties than sigmoid.
    """

    def forward(self, x: Tensor) -> Tensor:
        """
        Apply Tanh activation: f(x) = (e^x - e^(-x)) / (e^x + e^(-x))

        TODO: Implement Tanh activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Use NumPy's built-in tanh function: np.tanh(x.data)
        2. Alternatively, implement manually:
           - Compute e^x and e^(-x)
           - Calculate (e^x - e^(-x)) / (e^x + e^(-x))
        3. Return as a new Tensor

        EXAMPLE USAGE:
        ```python
        tanh = Tanh()
        input_tensor = Tensor([[-2, -1, 0, 1, 2]])
        output = tanh(input_tensor)
        print(output.data)  # [[-0.964, -0.762, 0, 0.762, 0.964]]
        ```

        IMPLEMENTATION HINTS:
        - Use np.tanh(x.data) for simplicity
        - Manual implementation: (np.exp(x.data) - np.exp(-x.data)) / (np.exp(x.data) + np.exp(-x.data))
        - In the manual version, handle overflow by clipping inputs: np.clip(x.data, -500, 500)
        - Return the same type as input: return type(x)(result)

        LEARNING CONNECTIONS:
        - This is like torch.nn.Tanh() in PyTorch
        - Used in RNN, LSTM, and GRU cells
        - Better than sigmoid for hidden layers
        - Zero-centered outputs help with gradient flow
        """
        ### BEGIN SOLUTION
        # Use NumPy's built-in tanh function (numerically stable)
        result = np.tanh(x.data)
        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the class callable: tanh(x) instead of tanh.forward(x)"""
        return self.forward(x)

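# Sanity-check sketch (added for illustration; not exported by nbdev): tanh is an
# odd, zero-centered function, so f(0) == 0 and f(-x) == -f(x). Assumes Tensor
# exposes `.data` as a NumPy array.
if __name__ == "__main__":
    _tanh = Tanh()
    _tanh_out = _tanh(Tensor([[-2.0, 0.0, 2.0]]))
    assert np.isclose(_tanh_out.data[0, 1], 0.0)
    assert np.isclose(_tanh_out.data[0, 0], -_tanh_out.data[0, 2])
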
# %% ../../modules/source/03_activations/activations_dev.ipynb 19
class Softmax:
    """
    Softmax Activation Function: f(x_i) = e^(x_i) / Σ(e^(x_j))

    Converts a vector of real numbers into a probability distribution.
    Essential for multi-class classification.
    """

    def forward(self, x):
        """
        Apply Softmax activation: f(x_i) = e^(x_i) / Σ(e^(x_j))

        TODO: Implement Softmax activation function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Handle the empty-input case
        2. Subtract the max value for numerical stability: x - max(x)
        3. Compute exponentials: np.exp(x - max(x))
        4. Compute the sum of exponentials: np.sum(exp_values)
        5. Divide each exponential by the sum: exp_values / sum
        6. Return as the same tensor type as the input

        EXAMPLE USAGE:
        ```python
        softmax = Softmax()
        input_tensor = Tensor([[1, 2, 3]])
        output = softmax(input_tensor)
        print(output.data)          # [[0.09, 0.24, 0.67]] (approximately)
        print(np.sum(output.data))  # 1.0
        ```

        IMPLEMENTATION HINTS:
        - Handle the empty case: if x.data.size == 0: return type(x)(x.data.copy())
        - Subtract the max for numerical stability: x_shifted = x.data - np.max(x.data, axis=-1, keepdims=True)
        - Compute exponentials: exp_values = np.exp(x_shifted)
        - Sum along the last axis: sum_exp = np.sum(exp_values, axis=-1, keepdims=True)
        - Divide: result = exp_values / sum_exp
        - Return the same type as input: return type(x)(result)

        LEARNING CONNECTIONS:
        - This is like torch.nn.Softmax() in PyTorch
        - Used in classification output layers
        - Key component in attention mechanisms
        - Enables probability-based decision making
        """
        ### BEGIN SOLUTION
        # Handle empty input
        if x.data.size == 0:
            return type(x)(x.data.copy())

        # Subtract max for numerical stability
        x_shifted = x.data - np.max(x.data, axis=-1, keepdims=True)

        # Compute exponentials
        exp_values = np.exp(x_shifted)

        # Sum along last axis
        sum_exp = np.sum(exp_values, axis=-1, keepdims=True)

        # Divide to get probabilities
        result = exp_values / sum_exp

        return type(x)(result)
        ### END SOLUTION

    def __call__(self, x):
        """Make the class callable: softmax(x) instead of softmax.forward(x)"""
        return self.forward(x)
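
# Sanity-check sketch (added for illustration; not exported by nbdev): each row of
# the softmax output should sum to 1, and large logits should not overflow thanks
# to the max-subtraction trick. Assumes Tensor exposes `.data` as a NumPy array.
if __name__ == "__main__":
    _softmax = Softmax()
    _soft_out = _softmax(Tensor([[1.0, 2.0, 3.0], [1000.0, 1000.0, 1000.0]]))
    assert np.allclose(np.sum(_soft_out.data, axis=-1), 1.0)
    assert np.all(np.isfinite(_soft_out.data))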