mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-22 20:46:14 -05:00
Major directory restructure to support both developer and learner workflows: Structure Changes: - NEW: src/ directory for Python source files (version controlled) - Files renamed: tensor.py → 01_tensor.py (matches directory naming) - All 20 modules moved from modules/ to src/ - CHANGED: modules/ now holds generated notebooks (gitignored) - Generated from src/*.py using jupytext - Learners work in notebooks, developers work in Python source - UNCHANGED: tinytorch/ package (still auto-generated from notebooks) Workflow: src/*.py → modules/*.ipynb → tinytorch/*.py Command Updates: - Updated export command to read from src/ and generate to modules/ - Export flow: discovers modules in src/, converts to notebooks in modules/, exports to tinytorch/ - All 20 modules tested and working Configuration: - Updated .gitignore to ignore modules/ directory - Updated README.md with new three-layer architecture explanation - Updated export.py source mappings and paths Benefits: - Clean separation: developers edit Python, learners use notebooks - Better version control: only Python source committed, notebooks generated - Flexible learning: can work in notebooks OR Python source - Maintains backward compatibility: tinytorch package unchanged Tested: - Single module export: tito export 01_tensor ✅ - All modules export: tito export --all ✅ - Package imports: from tinytorch.core.tensor import Tensor ✅ - 20/20 modules successfully converted and exported
247 lines
9.5 KiB
Python
Generated
247 lines
9.5 KiB
Python
Generated
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
|
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
|
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
|
# ║ ║
|
|
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
|
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
|
# ║ ║
|
|
# ║ ✅ TO EDIT: src/04_losses/04_losses.py ║
|
|
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
|
# ║ ║
|
|
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
|
# ║ Editing it directly may break module functionality and training. ║
|
|
# ║ ║
|
|
# ║ 🎓 LEARNING TIP: Work in src/ (developers) or modules/ (learners) ║
|
|
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
|
|
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
|
# %% auto 0
|
|
# Public names of this module: the set exported by a star-import.
__all__ = ['EPSILON', 'log_softmax', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss']
|
|
|
|
# %% ../../modules/04_losses/04_losses.ipynb 3
|
|
import numpy as np
|
|
from typing import Optional
|
|
|
|
# Import from TinyTorch package (previous modules must be completed and exported)
|
|
from .tensor import Tensor
|
|
from .activations import ReLU
|
|
from .layers import Linear
|
|
|
|
# Constants for numerical stability
|
|
# Clipping bound used by BinaryCrossEntropyLoss.forward: predictions are kept
# inside [EPSILON, 1 - EPSILON] so neither log(p) nor log(1 - p) is taken at 0.
EPSILON = 1e-7 # Small value to prevent log(0) and numerical instability
|
|
|
|
# %% ../../modules/04_losses/04_losses.ipynb 8
|
|
def log_softmax(x: Tensor, dim: int = -1) -> Tensor:
    """
    Return log(softmax(x)) along ``dim`` using the log-sum-exp trick.

    The maximum along ``dim`` is subtracted before exponentiating, so the
    largest exponent evaluated is exp(0) = 1 and large scores cannot overflow.

    APPROACH:
    1. Take the maximum along ``dim`` (kept as a broadcastable axis)
    2. Center the scores by subtracting that maximum
    3. Compute log(sum(exp(centered))) along ``dim``
    4. Subtract it from the centered scores

    Args:
        x: Tensor whose ``.data`` holds the raw scores (logits).
        dim: Axis to normalize over. Defaults to the last axis.

    Returns:
        A new Tensor, same shape as ``x.data``, holding log-probabilities
        along ``dim``.

    EXAMPLE:
    >>> logits = Tensor([[1.0, 2.0, 3.0], [0.1, 0.2, 0.9]])
    >>> result = log_softmax(logits, dim=-1)
    >>> print(result.shape)
    (2, 3)
    """
    ### BEGIN SOLUTION
    scores = x.data

    # keepdims=True keeps the reduced axis so the results broadcast back.
    peak = np.max(scores, axis=dim, keepdims=True)
    centered = scores - peak

    # Log of the softmax denominator, computed on the centered scores.
    log_norm = np.log(np.sum(np.exp(centered), axis=dim, keepdims=True))

    return Tensor(centered - log_norm)
    ### END SOLUTION
|
|
|
|
# %% ../../modules/04_losses/04_losses.ipynb 11
|
|
class MSELoss:
    """Mean Squared Error loss for regression tasks."""

    def __init__(self):
        """Create the loss object; MSE keeps no state or hyperparameters."""
        pass

    def forward(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """
        Return the mean of the squared element-wise prediction errors.

        APPROACH:
        1. Residual: predictions - targets
        2. Square each residual
        3. Average over every element

        Args:
            predictions: Tensor of model outputs (read through ``.data``).
            targets: Tensor of reference values (read through ``.data``).

        Returns:
            A Tensor wrapping the single averaged value.

        EXAMPLE:
        >>> loss_fn = MSELoss()
        >>> predictions = Tensor([1.0, 2.0, 3.0])
        >>> targets = Tensor([1.5, 2.5, 2.8])
        >>> loss = loss_fn(predictions, targets)
        >>> print(f"MSE Loss: {loss.data:.4f}")
        MSE Loss: 0.1800
        """
        ### BEGIN SOLUTION
        residual = predictions.data - targets.data

        # np.mean with no axis reduces over all elements at once.
        return Tensor(np.mean(residual ** 2))
        ### END SOLUTION

    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """Make the instance callable; delegates to ``forward``."""
        return self.forward(predictions, targets)

    def backward(self) -> Tensor:
        """
        Placeholder for gradient computation (Module 05: Autograd).

        Currently does nothing and returns None.
        """
        return None
|
|
|
|
# %% ../../modules/04_losses/04_losses.ipynb 14
|
|
class CrossEntropyLoss:
    """Cross-entropy loss for multi-class classification."""

    def __init__(self):
        """Initialize cross-entropy loss function (no state is kept)."""
        pass

    def forward(self, logits: Tensor, targets: Tensor) -> Tensor:
        """
        Compute cross-entropy loss between logits and target class indices.

        APPROACH:
        1. Compute log-softmax of logits over the last axis (numerically stable)
        2. Pick, for every row, the log-probability of its target class
        3. Return the negative mean of the picked log-probabilities

        Args:
            logits: Tensor of raw scores, indexed as (row, class).
            targets: Tensor of class indices, one per row. Values are cast to
                int. Both a flat (batch,) layout and a column (batch, 1)
                layout are accepted.

        Returns:
            A Tensor wrapping the mean negative log-likelihood.

        Raises:
            IndexError: raised by NumPy when a label is >= the number of
                classes, or when the labels cannot be paired with the rows.

        EXAMPLE:
        >>> loss_fn = CrossEntropyLoss()
        >>> logits = Tensor([[2.0, 1.0, 0.1], [0.5, 1.5, 0.8]])  # 2 samples, 3 classes
        >>> targets = Tensor([0, 1])  # First sample is class 0, second is class 1
        >>> loss = loss_fn(logits, targets)
        >>> print(f"Cross-Entropy Loss: {loss.data:.4f}")
        """
        ### BEGIN SOLUTION
        # Step 1: log-softmax over the class axis for numerical stability
        log_probs = log_softmax(logits, dim=-1)

        # Step 2: one integer label per row.
        # Flatten first: a (batch, 1) label array would otherwise broadcast
        # against the (batch,) row index and select a (batch, batch) matrix,
        # silently averaging the wrong entries.
        batch_size = logits.shape[0]
        target_indices = targets.data.astype(int).reshape(-1)

        # Paired (row, label) lookup via integer array indexing
        selected_log_probs = log_probs.data[np.arange(batch_size), target_indices]

        # Step 3: cross-entropy is the mean negative log-likelihood
        cross_entropy = -np.mean(selected_log_probs)

        return Tensor(cross_entropy)
        ### END SOLUTION

    def __call__(self, logits: Tensor, targets: Tensor) -> Tensor:
        """Allows the loss function to be called like a function."""
        return self.forward(logits, targets)

    def backward(self) -> Tensor:
        """
        Compute gradients (implemented in Module 05: Autograd).

        For now, this is a stub that does nothing and returns None.
        """
        pass
|
|
|
|
# %% ../../modules/04_losses/04_losses.ipynb 17
|
|
class BinaryCrossEntropyLoss:
    """Binary cross-entropy loss for binary classification."""

    def __init__(self):
        """Create the loss object; BCE keeps no state or hyperparameters."""
        pass

    def forward(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """
        Return the mean binary cross-entropy of predictions against targets.

        APPROACH:
        1. Clip predictions into [EPSILON, 1 - EPSILON] so no log sees 0
        2. Per element: -(t * log(p) + (1 - t) * log(1 - p))
        3. Average over every element

        Args:
            predictions: Tensor of predicted probabilities (read via ``.data``).
            targets: Tensor of binary labels (read via ``.data``).

        Returns:
            A Tensor wrapping the single averaged loss value.

        EXAMPLE:
        >>> loss_fn = BinaryCrossEntropyLoss()
        >>> predictions = Tensor([0.9, 0.1, 0.7, 0.3])  # Probabilities between 0 and 1
        >>> targets = Tensor([1.0, 0.0, 1.0, 0.0])  # Binary labels
        >>> loss = loss_fn(predictions, targets)
        >>> print(f"Binary Cross-Entropy Loss: {loss.data:.4f}")
        """
        ### BEGIN SOLUTION
        # Keep probabilities strictly inside (0, 1) before taking logs.
        probs = np.clip(predictions.data, EPSILON, 1 - EPSILON)
        labels = targets.data

        # Positive-class term plus negative-class term, negated.
        per_sample = -(labels * np.log(probs) + (1 - labels) * np.log(1 - probs))

        return Tensor(np.mean(per_sample))
        ### END SOLUTION

    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """Make the instance callable; delegates to ``forward``."""
        return self.forward(predictions, targets)

    def backward(self) -> Tensor:
        """
        Placeholder for gradient computation (Module 05: Autograd).

        Currently does nothing and returns None.
        """
        return None
|