# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/04_losses/losses_dev.ipynb.

# %% auto 0
__all__ = ['import_previous_module', 'log_softmax', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss']

# %% ../../modules/source/04_losses/losses_dev.ipynb 3
import numpy as np
from typing import Optional

def import_previous_module(module_name: str, component_name: str):
    """Load a named component from a sibling module's *_dev file.

    Appends the sibling module directory to sys.path, imports its
    `<name>_dev` module, and returns the requested attribute.
    """
    import sys
    import os
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', module_name))
    module = __import__(f"{module_name.split('_')[1]}_dev")
    return getattr(module, component_name)

# Import from tinytorch package
from .tensor import Tensor
from .layers import Linear
from .activations import ReLU
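
# Usage sketch (illustrative; '01_tensor' is a hypothetical directory name
# following the numbered `NN_name` module layout used in this repo):
#
# >>> Tensor = import_previous_module('01_tensor', 'Tensor')  # imports tensor_dev, returns its Tensor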

# %% ../../modules/source/04_losses/losses_dev.ipynb 8
def log_softmax(x: Tensor, dim: int = -1) -> Tensor:
    """
    Compute log-softmax with numerical stability.

    TODO: Implement numerically stable log-softmax using the log-sum-exp trick

    APPROACH:
    1. Find maximum along dimension (for stability)
    2. Subtract max from input (prevents overflow)
    3. Compute log(sum(exp(shifted_input)))
    4. Return input - max - log_sum_exp

    EXAMPLE:
    >>> logits = Tensor([[1.0, 2.0, 3.0], [0.1, 0.2, 0.9]])
    >>> result = log_softmax(logits, dim=-1)
    >>> print(result.shape)
    (2, 3)

    HINT: Use np.max(x.data, axis=dim, keepdims=True) to preserve dimensions
    """
    ### BEGIN SOLUTION
    # Step 1: Find max along dimension for numerical stability
    max_vals = np.max(x.data, axis=dim, keepdims=True)

    # Step 2: Subtract max to prevent overflow
    shifted = x.data - max_vals

    # Step 3: Compute log(sum(exp(shifted)))
    log_sum_exp = np.log(np.sum(np.exp(shifted), axis=dim, keepdims=True))

    # Step 4: Return log_softmax = input - max - log_sum_exp
    result = x.data - max_vals - log_sum_exp

    return Tensor(result)
    ### END SOLUTION
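
# Illustrative check (not part of the exported module): exponentiating the
# log-softmax output should give rows that sum to 1, and shifting the logits
# by a constant should leave the result unchanged; both follow from the
# log-sum-exp trick. Assumes Tensor accepts a nested list, as in the
# docstring example above.
#
# >>> log_probs = log_softmax(Tensor([[1.0, 2.0, 3.0]]), dim=-1)
# >>> np.allclose(np.exp(log_probs.data).sum(axis=-1), 1.0)
# True
# >>> shifted = log_softmax(Tensor([[1001.0, 1002.0, 1003.0]]), dim=-1)  # naive exp would overflow
# >>> np.allclose(log_probs.data, shifted.data)
# True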

# %% ../../modules/source/04_losses/losses_dev.ipynb 11
class MSELoss:
    """Mean Squared Error loss for regression tasks."""

    def __init__(self):
        """Initialize MSE loss function."""
        pass

    def forward(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """
        Compute mean squared error between predictions and targets.

        TODO: Implement MSE loss calculation

        APPROACH:
        1. Compute difference: predictions - targets
        2. Square the differences: diff²
        3. Take mean across all elements

        EXAMPLE:
        >>> loss_fn = MSELoss()
        >>> predictions = Tensor([1.0, 2.0, 3.0])
        >>> targets = Tensor([1.5, 2.5, 2.8])
        >>> loss = loss_fn(predictions, targets)
        >>> print(f"MSE Loss: {loss.data:.4f}")
        MSE Loss: 0.1800

        HINTS:
        - Use (predictions.data - targets.data) for element-wise difference
        - Square with **2 or np.power(diff, 2)
        - Use np.mean() to average over all elements
        """
        ### BEGIN SOLUTION
        # Step 1: Compute element-wise difference
        diff = predictions.data - targets.data

        # Step 2: Square the differences
        squared_diff = diff ** 2

        # Step 3: Take mean across all elements
        mse = np.mean(squared_diff)

        return Tensor(mse)
        ### END SOLUTION

    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """Allow the loss instance to be called like a function."""
        return self.forward(predictions, targets)

    def backward(self) -> Tensor:
        """
        Compute gradients (implemented in Module 05: Autograd).

        For now, this is a stub that students can ignore.
        """
        pass
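
# Worked check (illustrative): for predictions [1.0, 2.0, 3.0] and targets
# [1.5, 2.5, 2.8], the squared errors are [0.25, 0.25, 0.04], so the loss is
# 0.54 / 3 = 0.18, matching the docstring example.
#
# >>> loss = MSELoss()(Tensor([1.0, 2.0, 3.0]), Tensor([1.5, 2.5, 2.8]))
# >>> np.isclose(loss.data, 0.18)
# True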

# %% ../../modules/source/04_losses/losses_dev.ipynb 14
class CrossEntropyLoss:
    """Cross-entropy loss for multi-class classification."""

    def __init__(self):
        """Initialize cross-entropy loss function."""
        pass

    def forward(self, logits: Tensor, targets: Tensor) -> Tensor:
        """
        Compute cross-entropy loss between logits and target class indices.

        TODO: Implement cross-entropy loss with numerical stability

        APPROACH:
        1. Compute log-softmax of logits (numerically stable)
        2. Select log-probabilities for correct classes
        3. Return negative mean of selected log-probabilities

        EXAMPLE:
        >>> loss_fn = CrossEntropyLoss()
        >>> logits = Tensor([[2.0, 1.0, 0.1], [0.5, 1.5, 0.8]])  # 2 samples, 3 classes
        >>> targets = Tensor([0, 1])  # First sample is class 0, second is class 1
        >>> loss = loss_fn(logits, targets)
        >>> print(f"Cross-Entropy Loss: {loss.data:.4f}")

        HINTS:
        - Use log_softmax() for numerical stability
        - targets.data.astype(int) ensures integer indices
        - Use np.arange(batch_size) for row indexing: log_probs[np.arange(batch_size), targets]
        - Return negative mean: -np.mean(selected_log_probs)
        """
        ### BEGIN SOLUTION
        # Step 1: Compute log-softmax for numerical stability
        log_probs = log_softmax(logits, dim=-1)

        # Step 2: Select log-probabilities for correct classes
        batch_size = logits.shape[0]
        target_indices = targets.data.astype(int)

        # Select correct class log-probabilities using advanced indexing
        selected_log_probs = log_probs.data[np.arange(batch_size), target_indices]

        # Step 3: Return negative mean (cross-entropy is negative log-likelihood)
        cross_entropy = -np.mean(selected_log_probs)

        return Tensor(cross_entropy)
        ### END SOLUTION

    def __call__(self, logits: Tensor, targets: Tensor) -> Tensor:
        """Allow the loss instance to be called like a function."""
        return self.forward(logits, targets)

    def backward(self) -> Tensor:
        """
        Compute gradients (implemented in Module 05: Autograd).

        For now, this is a stub that students can ignore.
        """
        pass
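
# Worked check (illustrative): for the docstring's logits, log-softmax gives
# about -0.4170 for class 0 of the first sample and -0.6230 for class 1 of
# the second, so the loss is roughly (0.4170 + 0.6230) / 2 ≈ 0.5200.
#
# >>> loss = CrossEntropyLoss()(Tensor([[2.0, 1.0, 0.1], [0.5, 1.5, 0.8]]), Tensor([0, 1]))
# >>> np.isclose(loss.data, 0.5200, atol=1e-3)
# True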

# %% ../../modules/source/04_losses/losses_dev.ipynb 17
class BinaryCrossEntropyLoss:
    """Binary cross-entropy loss for binary classification."""

    def __init__(self):
        """Initialize binary cross-entropy loss function."""
        pass

    def forward(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """
        Compute binary cross-entropy loss.

        TODO: Implement binary cross-entropy with numerical stability

        APPROACH:
        1. Clamp predictions to avoid log(0) and log(1)
        2. Compute: -(targets * log(predictions) + (1-targets) * log(1-predictions))
        3. Return mean across all samples

        EXAMPLE:
        >>> loss_fn = BinaryCrossEntropyLoss()
        >>> predictions = Tensor([0.9, 0.1, 0.7, 0.3])  # Probabilities between 0 and 1
        >>> targets = Tensor([1.0, 0.0, 1.0, 0.0])  # Binary labels
        >>> loss = loss_fn(predictions, targets)
        >>> print(f"Binary Cross-Entropy Loss: {loss.data:.4f}")

        HINTS:
        - Use np.clip(predictions.data, 1e-7, 1 - 1e-7) to prevent log(0)
        - Binary cross-entropy: -(targets * log(preds) + (1-targets) * log(1-preds))
        - Use np.mean() to average over all samples
        """
        ### BEGIN SOLUTION
        # Step 1: Clamp predictions to avoid numerical issues with log(0) and log(1)
        eps = 1e-7
        clamped_preds = np.clip(predictions.data, eps, 1 - eps)

        # Step 2: Compute binary cross-entropy
        # BCE = -(targets * log(preds) + (1-targets) * log(1-preds))
        log_preds = np.log(clamped_preds)
        log_one_minus_preds = np.log(1 - clamped_preds)

        bce_per_sample = -(targets.data * log_preds + (1 - targets.data) * log_one_minus_preds)

        # Step 3: Return mean across all samples
        bce_loss = np.mean(bce_per_sample)

        return Tensor(bce_loss)
        ### END SOLUTION

    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """Allow the loss instance to be called like a function."""
        return self.forward(predictions, targets)

    def backward(self) -> Tensor:
        """
        Compute gradients (implemented in Module 05: Autograd).

        For now, this is a stub that students can ignore.
        """
        pass
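
# Worked check (illustrative): with hard 0/1 targets only the correct-label
# term survives, so the per-sample losses are [-ln 0.9, -ln 0.9, -ln 0.7,
# -ln 0.7] ≈ [0.105, 0.105, 0.357, 0.357], and the mean is about 0.2310.
#
# >>> loss = BinaryCrossEntropyLoss()(Tensor([0.9, 0.1, 0.7, 0.3]), Tensor([1.0, 0.0, 1.0, 0.0]))
# >>> np.isclose(loss.data, 0.2310, atol=1e-3)
# True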