# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_training/training_dev.ipynb.

# %% auto 0
__all__ = ['setup_import_paths', 'MeanSquaredError', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss', 'Accuracy',
           'Trainer']

# %% ../../modules/source/09_training/training_dev.ipynb 1
import numpy as np
import sys
import os
import pickle
import json
from pathlib import Path
from typing import List, Dict, Any, Optional, Union, Callable, Tuple
from collections import defaultdict
import time


# Helper function to set up import paths
def setup_import_paths():
    """Make the sibling development-module directories importable.

    The base directory is two levels above this file. Each module directory
    below it is appended to ``sys.path`` unless it is already present, so
    calling this function repeatedly does not grow ``sys.path``.
    """
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    module_dirs = [
        '01_tensor', '02_activations', '03_layers', '04_networks',
        '05_cnn', '06_dataloader', '07_autograd', '08_optimizers',
    ]
    for module_dir in module_dirs:
        path = os.path.join(base_dir, module_dir)
        if path not in sys.path:
            sys.path.append(path)


# Set up paths
setup_import_paths()

# Import all the building blocks we need
try:
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax
    from tinytorch.core.layers import Dense
    from tinytorch.core.networks import Sequential, create_mlp
    from tinytorch.core.cnn import Conv2D, flatten
    from tinytorch.core.dataloader import Dataset, DataLoader
    from tinytorch.core.autograd import Variable
    from tinytorch.core.optimizers import SGD, Adam, StepLR
except ImportError:
    # For development, create mock classes or import from local modules
    try:
        from tensor_dev import Tensor
        from activations_dev import ReLU, Sigmoid, Tanh, Softmax
        from layers_dev import Dense
        from networks_dev import Sequential, create_mlp
        from cnn_dev import Conv2D, flatten
        from dataloader_dev import Dataset, DataLoader
        from autograd_dev import Variable
        from optimizers_dev import SGD, Adam, StepLR
    except ImportError:
        # Neither the packaged nor the local dev modules are importable:
        # create minimal mock classes so this module can still be loaded.
        class Tensor:
            """Mock tensor: wraps the input in a NumPy array under ``data``."""

            def __init__(self, data):
                self.data = np.array(data)

            def __str__(self):
                return f"Tensor({self.data})"

        class Variable:
            """Mock autograd variable holding a Tensor and an optional grad."""

            def __init__(self, data, requires_grad=True):
                self.data = Tensor(data)
                self.requires_grad = requires_grad
                self.grad = None

            def zero_grad(self):
                self.grad = None

            def backward(self):
                # Placeholder gradient: a constant 1.0, not a real derivative.
                if self.requires_grad:
                    self.grad = Variable(1.0, requires_grad=False)

            def __str__(self):
                return f"Variable({self.data})"

        class SGD:
            """Mock optimizer: clears gradients, but ``step`` does nothing."""

            def __init__(self, parameters, learning_rate=0.01):
                self.parameters = parameters
                self.learning_rate = learning_rate

            def zero_grad(self):
                for param in self.parameters:
                    if hasattr(param, 'zero_grad'):
                        param.zero_grad()

            def step(self):
                pass

        class Sequential:
            """Mock model: applies each layer to the input in order."""

            def __init__(self, layers=None):
                self.layers = layers or []

            def __call__(self, x):
                for layer in self.layers:
                    x = layer(x)
                return x

        class DataLoader:
            """Mock loader: always yields one fixed (input, target) batch."""

            def __init__(self, dataset, batch_size=32, shuffle=True):
                self.dataset = dataset
                self.batch_size = batch_size
                self.shuffle = shuffle

            def __iter__(self):
                return iter([(Tensor([1, 2, 3]), Tensor([0]))])


# %% ../../modules/source/09_training/training_dev.ipynb 4
class MeanSquaredError:
    """
    Mean Squared Error Loss for Regression

    Measures the average squared difference between predictions and targets.
    MSE = (1/n) * Σ(y_pred - y_true)²
    """

    def __init__(self):
        """Initialize MSE loss function."""
        pass

    def __call__(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
        """
        Compute MSE loss between predictions and targets.

        The computation runs on the ``.data`` arrays, like the other losses
        in this module, so it does not depend on the Tensor class providing
        ``-`` and ``*`` operators (the fallback mock Tensor has neither).

        Args:
            y_pred: Model predictions; ``.data`` must be a NumPy array.
            y_true: True targets; ``.data`` must broadcast against
                ``y_pred.data``.

        Returns:
            Tensor wrapping the mean of the squared differences over all
            elements.

        EXAMPLE:
            y_pred = Tensor([[1.0, 2.0], [3.0, 4.0]])
            y_true = Tensor([[1.5, 2.5], [2.5, 3.5]])
            loss = mse_loss(y_pred, y_true)
            # mean([0.25, 0.25, 0.25, 0.25]) = 0.25
        """
        diff = y_pred.data - y_true.data
        return Tensor(np.mean(diff * diff))

    def forward(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
        """Alternative interface for forward pass."""
        return self.__call__(y_pred, y_true)


# %% ../../modules/source/09_training/training_dev.ipynb 7
class CrossEntropyLoss:
    """
    Cross-Entropy Loss for Multi-Class Classification

    Measures the difference between predicted probability distribution and
    true labels.
    CrossEntropy = -Σ y_true * log(y_pred)
    """

    def __init__(self):
        """Initialize CrossEntropy loss function."""
        pass

    def __call__(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
        """
        Compute CrossEntropy loss between predictions and targets.

        A softmax is applied to ``y_pred`` internally, so raw logits are
        expected.

        Args:
            y_pred: Logits of shape [batch_size, num_classes]. A 1-D array
                is treated as a single sample.
            y_true: Class indices of shape [batch_size] or [batch_size, 1],
                or one-hot rows of shape [batch_size, num_classes].

        Returns:
            Tensor wrapping the mean negative log-probability of the true
            class.

        EXAMPLE:
            y_pred = Tensor([[2.0, 1.0, 0.1], [0.5, 2.1, 0.9]])  # Raw logits
            y_true = Tensor([0, 1])  # Class indices
            loss = crossentropy_loss(y_pred, y_true)  # ~0.4123
        """
        logits = y_pred.data
        if logits.ndim == 1:
            # Single sample: promote to a one-row batch.
            logits = logits.reshape(1, -1)

        # Softmax with the row maximum subtracted so exp() cannot overflow.
        exp_shifted = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        probs = exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

        # Keep probabilities away from 0 so log() stays finite.
        epsilon = 1e-15
        probs = np.clip(probs, epsilon, 1.0 - epsilon)

        labels = y_true.data
        if labels.ndim == 2 and labels.shape[1] == 1 and probs.shape[1] > 1:
            # A [batch_size, 1] column cannot be one-hot when there are
            # several classes. Treat it as class indices, as Accuracy does,
            # instead of broadcasting it against the log-probabilities.
            labels = labels.reshape(-1)

        if labels.ndim == 1:
            # Class indices: pick the probability of the true class per row.
            rows = np.arange(labels.shape[0])
            loss = -np.mean(np.log(probs[rows, labels.astype(int)]))
        else:
            # One-hot rows: sum label-weighted log-probabilities per sample.
            loss = -np.mean(np.sum(labels * np.log(probs), axis=1))

        return Tensor(loss)

    def forward(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
        """Alternative interface for forward pass."""
        return self.__call__(y_pred, y_true)


# %% ../../modules/source/09_training/training_dev.ipynb 10
class BinaryCrossEntropyLoss:
    """
    Binary Cross-Entropy Loss for Binary Classification

    Measures the difference between predicted probabilities and binary
    labels.
    BCE = -y_true * log(y_pred) - (1-y_true) * log(1-y_pred)
    """

    def __init__(self):
        """Initialize Binary CrossEntropy loss function."""
        pass

    def __call__(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
        """
        Compute Binary CrossEntropy loss between predictions and targets.

        The loss is computed directly from logits; no separate sigmoid or
        probability clipping is performed.

        Args:
            y_pred: Raw logits; any shape, flattened before use.
            y_true: Binary labels; flattened, must match ``y_pred`` in size.

        Returns:
            Tensor wrapping the mean loss over all elements.

        EXAMPLE:
            y_pred = Tensor([[2.0], [0.0], [-1.0]])  # Raw logits
            y_true = Tensor([[1.0], [1.0], [0.0]])   # Binary labels
            loss = bce_loss(y_pred, y_true)  # ~0.3778
        """
        logits = y_pred.data.flatten()
        labels = y_true.data.flatten()

        # Stable identities, with softplus(x) = max(x, 0) + log1p(exp(-|x|)):
        #   log(sigmoid(x))     =  x - softplus(x)  = min(x, 0)  - log1p(exp(-|x|))
        #   log(1 - sigmoid(x)) = -x - softplus(-x) = min(-x, 0) - log1p(exp(-|x|))
        # min() replaces "x - max(0, x)", which gives inf - inf = nan for
        # infinite logits.
        tail = np.log1p(np.exp(-np.abs(logits)))
        log_prob = np.minimum(logits, 0) - tail
        log_not_prob = np.minimum(-logits, 0) - tail

        losses = -(labels * log_prob + (1 - labels) * log_not_prob)
        return Tensor(np.mean(losses))

    def forward(self, y_pred: Tensor, y_true: Tensor) -> Tensor:
        """Alternative interface for forward pass."""
        return self.__call__(y_pred, y_true)


# %% ../../modules/source/09_training/training_dev.ipynb 14
class Accuracy:
    """
    Accuracy Metric for Classification

    Computes the fraction of correct predictions.
    Accuracy = (Correct Predictions) / (Total Predictions)
    """

    def __init__(self, threshold: float = 0.5):
        """
        Initialize Accuracy metric.

        Args:
            threshold: Cut-off for single-column (binary) predictions;
                values strictly above it count as class 1. The default 0.5
                suits probabilities; use 0.0 for raw logits.
        """
        self.threshold = threshold

    def __call__(self, y_pred: Tensor, y_true: Tensor) -> float:
        """
        Compute accuracy between predictions and targets.

        Args:
            y_pred: Scores of shape [batch_size, num_classes] (argmax is
                taken), or shape [batch_size] / [batch_size, 1] (compared
                against ``threshold``).
            y_true: Class indices of shape [batch_size] or [batch_size, 1],
                or one-hot rows of shape [batch_size, num_classes].

        Returns:
            Accuracy as a Python float.

        Raises:
            ValueError: If predictions and labels describe different
                numbers of samples.

        EXAMPLE:
            y_pred = Tensor([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
            y_true = Tensor([0, 1, 0])
            accuracy = accuracy_metric(y_pred, y_true)
            # argmax gives [0, 1, 0], so all three are correct: 1.0
        """
        scores = y_pred.data
        if scores.ndim > 1 and scores.shape[1] > 1:
            # Multi-class: use argmax
            pred_classes = np.argmax(scores, axis=1)
        else:
            # Binary classification: threshold the single score
            pred_classes = (scores.flatten() > self.threshold).astype(int)

        labels = y_true.data
        if labels.ndim > 1 and labels.shape[1] > 1:
            # One-hot encoded
            true_classes = np.argmax(labels, axis=1)
        else:
            # Already class indices
            true_classes = labels.flatten().astype(int)

        if pred_classes.shape != true_classes.shape:
            # Without this check NumPy would broadcast (e.g. one label
            # against N predictions) and could report an accuracy above 1.
            raise ValueError(
                f"Accuracy: got {pred_classes.shape[0]} predictions "
                f"but {true_classes.shape[0]} labels"
            )

        correct = np.sum(pred_classes == true_classes)
        return float(correct / len(true_classes))

    def forward(self, y_pred: Tensor, y_true: Tensor) -> float:
        """Alternative interface for forward pass."""
        return self.__call__(y_pred, y_true)


# %% ../../modules/source/09_training/training_dev.ipynb 18
class Trainer:
    """
    Training Loop Orchestrator

    Coordinates model training with loss functions, optimizers, and metrics.
    """

    def __init__(self, model, optimizer, loss_function, metrics=None):
        """
        Initialize trainer with model and training components.

        Args:
            model: Callable mapping a batch of inputs to predictions.
            optimizer: Object providing ``zero_grad()`` and ``step()``.
            loss_function: Callable ``(predictions, targets)`` returning an
                object whose ``.data`` is the scalar loss.
            metrics: Optional list of callables ``(predictions, targets)``
                returning a number. Each is tracked under its lower-cased
                class name.

        EXAMPLE:
            model = Sequential([Dense(10, 5), ReLU(), Dense(5, 2)])
            optimizer = Adam(model.parameters, learning_rate=0.001)
            trainer = Trainer(model, optimizer, CrossEntropyLoss(), [Accuracy()])
        """
        self.model = model
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.metrics = metrics or []

        # Training history: one entry per epoch in every list.
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'epoch': []
        }
        for metric in self.metrics:
            metric_name = self._metric_name(metric)
            self.history[f'train_{metric_name}'] = []
            self.history[f'val_{metric_name}'] = []

        # Training state
        self.current_epoch = 0
        self.current_step = 0

    @staticmethod
    def _metric_name(metric):
        """Return the history/result key for a metric (lower-cased class name)."""
        return metric.__class__.__name__.lower()

    def _run_epoch(self, dataloader, training):
        """Run one pass over ``dataloader`` and return batch-averaged results.

        With ``training=True`` the optimizer's ``zero_grad()`` and ``step()``
        are called around each batch and ``current_step`` is advanced.
        """
        totals = {'loss': 0.0}
        for metric in self.metrics:
            totals[self._metric_name(metric)] = 0.0

        batch_count = 0
        for batch_x, batch_y in dataloader:
            if training:
                self.optimizer.zero_grad()

            predictions = self.model(batch_x)
            loss = self.loss_function(predictions, batch_y)

            if training:
                # NOTE: no backward pass is run here. The loss classes in
                # this module return a fresh Tensor built from a NumPy
                # scalar, so step() runs without this method having
                # computed any gradients.
                self.optimizer.step()

            totals['loss'] += loss.data
            for metric in self.metrics:
                totals[self._metric_name(metric)] += metric(predictions, batch_y)

            batch_count += 1
            if training:
                self.current_step += 1

        if batch_count == 0:
            # Averaging over zero batches would be a bare ZeroDivisionError.
            raise ValueError("dataloader yielded no batches; cannot average epoch metrics")

        return {key: total / batch_count for key, total in totals.items()}

    def train_epoch(self, dataloader):
        """
        Train for one epoch on the given dataloader.

        Args:
            dataloader: Iterable of ``(inputs, targets)`` batches.

        Returns:
            Dictionary with the batch-averaged ``'loss'`` and one entry per
            metric, keyed by the metric's lower-cased class name.

        Raises:
            ValueError: If the dataloader yields no batches.
        """
        return self._run_epoch(dataloader, training=True)

    def validate_epoch(self, dataloader):
        """
        Validate for one epoch on the given dataloader.

        Same as ``train_epoch`` but the optimizer is never touched and
        ``current_step`` is not advanced.

        Args:
            dataloader: Iterable of ``(inputs, targets)`` batches.

        Returns:
            Dictionary with the batch-averaged ``'loss'`` and one entry per
            metric.

        Raises:
            ValueError: If the dataloader yields no batches.
        """
        return self._run_epoch(dataloader, training=False)

    def fit(self, train_dataloader, val_dataloader=None, epochs=10, verbose=True):
        """
        Train the model for specified number of epochs.

        Args:
            train_dataloader: Training batches; iterated once per epoch.
            val_dataloader: Validation batches (optional). When omitted, the
                ``val_*`` history lists are left untouched.
            epochs: Number of training epochs.
            verbose: Whether to print anything. When False, neither the
                per-epoch summary nor the start/end banners are printed.

        Returns:
            The trainer's history dictionary (the same object as
            ``self.history``).
        """
        has_val = val_dataloader is not None

        if verbose:
            print(f"Starting training for {epochs} epochs...")

        for epoch in range(epochs):
            self.current_epoch = epoch

            train_metrics = self.train_epoch(train_dataloader)
            val_metrics = self.validate_epoch(val_dataloader) if has_val else {}

            # Update history
            self.history['epoch'].append(epoch)
            self.history['train_loss'].append(train_metrics['loss'])
            if has_val:
                self.history['val_loss'].append(val_metrics['loss'])

            for metric in self.metrics:
                metric_name = self._metric_name(metric)
                self.history[f'train_{metric_name}'].append(train_metrics[metric_name])
                if has_val:
                    self.history[f'val_{metric_name}'].append(val_metrics[metric_name])

            if verbose:
                # Build the one-line epoch summary, then print it once.
                parts = [f"Epoch {epoch+1}/{epochs} - train_loss: {train_metrics['loss']:.4f}"]
                if has_val:
                    parts.append(f" - val_loss: {val_metrics['loss']:.4f}")
                for metric in self.metrics:
                    metric_name = self._metric_name(metric)
                    parts.append(f" - train_{metric_name}: {train_metrics[metric_name]:.4f}")
                    if has_val:
                        parts.append(f" - val_{metric_name}: {val_metrics[metric_name]:.4f}")
                print("".join(parts))

        if verbose:
            print("Training completed!")
        return self.history