# TinyTorch/tinytorch/core/losses.py

# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║                        🚨 CRITICAL WARNING 🚨                                ║
# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
# ║                                                                               ║
# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
# ║                                                                               ║
# ║  ✅ TO EDIT: src/XX_losses/XX_losses.py                             ║
# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
# ║                                                                               ║
# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
# ║     Editing it directly may break module functionality and training.         ║
# ║                                                                               ║
# ║  🎓 LEARNING TIP: Work in src/ (developers) or modules/ (learners)    ║
# ║     The tinytorch/ directory is generated code - edit source files instead!  ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
# Names exported by a star-import of this module (the module's public API).
__all__ = ['EPSILON', 'log_softmax', 'MSELoss', 'CrossEntropyLoss', 'BinaryCrossEntropyLoss']

# %% ../../modules/04_losses/04_losses.ipynb 3
import numpy as np
from typing import Optional

# Import from TinyTorch package (previous modules must be completed and exported)
from .tensor import Tensor
from .activations import ReLU
from .layers import Linear

# Constants for numerical stability
# EPSILON keeps probabilities away from exactly 0 and 1: BinaryCrossEntropyLoss
# clips its predictions to [EPSILON, 1 - EPSILON] before taking logarithms.
EPSILON = 1e-7  # Small value to prevent log(0) and numerical instability

# %% ../../modules/04_losses/04_losses.ipynb 8
def log_softmax(x: Tensor, dim: int = -1) -> Tensor:
    """
    Return log(softmax(x)) along ``dim`` using the log-sum-exp trick.

    The per-slice maximum is subtracted before exponentiating, so every
    exponent is <= 0 and ``np.exp`` cannot overflow for finite inputs:

        log_softmax(x) = x - max(x) - log(sum(exp(x - max(x))))

    Args:
        x: Tensor whose ``.data`` array holds the raw scores (logits).
        dim: Axis along which the softmax normalization is performed.
            Defaults to the last axis.

    Returns:
        A new Tensor wrapping the log-probabilities; the reductions keep
        their dimensions, so the result has the same shape as ``x.data``.

    EXAMPLE:
    >>> logits = Tensor([[1.0, 2.0, 3.0], [0.1, 0.2, 0.9]])
    >>> result = log_softmax(logits, dim=-1)
    >>> print(result.shape)
    (2, 3)
    """
    ### BEGIN SOLUTION
    values = x.data

    # Largest entry of each slice; keepdims=True lets it broadcast back.
    peak = np.max(values, axis=dim, keepdims=True)

    # log of the softmax denominator, computed on the shifted scores.
    log_norm = np.log(np.sum(np.exp(values - peak), axis=dim, keepdims=True))

    return Tensor(values - peak - log_norm)
    ### END SOLUTION

# %% ../../modules/04_losses/04_losses.ipynb 11
class MSELoss:
    """Mean squared error criterion for regression tasks."""

    def __init__(self):
        """Create the loss object; it holds no state or configuration."""
        pass

    def forward(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """
        Average the squared element-wise error between two tensors.

        Computes ``mean((predictions - targets) ** 2)`` over every element
        of the (NumPy-broadcast) difference of the two ``.data`` arrays.

        Args:
            predictions: Tensor of model outputs.
            targets: Tensor of ground-truth values.

        Returns:
            A Tensor wrapping the scalar mean squared error.

        EXAMPLE:
        >>> loss_fn = MSELoss()
        >>> predictions = Tensor([1.0, 2.0, 3.0])
        >>> targets = Tensor([1.5, 2.5, 2.8])
        >>> loss = loss_fn(predictions, targets)
        >>> print(f"MSE Loss: {loss.data:.4f}")
        MSE Loss: 0.1800
        """
        ### BEGIN SOLUTION
        # Residual of every element, squared and averaged in one pass.
        residual = predictions.data - targets.data
        return Tensor(np.mean(residual ** 2))
        ### END SOLUTION

    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """Make the instance callable; delegates to :meth:`forward`."""
        return self.forward(predictions, targets)

    def backward(self) -> Tensor:
        """
        Placeholder for gradient computation (Module 05: Autograd).

        Currently does nothing and returns ``None``.
        """
        return None

# %% ../../modules/04_losses/04_losses.ipynb 14
class CrossEntropyLoss:
    """Cross-entropy loss for multi-class classification.

    Consumes raw logits (unnormalized scores) and integer class indices and
    returns the mean negative log-likelihood of the correct classes.
    """

    def __init__(self):
        """Initialize cross-entropy loss function (no state to configure)."""
        pass

    def forward(self, logits: Tensor, targets: Tensor) -> Tensor:
        """
        Compute cross-entropy loss between logits and target class indices.

        The last axis of ``logits`` is treated as the class axis. All leading
        axes are flattened into a single "sample" axis and ``targets`` is
        flattened to one index per sample, so the following layouts all work:

        - logits ``(N, C)`` with targets ``(N,)``      (the standard case)
        - logits ``(N, C)`` with targets ``(N, 1)``    (column of labels)
        - logits ``(B, S, C)`` with targets ``(B, S)`` (e.g. sequences)
        - logits ``(C,)`` with a single target index   (one unbatched sample)

        Flattening the targets matters: an ``(N, 1)`` target array would
        otherwise broadcast against the row indices and select an ``(N, N)``
        matrix of log-probabilities, silently producing a wrong loss.

        Args:
            logits: Tensor of raw class scores; the last axis indexes classes.
            targets: Tensor of class indices (cast to ``int``), one per sample.

        Returns:
            A Tensor wrapping the scalar mean of ``-log p(correct class)``.

        Raises:
            IndexError: Raised by NumPy indexing when a class index is out of
                range or the number of targets cannot be matched to the
                number of samples.

        EXAMPLE:
        >>> loss_fn = CrossEntropyLoss()
        >>> logits = Tensor([[2.0, 1.0, 0.1], [0.5, 1.5, 0.8]])  # 2 samples, 3 classes
        >>> targets = Tensor([0, 1])  # First sample is class 0, second is class 1
        >>> loss = loss_fn(logits, targets)
        >>> print(f"Cross-Entropy Loss: {loss.data:.4f}")
        Cross-Entropy Loss: 0.5200
        """
        ### BEGIN SOLUTION
        # Step 1: Compute log-softmax over the class axis (numerically stable)
        log_probs = np.asarray(log_softmax(logits, dim=-1).data)

        # Step 2: Flatten to one row per sample and one class index per sample.
        # reshape(-1) on the targets prevents (N, 1) labels from broadcasting
        # against the row indices into an (N, N) selection.
        num_classes = log_probs.shape[-1]
        flat_log_probs = log_probs.reshape(-1, num_classes)
        target_indices = np.asarray(targets.data).astype(int).reshape(-1)

        # Select the correct-class log-probability of each sample
        sample_rows = np.arange(flat_log_probs.shape[0])
        selected_log_probs = flat_log_probs[sample_rows, target_indices]

        # Step 3: Return negative mean (cross-entropy is negative log-likelihood)
        cross_entropy = -np.mean(selected_log_probs)

        return Tensor(cross_entropy)
        ### END SOLUTION

    def __call__(self, logits: Tensor, targets: Tensor) -> Tensor:
        """Allows the loss function to be called like a function."""
        return self.forward(logits, targets)

    def backward(self) -> Tensor:
        """
        Compute gradients (implemented in Module 05: Autograd).

        For now, this is a stub that does nothing and returns ``None``.
        """
        pass

# %% ../../modules/04_losses/04_losses.ipynb 17
class BinaryCrossEntropyLoss:
    """Binary cross-entropy criterion for two-class problems."""

    def __init__(self):
        """Create the loss object; it holds no state or configuration."""
        pass

    def forward(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """
        Average binary cross-entropy between probabilities and labels.

        Predictions are first clipped to ``[EPSILON, 1 - EPSILON]`` so that
        neither ``log(p)`` nor ``log(1 - p)`` is evaluated at zero, then

            BCE = mean(-(t * log(p) + (1 - t) * log(1 - p)))

        is computed over all elements.

        Args:
            predictions: Tensor of predicted probabilities (expected in [0, 1]).
            targets: Tensor of labels, combined element-wise with predictions.

        Returns:
            A Tensor wrapping the scalar mean loss.

        EXAMPLE:
        >>> loss_fn = BinaryCrossEntropyLoss()
        >>> predictions = Tensor([0.9, 0.1, 0.7, 0.3])  # Probabilities between 0 and 1
        >>> targets = Tensor([1.0, 0.0, 1.0, 0.0])      # Binary labels
        >>> loss = loss_fn(predictions, targets)
        >>> print(f"Binary Cross-Entropy Loss: {loss.data:.4f}")
        Binary Cross-Entropy Loss: 0.2310
        """
        ### BEGIN SOLUTION
        labels = targets.data

        # Keep probabilities strictly inside (0, 1) so both logs stay finite.
        probs = np.clip(predictions.data, EPSILON, 1 - EPSILON)

        # Contribution of the positive (label 1) and negative (label 0) cases.
        positive_term = labels * np.log(probs)
        negative_term = (1 - labels) * np.log(1 - probs)

        return Tensor(np.mean(-(positive_term + negative_term)))
        ### END SOLUTION

    def __call__(self, predictions: Tensor, targets: Tensor) -> Tensor:
        """Make the instance callable; delegates to :meth:`forward`."""
        return self.forward(predictions, targets)

    def backward(self) -> Tensor:
        """
        Placeholder for gradient computation (Module 05: Autograd).

        Currently does nothing and returns ``None``.
        """
        return None