# TinyTorch/tinytorch/core/networks.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/05_dense/dense_dev.ipynb.

# %% auto 0
__all__ = ['Sequential', 'create_mlp', 'MLP']

# %% ../../modules/source/05_dense/dense_dev.ipynb 1
import numpy as np
import sys
import os
from typing import List, Union, Optional, Callable
import matplotlib.pyplot as plt

# Import all the building blocks we need - try package first, then local modules
try:
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.layers import Dense, Module
    from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax
except ImportError:
    # For development, import from local modules
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_activations'))
    sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
    from tensor_dev import Tensor
    from activations_dev import ReLU, Sigmoid, Tanh, Softmax
    from layers_dev import Dense, Module

# %% ../../modules/source/05_dense/dense_dev.ipynb 2
def _should_show_plots():
    """Return True when plots may be displayed, False when a test run is detected.

    A test run is assumed as soon as any one of these signals is present:
    the ``pytest`` module is loaded, ``PYTEST_CURRENT_TEST`` is set in the
    environment, or a command-line argument equals or contains ``test`` /
    ``pytest``.
    """
    # pytest has already been imported into this interpreter
    if 'pytest' in sys.modules:
        return False

    cli_args = sys.argv

    # An argument that is exactly 'test'
    if 'test' in cli_args:
        return False

    # Environment variable checked as a further pytest signal
    if os.environ.get('PYTEST_CURRENT_TEST') is not None:
        return False

    # Any argument that merely contains one of the markers as a substring
    for marker in ('test', 'pytest'):
        if any(marker in arg for arg in cli_args):
            return False

    # No test signal found: development mode, plots are allowed
    return True

# %% ../../modules/source/05_dense/dense_dev.ipynb 7
class Sequential(Module):
    """
    Sequential Network: composes layers in sequence.

    Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x))).
    Every layer is additionally set as an attribute named ``layer_<index>``
    on the instance; this is what lets the Module base class see the layers
    for parameter collection.
    """

    def __init__(self, layers: Optional[List] = None):
        """
        Initialize the Sequential network.

        Args:
            layers: Layers to compose, in application order. Optional; when
                omitted (or None) the network starts empty and layers can be
                added later with add().

        EXAMPLE:
        Sequential([Dense(3,4), ReLU(), Dense(4,2)])
        creates a 3-layer network: Dense → ReLU → Dense
        """
        ### BEGIN SOLUTION
        super().__init__()  # Initialize Module base class
        # Take a shallow copy instead of aliasing the caller's list:
        # - add() must not silently grow a list the caller still owns, and
        # - a caller appending to their list afterwards must not inject layers
        #   that run in forward() but were never registered via setattr below.
        self.layers = list(layers) if layers is not None else []
        # Register all layers as sub-modules for parameter collection
        for i, layer in enumerate(self.layers):
            setattr(self, f'layer_{i}', layer)
        ### END SOLUTION

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through all layers in sequence.

        Args:
            x: Input tensor

        Returns:
            Output of the last layer. With no layers, the input is returned
            unchanged.

        EXAMPLE:
        Input: Tensor([[1, 2, 3]])
        Layer1 (Dense): Tensor([[1.4, 2.8]])
        Layer2 (ReLU): Tensor([[1.4, 2.8]])
        Layer3 (Dense): Tensor([[0.7]])
        Output: Tensor([[0.7]])
        """
        ### BEGIN SOLUTION
        # Each layer's output becomes the next layer's input
        out = x
        for layer in self.layers:
            out = layer(out)
        return out
        ### END SOLUTION

    def __call__(self, x: Tensor) -> Tensor:
        """Make the network callable: sequential(x) instead of sequential.forward(x)"""
        return self.forward(x)

    def add(self, layer):
        """
        Append a layer to the end of the network.

        Args:
            layer: Layer to apply after all existing layers.
        """
        index = len(self.layers)  # position the new layer will occupy
        self.layers.append(layer)
        # Register the new layer for parameter collection
        setattr(self, f'layer_{index}', layer)

# %% ../../modules/source/05_dense/dense_dev.ipynb 11
def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int,
               activation=ReLU, output_activation=Sigmoid) -> Sequential:
    """
    Build a Multi-Layer Perceptron (MLP) as a Sequential network.

    Every hidden size contributes a Dense layer followed by a fresh
    ``activation()`` instance; the network ends with a Dense layer mapping to
    ``output_size`` followed by ``output_activation()``.

    Args:
        input_size: Number of input features
        hidden_sizes: Sizes of the hidden layers, in order (may be empty)
        output_size: Number of output features
        activation: Zero-argument callable producing the hidden-layer
            activation (default: ReLU)
        output_activation: Zero-argument callable producing the output
            activation (default: Sigmoid). It is always called, so None is
            not accepted here.

    Returns:
        Sequential network with MLP architecture

    EXAMPLE:
    create_mlp(3, [4, 2], 1) creates:
    Dense(3→4) → ReLU → Dense(4→2) → ReLU → Dense(2→1) → Sigmoid
    """
    # Layer widths from the input through the last hidden layer
    widths = [input_size, *hidden_sizes]

    stack = []
    # Consecutive width pairs give the (in, out) sizes of each hidden Dense
    # layer; the Dense is constructed before its activation.
    for fan_in, fan_out in zip(widths, widths[1:]):
        stack += [Dense(fan_in, fan_out), activation()]

    # Output projection from the last width, then the output activation
    stack += [Dense(widths[-1], output_size), output_activation()]

    return Sequential(stack)

# %% ../../modules/source/05_dense/dense_dev.ipynb 19
class MLP:
    """
    Multi-Layer Perceptron (MLP) with a single hidden layer.

    Thin wrapper that builds a Sequential network of the form
    Dense(input→hidden) → activation → Dense(hidden→output) [→ output_activation]
    and stores it as ``self.network``.

    NOTE(review): this class does not inherit from Module, so anything the
    Module base class provides is only reachable through ``self.network``.

    Args:
        input_size: Number of input features
        hidden_size: Size of the single hidden layer
        output_size: Number of output features
        activation: Zero-argument callable producing the hidden-layer
            activation (default: ReLU)
        output_activation: Zero-argument callable producing the output
            activation, or None (the default) to leave the final Dense
            output without an activation layer
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int,
                 activation=ReLU, output_activation=None):
        # Keep the architecture sizes available on the instance
        self.input_size, self.hidden_size, self.output_size = (
            input_size, hidden_size, output_size
        )

        # Input → hidden (with activation), then hidden → output
        stack = [
            Dense(input_size, hidden_size),
            activation(),
            Dense(hidden_size, output_size),
        ]

        # The output activation is optional and only added when supplied
        if output_activation is not None:
            stack.append(output_activation())

        self.network = Sequential(stack)

    def forward(self, x):
        """Run x through the wrapped Sequential network and return its output."""
        return self.network.forward(x)

    def __call__(self, x):
        """Allow mlp(x) as shorthand for mlp.forward(x)."""
        return self.forward(x)