# TinyTorch/tinytorch/core/networks.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/networks/networks_dev.ipynb.

# %% auto 0
__all__ = ['Sequential', 'visualize_network_architecture', 'visualize_data_flow', 'compare_networks', 'create_mlp',
           'analyze_network_behavior', 'create_classification_network', 'create_regression_network']

# %% ../../modules/networks/networks_dev.ipynb 3
import numpy as np
import sys
from typing import List, Union, Optional, Callable
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import FancyBboxPatch, ConnectionPatch
import seaborn as sns

# Import our building blocks
from .tensor import Tensor
from .layers import Dense
from .activations import ReLU, Sigmoid, Tanh

# %% ../../modules/networks/networks_dev.ipynb 4
def _should_show_plots():
    """Check if we should show plots (disable during testing)"""
    return 'pytest' not in sys.modules and 'test' not in sys.argv

# %% ../../modules/networks/networks_dev.ipynb 6
class Sequential:
    """
    Sequential Network: Composes layers in sequence (exercise stub).

    The most fundamental network architecture.
    Applies layers in order: f(x) = layer_n(...layer_2(layer_1(x)))

    This version is the unimplemented exercise template: both ``__init__`` and
    ``forward`` raise ``NotImplementedError``. Another class named
    ``Sequential`` is defined further down in this module; because it is bound
    to the same name afterwards, that later definition is the one the module
    ends up exposing.

    Args:
        layers: List of layers to compose

    TODO: Implement the Sequential network with forward pass.
    """

    def __init__(self, layers: List):
        """
        Initialize Sequential network with layers.

        Args:
            layers: List of layers to compose in order

        Raises:
            NotImplementedError: Always; the stub stores nothing.

        TODO: Store the layers and implement forward pass
        """
        raise NotImplementedError("Student implementation required")

    def forward(self, x: Tensor) -> Tensor:
        """
        Forward pass through all layers in sequence.

        Args:
            x: Input tensor

        Returns:
            Output tensor after passing through all layers

        Raises:
            NotImplementedError: Always; the stub performs no computation.

        TODO: Implement sequential forward pass through all layers
        """
        raise NotImplementedError("Student implementation required")

    def __call__(self, x: Tensor) -> Tensor:
        """Make network callable: network(x) same as network.forward(x)"""
        # Pure delegation, so calling the stub raises exactly as forward() does.
        return self.forward(x)

# %% ../../modules/networks/networks_dev.ipynb 7
class Sequential:
    """
    Chain of layers applied one after another.

    Calling the network feeds the input to the first layer, hands that result
    to the second layer, and so on: f(x) = layer_n(...layer_2(layer_1(x))).

    Attributes are limited to ``layers``, the sequence supplied at construction
    (stored as-is, not copied).
    """

    def __init__(self, layers: List):
        """
        Keep a reference to the layers that make up the network.

        Args:
            layers: Callables to apply, in order.
        """
        self.layers = layers

    def forward(self, x: Tensor) -> Tensor:
        """
        Push ``x`` through every layer in order.

        Args:
            x: Input tensor.

        Returns:
            The result of the last layer, or ``x`` itself when there are no layers.
        """
        current = x
        for stage in self.layers:
            # Each stage consumes the previous stage's result.
            current = stage(current)
        return current

    def __call__(self, x: Tensor) -> Tensor:
        """Allow ``network(x)`` as shorthand for ``network.forward(x)``."""
        return self.forward(x)

# %% ../../modules/networks/networks_dev.ipynb 11
def visualize_network_architecture(network: Sequential, title: str = "Network Architecture"):
    """
    Draw the network as a left-to-right chain of colour-coded boxes.

    Every layer becomes one rounded box labelled with its class name. Layers
    that expose both ``input_size`` and ``output_size`` also get an
    "in→out" caption. Consecutive boxes are joined by an arrow, and a legend
    lists the colours of the known layer types. When plots are disabled, only a
    notice is printed.

    Args:
        network: Sequential network to visualize
        title: Title for the plot
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    # Fill colour by layer class name; 'default' is used for any other class.
    palette = {
        'Dense': '#4CAF50',      # Green
        'ReLU': '#2196F3',       # Blue
        'Sigmoid': '#FF9800',    # Orange
        'Tanh': '#9C27B0',       # Purple
        'default': '#757575'      # Gray
    }
    fallback_color = palette['default']

    # Geometry shared by all boxes.
    box_height = 0.8
    spacing = 1.2
    baseline = 0
    last_index = len(network.layers) - 1

    _, ax = plt.subplots(1, 1, figsize=(12, 8))

    for index, layer in enumerate(network.layers):
        kind = type(layer).__name__
        center = index * spacing

        # Rounded box for this layer.
        ax.add_patch(FancyBboxPatch(
            (center - 0.3, baseline - box_height/2),
            0.6, box_height,
            boxstyle="round,pad=0.1",
            facecolor=palette.get(kind, fallback_color),
            edgecolor='black',
            linewidth=2,
            alpha=0.8
        ))

        # Class name centred in the box.
        ax.text(center, baseline, kind, ha='center', va='center',
                fontsize=10, fontweight='bold', color='white')

        # Size caption, only for layers that declare both dimensions.
        if all(hasattr(layer, attr) for attr in ('input_size', 'output_size')):
            caption = f"{layer.input_size}→{layer.output_size}"
            ax.text(center, baseline - 0.3, caption, ha='center', va='center',
                    fontsize=8, color='white')

        # Arrow to the following box (none after the last one).
        if index < last_index:
            following_center = (index + 1) * spacing
            ax.add_patch(ConnectionPatch(
                (center + 0.3, baseline), (following_center - 0.3, baseline),
                "data", "data",
                arrowstyle="->", shrinkA=5, shrinkB=5,
                mutation_scale=20, fc="black", lw=2
            ))

    # Frame the drawing and hide the axes decorations.
    ax.set_xlim(-0.5, last_index * spacing + 0.5)
    ax.set_ylim(-1, 1)
    ax.set_aspect('equal')
    ax.axis('off')

    plt.title(title, fontsize=16, fontweight='bold', pad=20)

    # One legend entry per named layer type (the fallback colour is omitted).
    legend_handles = [
        patches.Patch(color=shade, label=type_name)
        for type_name, shade in palette.items()
        if type_name != 'default'
    ]
    ax.legend(handles=legend_handles, loc='upper right', bbox_to_anchor=(1, 1))

    plt.tight_layout()
    plt.show()

# %% ../../modules/networks/networks_dev.ipynb 12
def visualize_data_flow(network: Sequential, input_data: Tensor, title: str = "Data Flow Through Network"):
    """
    Visualize how data flows through the network.

    Feeds ``input_data`` through the layers one at a time and draws a figure
    with one column per layer. The top cell of a column lists the layer's class
    name, its ``input_size``/``output_size`` (when the layer has both
    attributes), and the shape, mean and standard deviation of that layer's
    output. The bottom cell plots the flattened output: a bar chart for 20
    values or fewer, a 20-bin histogram otherwise.

    When plots are disabled, only a notice is printed.

    Args:
        network: Sequential network
        input_data: Input tensor
        title: Title for the plot

    Raises:
        ValueError: If the network has no layers (there would be no columns to draw).
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    layer_count = len(network.layers)
    if layer_count == 0:
        # A subplot grid needs at least one column; fail before opening a figure.
        raise ValueError("visualize_data_flow requires a network with at least one layer")

    # Record the output of every layer in order.
    layer_outputs = []
    x = input_data
    for layer in network.layers:
        x = layer(x)
        layer_outputs.append(x)

    # squeeze=False keeps `axes` as a (2, layer_count) array even for a single
    # layer, so axes[row, col] indexing is valid in every case.
    _, axes = plt.subplots(2, layer_count, figsize=(4*layer_count, 8), squeeze=False)

    for i, (layer, output) in enumerate(zip(network.layers, layer_outputs)):
        # Top row: Layer information
        ax_top = axes[0, i]

        # Layer type and details
        layer_type = type(layer).__name__
        ax_top.text(0.5, 0.8, layer_type, ha='center', va='center',
                   fontsize=12, fontweight='bold')

        if hasattr(layer, 'input_size') and hasattr(layer, 'output_size'):
            ax_top.text(0.5, 0.6, f"{layer.input_size} → {layer.output_size}",
                       ha='center', va='center', fontsize=10)

        # Output shape
        ax_top.text(0.5, 0.4, f"Shape: {output.shape}",
                   ha='center', va='center', fontsize=9)

        # Output statistics
        output_values = output.data
        ax_top.text(0.5, 0.2, f"Mean: {np.mean(output_values):.3f}",
                   ha='center', va='center', fontsize=9)
        ax_top.text(0.5, 0.1, f"Std: {np.std(output_values):.3f}",
                   ha='center', va='center', fontsize=9)

        ax_top.set_xlim(0, 1)
        ax_top.set_ylim(0, 1)
        ax_top.axis('off')

        # Bottom row: Output visualization
        ax_bottom = axes[1, i]
        flat_values = output_values.flatten()

        if len(flat_values) <= 20:  # Small output - show as bars
            ax_bottom.bar(range(len(flat_values)), flat_values, alpha=0.7)
            ax_bottom.set_title(f"Layer {i+1} Output")
            ax_bottom.set_xlabel("Output Index")
            ax_bottom.set_ylabel("Value")
        else:  # Large output - show histogram
            ax_bottom.hist(flat_values, bins=20, alpha=0.7, edgecolor='black')
            ax_bottom.set_title(f"Layer {i+1} Output Distribution")
            ax_bottom.set_xlabel("Value")
            ax_bottom.set_ylabel("Frequency")

        ax_bottom.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

# %% ../../modules/networks/networks_dev.ipynb 13
def compare_networks(networks: List[Sequential], network_names: List[str],
                    input_data: Tensor, title: str = "Network Comparison"):
    """
    Compare different network architectures side-by-side.

    Draws one column per network. The top cell is a pie chart of how many
    layers of each class the network contains (left empty for a network with
    no layers). The bottom cell is a 20-bin histogram of the network's
    flattened output for ``input_data``, with the mean and median marked.

    Networks and names are paired with ``zip``, so surplus entries in the
    longer list are ignored. When plots are disabled, only a notice is printed.

    Args:
        networks: List of networks to compare
        network_names: Names for each network
        input_data: Input tensor to test with
        title: Title for the plot

    Raises:
        ValueError: If ``networks`` is empty (there would be no columns to draw).
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    network_count = len(networks)
    if network_count == 0:
        # A subplot grid needs at least one column; fail before opening a figure.
        raise ValueError("compare_networks requires at least one network")

    # squeeze=False keeps `axes` as a (2, network_count) array even for a
    # single network, so axes[row, col] indexing is valid in every case.
    _, axes = plt.subplots(2, network_count, figsize=(6*network_count, 10), squeeze=False)

    for i, (network, name) in enumerate(zip(networks, network_names)):
        # Get network output
        output = network(input_data)

        # Top row: Architecture visualization
        ax_top = axes[0, i]

        # Count layer types
        layer_types = {}
        for layer in network.layers:
            layer_type = type(layer).__name__
            layer_types[layer_type] = layer_types.get(layer_type, 0) + 1

        # Create pie chart of layer types
        if layer_types:
            labels = list(layer_types.keys())
            sizes = list(layer_types.values())
            colors = plt.cm.Set3(np.linspace(0, 1, len(labels)))

            ax_top.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors)
            ax_top.set_title(f"{name}\nLayer Distribution")

        # Bottom row: Output comparison
        ax_bottom = axes[1, i]

        output_data = output.data.flatten()
        mean_value = np.mean(output_data)
        median_value = np.median(output_data)

        # Show output statistics
        ax_bottom.hist(output_data, bins=20, alpha=0.7, edgecolor='black')
        ax_bottom.axvline(mean_value, color='red', linestyle='--',
                         label=f'Mean: {mean_value:.3f}')
        ax_bottom.axvline(median_value, color='green', linestyle='--',
                         label=f'Median: {median_value:.3f}')

        ax_bottom.set_title(f"{name} Output Distribution")
        ax_bottom.set_xlabel("Output Value")
        ax_bottom.set_ylabel("Frequency")
        ax_bottom.legend()
        ax_bottom.grid(True, alpha=0.3)

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

# %% ../../modules/networks/networks_dev.ipynb 15
def create_mlp(input_size: int, hidden_sizes: List[int], output_size: int,
               activation=ReLU, output_activation=Sigmoid) -> Sequential:
    """
    Create a Multi-Layer Perceptron (MLP) network.

    The resulting layer list is: for every hidden size, a ``Dense`` layer
    followed by a fresh ``activation()`` instance; then a final ``Dense`` layer
    producing ``output_size`` features; then ``output_activation()``. With no
    hidden sizes the network is a single ``Dense(input_size, output_size)``
    followed by the output activation.

    Args:
        input_size: Number of input features
        hidden_sizes: List of hidden layer sizes (empty or None for no hidden layers)
        output_size: Number of output features
        activation: Zero-argument callable (e.g. a class) producing the
            activation placed after each hidden layer
        output_activation: Zero-argument callable producing the activation
            placed after the output layer, or None to leave the output layer's
            result untouched

    Returns:
        Sequential network
    """
    # Feature counts at each layer boundary before the output:
    # [input, hidden_1, ..., hidden_n]
    widths = [input_size, *(hidden_sizes or [])]

    layers = []

    # Every adjacent pair of widths is one hidden Dense layer plus its activation.
    for fan_in, fan_out in zip(widths, widths[1:]):
        layers.append(Dense(fan_in, fan_out))
        layers.append(activation())

    # Output layer reads from the last hidden width (or the input when there
    # are no hidden layers).
    layers.append(Dense(widths[-1], output_size))
    if output_activation is not None:
        layers.append(output_activation())

    return Sequential(layers)

# %% ../../modules/networks/networks_dev.ipynb 18
def analyze_network_behavior(network: Sequential, input_data: Tensor,
                           title: str = "Network Behavior Analysis"):
    """
    Analyze how a network behaves on a given input and plot a 2x3 dashboard.

    Panels:
        1. Input vs output: element-wise scatter with a y=x reference line when
           input and output have the same number of elements; otherwise
           overlaid histograms of the input and output values.
        2. Histogram of the output values with the mean marked.
        3. Mean of each layer's output, by layer index.
        4. Number of layers of each class.
        5. Number of elements in the tensor before the first layer and after
           each layer.
        6. Text summary (layer count, shapes, weight count, architecture).

    When plots are disabled, only a notice is printed.

    Args:
        network: Network to analyze
        input_data: Input tensor
        title: Title for the plot
    """
    if not _should_show_plots():
        print("📊 Plots disabled during testing - this is normal!")
        return

    # Full forward pass for the end-to-end panels.
    output = network(input_data)
    input_flat = input_data.data.flatten()
    output_flat = output.data.flatten()

    # One layer-by-layer pass collects everything panels 3 and 5 need.
    mean_activations = []
    shapes = [input_data.shape]
    x = input_data
    for layer in network.layers:
        x = layer(x)
        mean_activations.append(np.mean(x.data))
        shapes.append(x.shape)

    layer_types = [type(layer).__name__ for layer in network.layers]

    _, axes = plt.subplots(2, 3, figsize=(15, 10))

    # 1. Input vs Output relationship
    ax1 = axes[0, 0]
    if len(input_flat) == len(output_flat):
        ax1.scatter(input_flat, output_flat, alpha=0.6)
        ax1.plot([input_flat.min(), input_flat.max()],
                 [input_flat.min(), input_flat.max()], 'r--', alpha=0.5, label='y=x')
        ax1.set_xlabel('Input Values')
        ax1.set_ylabel('Output Values')
    else:
        # scatter() needs equally sized x and y; when the network changes the
        # element count, compare the two value distributions instead.
        ax1.hist(input_flat, bins=20, alpha=0.5, edgecolor='black', label='Input')
        ax1.hist(output_flat, bins=20, alpha=0.5, edgecolor='black', label='Output')
        ax1.set_xlabel('Value')
        ax1.set_ylabel('Frequency')
    ax1.set_title('Input vs Output')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # 2. Output distribution
    ax2 = axes[0, 1]
    output_mean = np.mean(output_flat)
    ax2.hist(output_flat, bins=20, alpha=0.7, edgecolor='black')
    ax2.axvline(output_mean, color='red', linestyle='--',
                label=f'Mean: {output_mean:.3f}')
    ax2.set_xlabel('Output Values')
    ax2.set_ylabel('Frequency')
    ax2.set_title('Output Distribution')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    # 3. Layer-by-layer activation patterns
    ax3 = axes[0, 2]
    ax3.plot(range(len(mean_activations)), mean_activations, 'bo-', linewidth=2, markersize=8)
    ax3.set_xlabel('Layer Index')
    ax3.set_ylabel('Mean Activation')
    ax3.set_title('Layer-by-Layer Activations')
    ax3.grid(True, alpha=0.3)

    # 4. Network depth analysis
    ax4 = axes[1, 0]
    layer_counts = {}
    for layer_type in layer_types:
        layer_counts[layer_type] = layer_counts.get(layer_type, 0) + 1

    if layer_counts:
        ax4.bar(list(layer_counts.keys()), list(layer_counts.values()), alpha=0.7)
        ax4.set_xlabel('Layer Type')
        ax4.set_ylabel('Count')
        ax4.set_title('Layer Type Distribution')
        ax4.grid(True, alpha=0.3)

    # 5. Shape transformation
    ax5 = axes[1, 1]
    shape_sizes = [np.prod(shape) for shape in shapes]
    ax5.plot(range(len(shapes)), shape_sizes, 'go-', linewidth=2, markersize=8)
    ax5.set_xlabel('Layer Index')
    ax5.set_ylabel('Tensor Size')
    ax5.set_title('Shape Transformation')
    ax5.grid(True, alpha=0.3)

    # 6. Network summary
    ax6 = axes[1, 2]
    ax6.axis('off')

    # Only attributes named `weights` are counted; any other per-layer
    # parameters (e.g. biases) are not included in this figure.
    param_count = sum(np.prod(layer.weights.data.shape) if hasattr(layer, 'weights') else 0
                      for layer in network.layers)
    architecture = ' → '.join(layer_types)

    summary_text = f"""
Network Summary:
• Total Layers: {len(network.layers)}
• Input Shape: {input_data.shape}
• Output Shape: {output.shape}
• Parameters: {param_count}
• Architecture: {architecture}
    """

    ax6.text(0.05, 0.95, summary_text, transform=ax6.transAxes,
             fontsize=10, verticalalignment='top', fontfamily='monospace')

    plt.suptitle(title, fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()

# %% ../../modules/networks/networks_dev.ipynb 21
def create_classification_network(input_size: int, num_classes: int,
                                hidden_sizes: Optional[List[int]] = None) -> Sequential:
    """
    Create a network for classification problems.

    Delegates to ``create_mlp`` with ReLU hidden activations and a Sigmoid
    applied after the output layer.

    Args:
        input_size: Number of input features
        num_classes: Number of output classes
        hidden_sizes: List of hidden layer sizes
            (default: one hidden layer of ``max(1, input_size // 2)`` units)

    Returns:
        Sequential network for classification
    """
    if hidden_sizes is None:
        # input_size // 2 is 0 for input_size == 1, which would create a
        # zero-width hidden layer; keep at least one hidden unit.
        hidden_sizes = [max(1, input_size // 2)]

    return create_mlp(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=num_classes,
        activation=ReLU,
        output_activation=Sigmoid
    )

# %% ../../modules/networks/networks_dev.ipynb 22
def create_regression_network(input_size: int, output_size: int = 1,
                             hidden_sizes: Optional[List[int]] = None) -> Sequential:
    """
    Create a network for regression problems.

    Delegates to ``create_mlp`` with ReLU hidden activations and a Tanh
    applied after the output layer.

    Args:
        input_size: Number of input features
        output_size: Number of output values (default: 1)
        hidden_sizes: List of hidden layer sizes
            (default: one hidden layer of ``max(1, input_size // 2)`` units)

    Returns:
        Sequential network for regression
    """
    if hidden_sizes is None:
        # input_size // 2 is 0 for input_size == 1, which would create a
        # zero-width hidden layer; keep at least one hidden unit.
        hidden_sizes = [max(1, input_size // 2)]

    return create_mlp(
        input_size=input_size,
        hidden_sizes=hidden_sizes,
        output_size=output_size,
        activation=ReLU,
        # NOTE(review): an earlier comment here read "No activation for
        # regression", but Tanh IS applied to the output. If Tanh is the usual
        # hyperbolic tangent, predictions are confined to (-1, 1) - confirm
        # this is intended for the regression targets in use.
        output_activation=Tanh
    )