# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb.

# %% auto 0
__all__ = ['Conv2d', 'MaxPool2d', 'AvgPool2d', 'SimpleCNN']

# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1
import numpy as np
from .tensor import Tensor

# %% ../../modules/source/09_spatial/spatial_dev.ipynb 6
class Conv2d:
    """
    2D Convolution layer for spatial feature extraction.

    Implements convolution with explicit loops to demonstrate
    computational complexity and memory access patterns.

    Args:
        in_channels: Number of input channels
        out_channels: Number of output feature maps
        kernel_size: Size of convolution kernel (int or tuple)
        stride: Stride of convolution (default: 1)
        padding: Zero-padding added to input (default: 0)
        bias: Whether to add learnable bias (default: True)
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        """
        Initialize Conv2d layer with proper weight initialization.

        TODO: Complete Conv2d initialization

        APPROACH:
        1. Store hyperparameters (channels, kernel_size, stride, padding)
        2. Initialize weights using He initialization for ReLU compatibility
        3. Initialize bias (if enabled) to zeros
        4. Use proper shapes: weight (out_channels, in_channels, kernel_h, kernel_w)

        WEIGHT INITIALIZATION:
        - He init: std = sqrt(2 / (in_channels * kernel_h * kernel_w))
        - This prevents vanishing/exploding gradients with ReLU

        HINT: Convert kernel_size to a tuple if it's an integer
        """
        super().__init__()
        ### BEGIN SOLUTION
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Handle kernel_size as int or tuple
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        self.stride = stride
        self.padding = padding

        # He initialization for ReLU networks
        kernel_h, kernel_w = self.kernel_size
        fan_in = in_channels * kernel_h * kernel_w
        std = np.sqrt(2.0 / fan_in)

        # Weight shape: (out_channels, in_channels, kernel_h, kernel_w)
        self.weight = Tensor(np.random.normal(0, std, (out_channels, in_channels, kernel_h, kernel_w)))

        # Bias initialization
        if bias:
            self.bias = Tensor(np.zeros(out_channels))
        else:
            self.bias = None
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through Conv2d layer.

        TODO: Implement convolution with explicit loops

        APPROACH:
        1. Extract input dimensions and validate
        2. Calculate output dimensions
        3. Apply padding if needed
        4. Implement 7 nested loops for the full convolution
        5. Add bias if present

        LOOP STRUCTURE:
        for batch in range(batch_size):
            for out_ch in range(out_channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        for k_h in range(kernel_height):
                            for k_w in range(kernel_width):
                                for in_ch in range(in_channels):
                                    # Accumulate: out += input * weight

        EXAMPLE:
        >>> conv = Conv2d(3, 16, kernel_size=3, padding=1)
        >>> x = Tensor(np.random.randn(2, 3, 32, 32))  # batch=2, RGB, 32x32
        >>> out = conv(x)
        >>> print(out.shape)  # Should be (2, 16, 32, 32)

        HINTS:
        - Handle padding by creating a padded input array
        - Watch array bounds in the inner loops
        - Accumulate products for each output position
        """
        ### BEGIN SOLUTION
        # Input validation and shape extraction
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, in_channels, in_height, in_width = x.shape
        out_channels = self.out_channels
        kernel_h, kernel_w = self.kernel_size

        # Calculate output dimensions
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Apply padding if needed
        if self.padding > 0:
            padded_input = np.pad(x.data,
                                  ((0, 0), (0, 0),
                                   (self.padding, self.padding),
                                   (self.padding, self.padding)),
                                  mode='constant', constant_values=0)
        else:
            padded_input = x.data

        # Initialize output
        output = np.zeros((batch_size, out_channels, out_height, out_width))

        # Explicit 7-nested-loop convolution to show the computational cost
        for b in range(batch_size):
            for out_ch in range(out_channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Calculate input region for this output position
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        # Accumulate convolution result
                        conv_sum = 0.0
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                for in_ch in range(in_channels):
                                    # Get input and weight values
                                    input_val = padded_input[b, in_ch, in_h_start + k_h, in_w_start + k_w]
                                    weight_val = self.weight.data[out_ch, in_ch, k_h, k_w]
                                    # Accumulate
                                    conv_sum += input_val * weight_val

                        # Store result
                        output[b, out_ch, out_h, out_w] = conv_sum

        # Add bias if present
        if self.bias is not None:
            # Broadcast bias across spatial dimensions
            for out_ch in range(out_channels):
                output[:, out_ch, :, :] += self.bias.data[out_ch]

        return Tensor(output)
        ### END SOLUTION

    def parameters(self):
        """Return trainable parameters."""
        params = [self.weight]
        if self.bias is not None:
            params.append(self.bias)
        return params

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
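# Minimal usage sketch for Conv2d, assuming Tensor wraps a NumPy array in
# `.data` and exposes `.shape` (as the code above relies on). It checks the
# output-size formula out = (in + 2*padding - kernel) // stride + 1 on a
# small input; the __main__ guard keeps it from running on import.
if __name__ == "__main__":
    _conv = Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
    _x = Tensor(np.random.randn(1, 3, 8, 8))
    _out = _conv(_x)
    # (8 + 2*1 - 3) // 1 + 1 = 8, so padding=1 preserves the spatial size
    assert _out.shape == (1, 4, 8, 8)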
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 11
class MaxPool2d:
    """
    2D Max Pooling layer for spatial dimension reduction.

    Applies the maximum operation over spatial windows, preserving the
    strongest activations while reducing computational load.

    Args:
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling operation (default: same as kernel_size)
        padding: Zero-padding added to input (default: 0)
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        """
        Initialize MaxPool2d layer.

        TODO: Store pooling parameters

        APPROACH:
        1. Convert kernel_size to a tuple if needed
        2. Set stride to kernel_size if not provided (non-overlapping)
        3. Store padding parameter

        HINT: Default stride equals kernel_size for non-overlapping windows
        """
        super().__init__()
        ### BEGIN SOLUTION
        # Handle kernel_size as int or tuple
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        # Default stride equals kernel_size (non-overlapping)
        if stride is None:
            self.stride = self.kernel_size[0]
        else:
            self.stride = stride

        self.padding = padding
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through MaxPool2d layer.

        TODO: Implement max pooling with explicit loops

        APPROACH:
        1. Extract input dimensions
        2. Calculate output dimensions
        3. Apply padding if needed
        4. Implement nested loops over the pooling windows
        5. Find the maximum value in each window

        LOOP STRUCTURE:
        for batch in range(batch_size):
            for channel in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Find max in window [in_h:in_h+k_h, in_w:in_w+k_w]
                        max_val = -infinity
                        for k_h in range(kernel_height):
                            for k_w in range(kernel_width):
                                max_val = max(max_val, input[...])

        EXAMPLE:
        >>> pool = MaxPool2d(kernel_size=2, stride=2)
        >>> x = Tensor(np.random.randn(1, 3, 8, 8))
        >>> out = pool(x)
        >>> print(out.shape)  # Should be (1, 3, 4, 4)

        HINTS:
        - Initialize max_val to negative infinity
        - Handle stride correctly when accessing the input
        - No parameters to update (pooling has no weights)
        """
        ### BEGIN SOLUTION
        # Input validation and shape extraction
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, channels, in_height, in_width = x.shape
        kernel_h, kernel_w = self.kernel_size

        # Calculate output dimensions
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Apply padding if needed (pad with -inf so padding never wins the max)
        if self.padding > 0:
            padded_input = np.pad(x.data,
                                  ((0, 0), (0, 0),
                                   (self.padding, self.padding),
                                   (self.padding, self.padding)),
                                  mode='constant', constant_values=-np.inf)
        else:
            padded_input = x.data

        # Initialize output
        output = np.zeros((batch_size, channels, out_height, out_width))

        # Explicit nested-loop max pooling
        for b in range(batch_size):
            for c in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Calculate input region for this output position
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        # Find maximum in window
                        max_val = -np.inf
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                input_val = padded_input[b, c, in_h_start + k_h, in_w_start + k_w]
                                max_val = max(max_val, input_val)

                        # Store result
                        output[b, c, out_h, out_w] = max_val

        return Tensor(output)
        ### END SOLUTION

    def parameters(self):
        """Return empty list (pooling has no parameters)."""
        return []

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
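# Minimal usage sketch for MaxPool2d on a hand-checkable input: each 2x2
# window keeps its largest entry. Guarded so it never runs on import.
if __name__ == "__main__":
    _pool = MaxPool2d(kernel_size=2)  # stride defaults to 2 (non-overlapping)
    _x = Tensor(np.arange(16, dtype=float).reshape(1, 1, 4, 4))
    _out = _pool(_x)
    # Window maxima of rows [0..3], [4..7], [8..11], [12..15] are [[5, 7], [13, 15]]
    assert _out.shape == (1, 1, 2, 2)
    assert _out.data[0, 0, 0, 0] == 5.0 and _out.data[0, 0, 1, 1] == 15.0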
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 13
class AvgPool2d:
    """
    2D Average Pooling layer for spatial dimension reduction.

    Applies the average operation over spatial windows, smoothing features
    while reducing computational load.

    Args:
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling operation (default: same as kernel_size)
        padding: Zero-padding added to input (default: 0)
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        """
        Initialize AvgPool2d layer.

        TODO: Store pooling parameters (same as MaxPool2d)

        APPROACH:
        1. Convert kernel_size to a tuple if needed
        2. Set stride to kernel_size if not provided
        3. Store padding parameter
        """
        super().__init__()
        ### BEGIN SOLUTION
        # Handle kernel_size as int or tuple
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        # Default stride equals kernel_size (non-overlapping)
        if stride is None:
            self.stride = self.kernel_size[0]
        else:
            self.stride = stride

        self.padding = padding
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through AvgPool2d layer.

        TODO: Implement average pooling with explicit loops

        APPROACH:
        1. Use the same structure as MaxPool2d
        2. Instead of the max, compute the average of each window
        3. Divide the sum by the window area for a true average

        LOOP STRUCTURE:
        for batch in range(batch_size):
            for channel in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Compute average in window
                        window_sum = 0
                        for k_h in range(kernel_height):
                            for k_w in range(kernel_width):
                                window_sum += input[...]
                        avg_val = window_sum / (kernel_height * kernel_width)

        HINT: Remember to divide by the window area to get a true average
        """
        ### BEGIN SOLUTION
        # Input validation and shape extraction
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, channels, in_height, in_width = x.shape
        kernel_h, kernel_w = self.kernel_size

        # Calculate output dimensions
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Apply padding if needed (padded zeros are included in the average)
        if self.padding > 0:
            padded_input = np.pad(x.data,
                                  ((0, 0), (0, 0),
                                   (self.padding, self.padding),
                                   (self.padding, self.padding)),
                                  mode='constant', constant_values=0)
        else:
            padded_input = x.data

        # Initialize output
        output = np.zeros((batch_size, channels, out_height, out_width))

        # Explicit nested-loop average pooling
        for b in range(batch_size):
            for c in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Calculate input region for this output position
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        # Compute sum in window
                        window_sum = 0.0
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                input_val = padded_input[b, c, in_h_start + k_h, in_w_start + k_w]
                                window_sum += input_val

                        # Compute average
                        avg_val = window_sum / (kernel_h * kernel_w)

                        # Store result
                        output[b, c, out_h, out_w] = avg_val

        return Tensor(output)
        ### END SOLUTION

    def parameters(self):
        """Return empty list (pooling has no parameters)."""
        return []

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
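# Minimal usage sketch for AvgPool2d on the same hand-checkable input: each
# 2x2 window is replaced by its mean. Guarded so it never runs on import.
if __name__ == "__main__":
    _pool = AvgPool2d(kernel_size=2)
    _x = Tensor(np.arange(16, dtype=float).reshape(1, 1, 4, 4))
    _out = _pool(_x)
    # Top-left window {0, 1, 4, 5} has mean 10 / 4 = 2.5
    assert _out.data[0, 0, 0, 0] == 2.5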
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 21
class SimpleCNN:
    """
    Simple CNN demonstrating spatial operations integration.

    Architecture:
    - Conv2d(3→16, 3×3) + ReLU + MaxPool(2×2)
    - Conv2d(16→32, 3×3) + ReLU + MaxPool(2×2)
    - Flatten + Linear(features→num_classes)
    """

    def __init__(self, num_classes=10):
        """
        Initialize SimpleCNN.

        TODO: Build CNN architecture with spatial and dense layers

        APPROACH:
        1. Conv layer 1: 3 → 16 channels, 3×3 kernel, padding=1
        2. Pool layer 1: 2×2 max pooling
        3. Conv layer 2: 16 → 32 channels, 3×3 kernel, padding=1
        4. Pool layer 2: 2×2 max pooling
        5. Calculate flattened size and add final linear layer

        HINT: For a 32×32 input, the two pools give the spatial reduction
        32 → 16 → 8, so the final feature size is 32 channels × 8×8 = 2048.
        """
        super().__init__()
        ### BEGIN SOLUTION
        # Convolutional layers
        self.conv1 = Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool1 = MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool2 = MaxPool2d(kernel_size=2, stride=2)

        # Calculate flattened size for a 32×32 input:
        # 32×32 → Pool1: 16×16 → Pool2: 8×8
        # Final: 32 channels × 8×8 = 2048 features
        self.flattened_size = 32 * 8 * 8  # Will be used when we add the Linear layer

        # The final classification layer is a placeholder for now; it will be
        # added once Linear is imported from the layers module.
        self.num_classes = num_classes
        ### END SOLUTION
    def forward(self, x):
        """
        Forward pass through SimpleCNN.

        TODO: Implement CNN forward pass

        APPROACH:
        1. Apply conv1 → ReLU → pool1
        2. Apply conv2 → ReLU → pool2
        3. Flatten spatial dimensions
        4. Apply final linear layer (when available)

        For now, return the features before the final linear layer, since we
        haven't imported Linear from the layers module yet.
        """
        ### BEGIN SOLUTION
        # First conv block
        x = self.conv1(x)
        x = self.relu(x)  # ReLU activation
        x = self.pool1(x)

        # Second conv block
        x = self.conv2(x)
        x = self.relu(x)  # ReLU activation
        x = self.pool2(x)

        # Flatten for classification (reshape to 2D)
        batch_size = x.shape[0]
        x_flat = x.data.reshape(batch_size, -1)

        # Return flattened features
        # In a complete implementation, this would go through a Linear layer
        return Tensor(x_flat)
        ### END SOLUTION

    def relu(self, x):
        """Simple ReLU implementation for CNN."""
        return Tensor(np.maximum(0, x.data))

    def parameters(self):
        """Return all trainable parameters."""
        params = []
        params.extend(self.conv1.parameters())
        params.extend(self.conv2.parameters())
        # Linear layer parameters would be added here
        return params

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
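# Minimal end-to-end sketch for SimpleCNN, assuming Tensor exposes `.shape`.
# A small 8×8 input keeps the pure-Python loops fast: the two 2×2 pools
# reduce 8 → 4 → 2, so the flattened features have 32 channels × 2×2 = 128
# values. Guarded so it never runs on import.
if __name__ == "__main__":
    _model = SimpleCNN(num_classes=10)
    _x = Tensor(np.random.randn(1, 3, 8, 8))
    _features = _model(_x)  # forward currently returns flattened features
    assert _features.shape == (1, 128)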