# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/09_spatial/spatial_dev.ipynb.

# %% auto 0
__all__ = ['Conv2d', 'MaxPool2d', 'AvgPool2d', 'SimpleCNN']

# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1
import numpy as np
from .tensor import Tensor

# %% ../../modules/source/09_spatial/spatial_dev.ipynb 6
class Conv2d:
    """
    2D Convolution layer for spatial feature extraction.

    Implements convolution with explicit loops to demonstrate
    computational complexity and memory access patterns.

    Args:
        in_channels: Number of input channels
        out_channels: Number of output feature maps
        kernel_size: Size of convolution kernel (int or tuple)
        stride: Stride of convolution (default: 1)
        padding: Zero-padding added to input (default: 0)
        bias: Whether to add learnable bias (default: True)
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        """
        Initialize Conv2d layer with proper weight initialization.

        TODO: Complete Conv2d initialization

        APPROACH:
        1. Store hyperparameters (channels, kernel_size, stride, padding)
        2. Initialize weights using He initialization for ReLU compatibility
        3. Initialize bias (if enabled) to zeros
        4. Use proper shapes: weight (out_channels, in_channels, kernel_h, kernel_w)

        WEIGHT INITIALIZATION:
        - He init: std = sqrt(2 / (in_channels * kernel_h * kernel_w))
        - This prevents vanishing/exploding gradients with ReLU

        HINT: Convert kernel_size to a tuple if it's an integer
        """
        super().__init__()
        ### BEGIN SOLUTION
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Handle kernel_size as int or tuple
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        self.stride = stride
        self.padding = padding

        # He initialization for ReLU networks
        kernel_h, kernel_w = self.kernel_size
        fan_in = in_channels * kernel_h * kernel_w
        std = np.sqrt(2.0 / fan_in)

        # Weight shape: (out_channels, in_channels, kernel_h, kernel_w)
        self.weight = Tensor(np.random.normal(0, std, (out_channels, in_channels, kernel_h, kernel_w)))

        # Bias initialization
        if bias:
            self.bias = Tensor(np.zeros(out_channels))
        else:
            self.bias = None
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through Conv2d layer.

        TODO: Implement convolution with explicit loops

        APPROACH:
        1. Extract input dimensions and validate
        2. Calculate output dimensions
        3. Apply padding if needed
        4. Implement 7 nested loops for the full convolution
        5. Add bias if present

        LOOP STRUCTURE:
        for batch in range(batch_size):
            for out_ch in range(out_channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        for k_h in range(kernel_height):
                            for k_w in range(kernel_width):
                                for in_ch in range(in_channels):
                                    # Accumulate: out += input * weight

        EXAMPLE:
        >>> conv = Conv2d(3, 16, kernel_size=3, padding=1)
        >>> x = Tensor(np.random.randn(2, 3, 32, 32))  # batch=2, RGB, 32x32
        >>> out = conv(x)
        >>> print(out.shape)  # Should be (2, 16, 32, 32)

        HINTS:
        - Handle padding by creating a padded input array
        - Watch array bounds in the inner loops
        - Accumulate products for each output position
        """
        ### BEGIN SOLUTION
        # Input validation and shape extraction
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, in_channels, in_height, in_width = x.shape
        out_channels = self.out_channels
        kernel_h, kernel_w = self.kernel_size

        # Calculate output dimensions
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Apply padding if needed
        if self.padding > 0:
            padded_input = np.pad(x.data,
                                  ((0, 0), (0, 0),
                                   (self.padding, self.padding),
                                   (self.padding, self.padding)),
                                  mode='constant', constant_values=0)
        else:
            padded_input = x.data

        # Initialize output
        output = np.zeros((batch_size, out_channels, out_height, out_width))

        # Explicit 7-nested-loop convolution to show the computational cost
        for b in range(batch_size):
            for out_ch in range(out_channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Calculate input region for this output position
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        # Accumulate convolution result
                        conv_sum = 0.0
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                for in_ch in range(in_channels):
                                    # Get input and weight values
                                    input_val = padded_input[b, in_ch, in_h_start + k_h, in_w_start + k_w]
                                    weight_val = self.weight.data[out_ch, in_ch, k_h, k_w]
                                    # Accumulate
                                    conv_sum += input_val * weight_val

                        # Store result
                        output[b, out_ch, out_h, out_w] = conv_sum

        # Add bias if present
        if self.bias is not None:
            # Broadcast bias across spatial dimensions
            for out_ch in range(out_channels):
                output[:, out_ch, :, :] += self.bias.data[out_ch]

        return Tensor(output)
        ### END SOLUTION

    def parameters(self):
        """Return trainable parameters."""
        params = [self.weight]
        if self.bias is not None:
            params.append(self.bias)
        return params

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
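# Minimal usage sketch for Conv2d, assuming Tensor wraps a NumPy array in
# `.data` and exposes `.shape` (as the code above relies on). It checks the
# output-size formula out = (in + 2*padding - kernel) // stride + 1 on a
# small input; the __main__ guard keeps it from running on import.
if __name__ == "__main__":
    _conv = Conv2d(in_channels=3, out_channels=4, kernel_size=3, stride=1, padding=1)
    _x = Tensor(np.random.randn(1, 3, 8, 8))
    _out = _conv(_x)
    # (8 + 2*1 - 3) // 1 + 1 = 8, so padding=1 preserves the spatial size
    assert _out.shape == (1, 4, 8, 8)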
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 11
class MaxPool2d:
    """
    2D Max Pooling layer for spatial dimension reduction.

    Applies the maximum operation over spatial windows, preserving the
    strongest activations while reducing computational load.

    Args:
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling operation (default: same as kernel_size)
        padding: Zero-padding added to input (default: 0)
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        """
        Initialize MaxPool2d layer.

        TODO: Store pooling parameters

        APPROACH:
        1. Convert kernel_size to a tuple if needed
        2. Set stride to kernel_size if not provided (non-overlapping)
        3. Store padding parameter

        HINT: Default stride equals kernel_size for non-overlapping windows
        """
        super().__init__()
        ### BEGIN SOLUTION
        # Handle kernel_size as int or tuple
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        # Default stride equals kernel_size (non-overlapping)
        if stride is None:
            self.stride = self.kernel_size[0]
        else:
            self.stride = stride

        self.padding = padding
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through MaxPool2d layer.

        TODO: Implement max pooling with explicit loops

        APPROACH:
        1. Extract input dimensions
        2. Calculate output dimensions
        3. Apply padding if needed
        4. Implement nested loops over the pooling windows
        5. Find the maximum value in each window

        LOOP STRUCTURE:
        for batch in range(batch_size):
            for channel in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Find max in window [in_h:in_h+k_h, in_w:in_w+k_w]
                        max_val = -infinity
                        for k_h in range(kernel_height):
                            for k_w in range(kernel_width):
                                max_val = max(max_val, input[...])

        EXAMPLE:
        >>> pool = MaxPool2d(kernel_size=2, stride=2)
        >>> x = Tensor(np.random.randn(1, 3, 8, 8))
        >>> out = pool(x)
        >>> print(out.shape)  # Should be (1, 3, 4, 4)

        HINTS:
        - Initialize max_val to negative infinity
        - Handle stride correctly when accessing the input
        - No parameters to update (pooling has no weights)
        """
        ### BEGIN SOLUTION
        # Input validation and shape extraction
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, channels, in_height, in_width = x.shape
        kernel_h, kernel_w = self.kernel_size

        # Calculate output dimensions
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Apply padding if needed (pad with -inf so padding never wins the max)
        if self.padding > 0:
            padded_input = np.pad(x.data,
                                  ((0, 0), (0, 0),
                                   (self.padding, self.padding),
                                   (self.padding, self.padding)),
                                  mode='constant', constant_values=-np.inf)
        else:
            padded_input = x.data

        # Initialize output
        output = np.zeros((batch_size, channels, out_height, out_width))

        # Explicit nested-loop max pooling
        for b in range(batch_size):
            for c in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Calculate input region for this output position
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        # Find maximum in window
                        max_val = -np.inf
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                input_val = padded_input[b, c, in_h_start + k_h, in_w_start + k_w]
                                max_val = max(max_val, input_val)

                        # Store result
                        output[b, c, out_h, out_w] = max_val

        return Tensor(output)
        ### END SOLUTION

    def parameters(self):
        """Return empty list (pooling has no parameters)."""
        return []

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
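# Minimal usage sketch for MaxPool2d on a hand-checkable input: each 2x2
# window keeps its largest entry. Guarded so it never runs on import.
if __name__ == "__main__":
    _pool = MaxPool2d(kernel_size=2)  # stride defaults to 2 (non-overlapping)
    _x = Tensor(np.arange(16, dtype=float).reshape(1, 1, 4, 4))
    _out = _pool(_x)
    # Window maxima of rows [0..3], [4..7], [8..11], [12..15] are [[5, 7], [13, 15]]
    assert _out.shape == (1, 1, 2, 2)
    assert _out.data[0, 0, 0, 0] == 5.0 and _out.data[0, 0, 1, 1] == 15.0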
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 13
class AvgPool2d:
    """
    2D Average Pooling layer for spatial dimension reduction.

    Applies the average operation over spatial windows, smoothing features
    while reducing computational load.

    Args:
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling operation (default: same as kernel_size)
        padding: Zero-padding added to input (default: 0)
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        """
        Initialize AvgPool2d layer.

        TODO: Store pooling parameters (same as MaxPool2d)

        APPROACH:
        1. Convert kernel_size to a tuple if needed
        2. Set stride to kernel_size if not provided
        3. Store padding parameter
        """
        super().__init__()
        ### BEGIN SOLUTION
        # Handle kernel_size as int or tuple
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        # Default stride equals kernel_size (non-overlapping)
        if stride is None:
            self.stride = self.kernel_size[0]
        else:
            self.stride = stride

        self.padding = padding
        ### END SOLUTION

    def forward(self, x):
        """
        Forward pass through AvgPool2d layer.

        TODO: Implement average pooling with explicit loops

        APPROACH:
        1. Use the same structure as MaxPool2d
        2. Instead of the max, compute the average of each window
        3. Divide the sum by the window area for a true average

        LOOP STRUCTURE:
        for batch in range(batch_size):
            for channel in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Compute average in window
                        window_sum = 0
                        for k_h in range(kernel_height):
                            for k_w in range(kernel_width):
                                window_sum += input[...]
                        avg_val = window_sum / (kernel_height * kernel_width)

        HINT: Remember to divide by the window area to get a true average
        """
        ### BEGIN SOLUTION
        # Input validation and shape extraction
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, channels, in_height, in_width = x.shape
        kernel_h, kernel_w = self.kernel_size

        # Calculate output dimensions
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Apply padding if needed (padded zeros are included in the average)
        if self.padding > 0:
            padded_input = np.pad(x.data,
                                  ((0, 0), (0, 0),
                                   (self.padding, self.padding),
                                   (self.padding, self.padding)),
                                  mode='constant', constant_values=0)
        else:
            padded_input = x.data

        # Initialize output
        output = np.zeros((batch_size, channels, out_height, out_width))

        # Explicit nested-loop average pooling
        for b in range(batch_size):
            for c in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Calculate input region for this output position
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        # Compute sum in window
                        window_sum = 0.0
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                input_val = padded_input[b, c, in_h_start + k_h, in_w_start + k_w]
                                window_sum += input_val

                        # Compute average
                        avg_val = window_sum / (kernel_h * kernel_w)

                        # Store result
                        output[b, c, out_h, out_w] = avg_val

        return Tensor(output)
        ### END SOLUTION

    def parameters(self):
        """Return empty list (pooling has no parameters)."""
        return []

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
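# Minimal usage sketch for AvgPool2d on the same hand-checkable input: each
# 2x2 window is replaced by its mean. Guarded so it never runs on import.
if __name__ == "__main__":
    _pool = AvgPool2d(kernel_size=2)
    _x = Tensor(np.arange(16, dtype=float).reshape(1, 1, 4, 4))
    _out = _pool(_x)
    # Top-left window {0, 1, 4, 5} has mean 10 / 4 = 2.5
    assert _out.data[0, 0, 0, 0] == 2.5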
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 21
class SimpleCNN:
    """
    Simple CNN demonstrating spatial operations integration.

    Architecture:
    - Conv2d(3→16, 3×3) + ReLU + MaxPool(2×2)
    - Conv2d(16→32, 3×3) + ReLU + MaxPool(2×2)
    - Flatten + Linear(features→num_classes)
    """

    def __init__(self, num_classes=10):
        """
        Initialize SimpleCNN.

        TODO: Build CNN architecture with spatial and dense layers

        APPROACH:
        1. Conv layer 1: 3 → 16 channels, 3×3 kernel, padding=1
        2. Pool layer 1: 2×2 max pooling
        3. Conv layer 2: 16 → 32 channels, 3×3 kernel, padding=1
        4. Pool layer 2: 2×2 max pooling
        5. Calculate flattened size and add final linear layer

        HINT: For a 32×32 input, the two pools give the spatial reduction
        32 → 16 → 8, so the final feature size is 32 channels × 8×8 = 2048.
        """
        super().__init__()
        ### BEGIN SOLUTION
        # Convolutional layers
        self.conv1 = Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool1 = MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool2 = MaxPool2d(kernel_size=2, stride=2)

        # Calculate flattened size for a 32×32 input:
        # 32×32 → Pool1: 16×16 → Pool2: 8×8
        # Final: 32 channels × 8×8 = 2048 features
        self.flattened_size = 32 * 8 * 8  # Will be used when we add the Linear layer

        # The final classification layer is a placeholder for now; it will be
        # added once Linear is imported from the layers module.
        self.num_classes = num_classes
        ### END SOLUTION
    def forward(self, x):
        """
        Forward pass through SimpleCNN.

        TODO: Implement CNN forward pass

        APPROACH:
        1. Apply conv1 → ReLU → pool1
        2. Apply conv2 → ReLU → pool2
        3. Flatten spatial dimensions
        4. Apply final linear layer (when available)

        For now, return the features before the final linear layer, since we
        haven't imported Linear from the layers module yet.
        """
        ### BEGIN SOLUTION
        # First conv block
        x = self.conv1(x)
        x = self.relu(x)  # ReLU activation
        x = self.pool1(x)

        # Second conv block
        x = self.conv2(x)
        x = self.relu(x)  # ReLU activation
        x = self.pool2(x)

        # Flatten for classification (reshape to 2D)
        batch_size = x.shape[0]
        x_flat = x.data.reshape(batch_size, -1)

        # Return flattened features
        # In a complete implementation, this would go through a Linear layer
        return Tensor(x_flat)
        ### END SOLUTION

    def relu(self, x):
        """Simple ReLU implementation for CNN."""
        return Tensor(np.maximum(0, x.data))

    def parameters(self):
        """Return all trainable parameters."""
        params = []
        params.extend(self.conv1.parameters())
        params.extend(self.conv2.parameters())
        # Linear layer parameters would be added here
        return params

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
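# Minimal end-to-end sketch for SimpleCNN, assuming Tensor exposes `.shape`.
# A small 8×8 input keeps the pure-Python loops fast: the two 2×2 pools
# reduce 8 → 4 → 2, so the flattened features have 32 channels × 2×2 = 128
# values. Guarded so it never runs on import.
if __name__ == "__main__":
    _model = SimpleCNN(num_classes=10)
    _x = Tensor(np.random.randn(1, 3, 8, 8))
    _features = _model(_x)  # forward currently returns flattened features
    assert _features.shape == (1, 128)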