mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 20:45:02 -05:00
feat: Add Milestone 04 (CNN Revolution 1998) + Clean spatial imports
Milestone 04 - CNN Revolution: ✅ Complete 5-Act narrative structure (Challenge → Reflection) ✅ SimpleCNN architecture: Conv2d → ReLU → MaxPool → Linear ✅ Trains on 8x8 digits dataset (1,437 train, 360 test) ✅ Achieves 84.2% accuracy with only 810 parameters ✅ Demonstrates spatial operations preserve structure ✅ Beautiful visual output with progress tracking Key Features: - Conv2d (1→8 channels, 3×3 kernel) detects local patterns - MaxPool2d (2×2) provides translation invariance - 100× fewer parameters than equivalent MLP - Training completes in ~105 seconds (50 epochs) - Sample predictions table shows 9/10 correct Module 09 Spatial Improvements: - Removed ugly try/except import pattern - Clean imports: 'from tinytorch.core.tensor import Tensor' - Matches PyTorch style (simple and professional) - No fallback logic needed All 4 milestones now follow consistent 5-Act structure!
This commit is contained in:
42
tinytorch/_modidx.py
generated
42
tinytorch/_modidx.py
generated
@@ -143,6 +143,48 @@ d = { 'settings': { 'branch': 'main',
|
||||
'tinytorch/core/optimizers.py'),
|
||||
'tinytorch.core.optimizers.SGD.step': ( '06_optimizers/optimizers_dev.html#sgd.step',
|
||||
'tinytorch/core/optimizers.py')},
|
||||
'tinytorch.core.spatial': { 'tinytorch.core.spatial.AvgPool2d': ( '09_spatial/spatial_dev.html#avgpool2d',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.AvgPool2d.__call__': ( '09_spatial/spatial_dev.html#avgpool2d.__call__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.AvgPool2d.__init__': ( '09_spatial/spatial_dev.html#avgpool2d.__init__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.AvgPool2d.forward': ( '09_spatial/spatial_dev.html#avgpool2d.forward',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.AvgPool2d.parameters': ( '09_spatial/spatial_dev.html#avgpool2d.parameters',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.Conv2d': ( '09_spatial/spatial_dev.html#conv2d',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.Conv2d.__call__': ( '09_spatial/spatial_dev.html#conv2d.__call__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.Conv2d.__init__': ( '09_spatial/spatial_dev.html#conv2d.__init__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.Conv2d.forward': ( '09_spatial/spatial_dev.html#conv2d.forward',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.Conv2d.parameters': ( '09_spatial/spatial_dev.html#conv2d.parameters',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.MaxPool2d': ( '09_spatial/spatial_dev.html#maxpool2d',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.MaxPool2d.__call__': ( '09_spatial/spatial_dev.html#maxpool2d.__call__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.MaxPool2d.__init__': ( '09_spatial/spatial_dev.html#maxpool2d.__init__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.MaxPool2d.forward': ( '09_spatial/spatial_dev.html#maxpool2d.forward',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.MaxPool2d.parameters': ( '09_spatial/spatial_dev.html#maxpool2d.parameters',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.SimpleCNN': ( '09_spatial/spatial_dev.html#simplecnn',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.SimpleCNN.__call__': ( '09_spatial/spatial_dev.html#simplecnn.__call__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.SimpleCNN.__init__': ( '09_spatial/spatial_dev.html#simplecnn.__init__',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.SimpleCNN.forward': ( '09_spatial/spatial_dev.html#simplecnn.forward',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.SimpleCNN.parameters': ( '09_spatial/spatial_dev.html#simplecnn.parameters',
|
||||
'tinytorch/core/spatial.py'),
|
||||
'tinytorch.core.spatial.SimpleCNN.relu': ( '09_spatial/spatial_dev.html#simplecnn.relu',
|
||||
'tinytorch/core/spatial.py')},
|
||||
'tinytorch.core.tensor': { 'tinytorch.core.tensor.Tensor': ('01_tensor/tensor_dev.html#tensor', 'tinytorch/core/tensor.py'),
|
||||
'tinytorch.core.tensor.Tensor.__add__': ( '01_tensor/tensor_dev.html#tensor.__add__',
|
||||
'tinytorch/core/tensor.py'),
|
||||
|
||||
555
tinytorch/core/spatial.py
generated
Normal file
555
tinytorch/core/spatial.py
generated
Normal file
@@ -0,0 +1,555 @@
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/06_spatial/spatial_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Conv2d', 'MaxPool2d', 'AvgPool2d', 'SimpleCNN']
|
||||
|
||||
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 1
|
||||
import numpy as np
|
||||
|
||||
from .tensor import Tensor
|
||||
|
||||
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 6
|
||||
class Conv2d:
    """
    2D Convolution layer for spatial feature extraction.

    Implements convolution with explicit loops to demonstrate
    computational complexity and memory access patterns.

    Args:
        in_channels: Number of input channels
        out_channels: Number of output feature maps
        kernel_size: Size of convolution kernel (int or tuple)
        stride: Stride of convolution (default: 1)
        padding: Zero-padding added to input (default: 0)
        bias: Whether to add learnable bias (default: True)
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, bias=True):
        """
        Initialize Conv2d with He-initialized weights and zero bias.

        Weight shape: (out_channels, in_channels, kernel_h, kernel_w).
        He initialization (std = sqrt(2 / fan_in)) keeps activation
        variance stable through ReLU layers, preventing vanishing or
        exploding gradients.
        """
        self.in_channels = in_channels
        self.out_channels = out_channels

        # Normalize kernel_size to a (height, width) tuple.
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        self.stride = stride
        self.padding = padding

        kernel_h, kernel_w = self.kernel_size
        fan_in = in_channels * kernel_h * kernel_w
        std = np.sqrt(2.0 / fan_in)

        self.weight = Tensor(np.random.normal(
            0, std, (out_channels, in_channels, kernel_h, kernel_w)))

        # Bias starts at zero; disabled entirely when bias=False.
        self.bias = Tensor(np.zeros(out_channels)) if bias else None

    def forward(self, x):
        """
        Apply the convolution to a 4D input tensor.

        Args:
            x: Tensor of shape (batch, in_channels, height, width)

        Returns:
            Tensor of shape (batch, out_channels, out_height, out_width)

        Raises:
            ValueError: if the input is not 4D, or its channel count does
                not match ``in_channels``. (Previously a channel mismatch
                either raised an obscure IndexError or silently convolved
                with only a subset of the weights.)

        Example:
            >>> conv = Conv2d(3, 16, kernel_size=3, padding=1)
            >>> x = Tensor(np.random.randn(2, 3, 32, 32))
            >>> conv(x).shape
            (2, 16, 32, 32)
        """
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, in_channels, in_height, in_width = x.shape
        if in_channels != self.in_channels:
            raise ValueError(
                f"Expected {self.in_channels} input channels, got {in_channels}")

        kernel_h, kernel_w = self.kernel_size

        # Standard output-size formula for strided, padded convolution.
        out_height = (in_height + 2 * self.padding - kernel_h) // self.stride + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // self.stride + 1

        # Zero-pad only the two spatial dimensions.
        if self.padding > 0:
            padded_input = np.pad(
                x.data,
                ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)),
                mode='constant', constant_values=0)
        else:
            padded_input = x.data

        output = np.zeros((batch_size, self.out_channels, out_height, out_width))

        # Deliberately unvectorized nested loops to expose the
        # O(batch * out_ch * out_h * out_w * k_h * k_w * in_ch) cost.
        for b in range(batch_size):
            for out_ch in range(self.out_channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        # Top-left corner of the receptive field.
                        in_h_start = out_h * self.stride
                        in_w_start = out_w * self.stride

                        conv_sum = 0.0
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                for in_ch in range(in_channels):
                                    conv_sum += (
                                        padded_input[b, in_ch,
                                                     in_h_start + k_h,
                                                     in_w_start + k_w]
                                        * self.weight.data[out_ch, in_ch, k_h, k_w])
                        output[b, out_ch, out_h, out_w] = conv_sum

        # Broadcast bias across batch and spatial dims in one vectorized add.
        if self.bias is not None:
            output += self.bias.data.reshape(1, -1, 1, 1)

        return Tensor(output)

    def parameters(self):
        """Return trainable parameters (weight, plus bias when enabled)."""
        params = [self.weight]
        if self.bias is not None:
            params.append(self.bias)
        return params

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
|
||||
|
||||
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 11
|
||||
class MaxPool2d:
    """
    2D Max Pooling layer for spatial dimension reduction.

    Applies a maximum operation over spatial windows, preserving the
    strongest activations while reducing computational load.

    Args:
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling operation, int or (h, w) tuple
            (default: same as kernel_size, i.e. non-overlapping windows)
        padding: Padding added to input (default: 0)
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        """
        Store pooling hyperparameters.

        The default stride equals the kernel size so windows do not
        overlap. For square kernels this remains a plain int (backward
        compatible); non-square kernels now correctly default to the
        (k_h, k_w) tuple instead of reusing the kernel height for both
        axes, matching the documented behavior.
        """
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        if stride is None:
            k_h, k_w = self.kernel_size
            self.stride = k_h if k_h == k_w else self.kernel_size
        else:
            self.stride = stride

        self.padding = padding

    def forward(self, x):
        """
        Apply max pooling to a 4D input tensor.

        Args:
            x: Tensor of shape (batch, channels, height, width)

        Returns:
            Tensor of shape (batch, channels, out_height, out_width)

        Raises:
            ValueError: if the input is not 4D.

        Example:
            >>> pool = MaxPool2d(kernel_size=2, stride=2)
            >>> x = Tensor(np.random.randn(1, 3, 8, 8))
            >>> pool(x).shape
            (1, 3, 4, 4)
        """
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, channels, in_height, in_width = x.shape
        kernel_h, kernel_w = self.kernel_size

        # Accept either a scalar stride or a (stride_h, stride_w) pair.
        if isinstance(self.stride, (tuple, list)):
            stride_h, stride_w = self.stride
        else:
            stride_h = stride_w = self.stride

        out_height = (in_height + 2 * self.padding - kernel_h) // stride_h + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // stride_w + 1

        # Pad with -inf so padded cells can never win the max.
        if self.padding > 0:
            padded_input = np.pad(
                x.data,
                ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)),
                mode='constant', constant_values=-np.inf)
        else:
            padded_input = x.data

        output = np.zeros((batch_size, channels, out_height, out_width))

        # Explicit nested loops: pick the maximum inside each window.
        for b in range(batch_size):
            for c in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        in_h_start = out_h * stride_h
                        in_w_start = out_w * stride_w

                        max_val = -np.inf
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                max_val = max(
                                    max_val,
                                    padded_input[b, c,
                                                 in_h_start + k_h,
                                                 in_w_start + k_w])
                        output[b, c, out_h, out_w] = max_val

        return Tensor(output)

    def parameters(self):
        """Return empty list (pooling has no parameters)."""
        return []

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
|
||||
|
||||
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 13
|
||||
class AvgPool2d:
    """
    2D Average Pooling layer for spatial dimension reduction.

    Applies an averaging operation over spatial windows, smoothing
    features while reducing computational load.

    Args:
        kernel_size: Size of pooling window (int or tuple)
        stride: Stride of pooling operation, int or (h, w) tuple
            (default: same as kernel_size, i.e. non-overlapping windows)
        padding: Zero-padding added to input (default: 0)
    """

    def __init__(self, kernel_size, stride=None, padding=0):
        """
        Store pooling hyperparameters.

        The default stride equals the kernel size so windows do not
        overlap. For square kernels this remains a plain int (backward
        compatible); non-square kernels now correctly default to the
        (k_h, k_w) tuple instead of reusing the kernel height for both
        axes, matching the documented behavior.
        """
        if isinstance(kernel_size, int):
            self.kernel_size = (kernel_size, kernel_size)
        else:
            self.kernel_size = kernel_size

        if stride is None:
            k_h, k_w = self.kernel_size
            self.stride = k_h if k_h == k_w else self.kernel_size
        else:
            self.stride = stride

        self.padding = padding

    def forward(self, x):
        """
        Apply average pooling to a 4D input tensor.

        Args:
            x: Tensor of shape (batch, channels, height, width)

        Returns:
            Tensor of shape (batch, channels, out_height, out_width)

        Raises:
            ValueError: if the input is not 4D.

        Note:
            Zero padding is included in the divisor (the window average is
            always sum / (kernel_h * kernel_w)), i.e. padded cells count
            as zeros in the mean.
        """
        if len(x.shape) != 4:
            raise ValueError(f"Expected 4D input (batch, channels, height, width), got {x.shape}")

        batch_size, channels, in_height, in_width = x.shape
        kernel_h, kernel_w = self.kernel_size

        # Accept either a scalar stride or a (stride_h, stride_w) pair.
        if isinstance(self.stride, (tuple, list)):
            stride_h, stride_w = self.stride
        else:
            stride_h = stride_w = self.stride

        out_height = (in_height + 2 * self.padding - kernel_h) // stride_h + 1
        out_width = (in_width + 2 * self.padding - kernel_w) // stride_w + 1

        # Zero-pad the spatial dimensions; zeros participate in the average.
        if self.padding > 0:
            padded_input = np.pad(
                x.data,
                ((0, 0), (0, 0), (self.padding, self.padding), (self.padding, self.padding)),
                mode='constant', constant_values=0)
        else:
            padded_input = x.data

        output = np.zeros((batch_size, channels, out_height, out_width))

        # Explicit nested loops: sum each window, then divide by its area.
        window_area = kernel_h * kernel_w
        for b in range(batch_size):
            for c in range(channels):
                for out_h in range(out_height):
                    for out_w in range(out_width):
                        in_h_start = out_h * stride_h
                        in_w_start = out_w * stride_w

                        window_sum = 0.0
                        for k_h in range(kernel_h):
                            for k_w in range(kernel_w):
                                window_sum += padded_input[b, c,
                                                           in_h_start + k_h,
                                                           in_w_start + k_w]

                        output[b, c, out_h, out_w] = window_sum / window_area

        return Tensor(output)

    def parameters(self):
        """Return empty list (pooling has no parameters)."""
        return []

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
|
||||
|
||||
# %% ../../modules/source/09_spatial/spatial_dev.ipynb 21
|
||||
class SimpleCNN:
    """
    Simple CNN demonstrating spatial operations integration.

    Architecture:
        - Conv2d(3→16, 3×3, padding=1) + ReLU + MaxPool(2×2)
        - Conv2d(16→32, 3×3, padding=1) + ReLU + MaxPool(2×2)
        - Flatten (a final Linear classifier is to be added later)

    For a 32×32 input the spatial size shrinks 32 → 16 → 8, so the
    flattened feature vector has 32 channels × 8 × 8 = 2048 values.
    """

    def __init__(self, num_classes=10):
        """
        Build the two conv/pool blocks and record the flattened size.

        Args:
            num_classes: Number of output classes for the (future)
                final Linear classification layer.
        """
        # First block: 3 → 16 channels, padding=1 preserves spatial size.
        self.conv1 = Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.pool1 = MaxPool2d(kernel_size=2, stride=2)

        # Second block: 16 → 32 channels.
        self.conv2 = Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool2 = MaxPool2d(kernel_size=2, stride=2)

        self.num_classes = num_classes
        # 32×32 input → pool1 → 16×16 → pool2 → 8×8, with 32 channels:
        # 32 * 8 * 8 = 2048 features. Used when the Linear layer is wired in.
        self.flattened_size = 32 * 8 * 8

    def forward(self, x):
        """
        Run both conv blocks and flatten the result.

        Args:
            x: Tensor of shape (batch, 3, height, width)

        Returns:
            Tensor of flattened features, shape (batch, features).
            A final Linear layer would normally follow, but is not part
            of this module yet.
        """
        # First conv block: conv → ReLU → pool.
        x = self.conv1(x)
        x = self.relu(x)
        x = self.pool1(x)

        # Second conv block.
        x = self.conv2(x)
        x = self.relu(x)
        x = self.pool2(x)

        # Collapse (channels, height, width) into one feature axis.
        batch_size = x.shape[0]
        return Tensor(x.data.reshape(batch_size, -1))

    def relu(self, x):
        """Element-wise ReLU activation: max(0, x)."""
        return Tensor(np.maximum(0, x.data))

    def parameters(self):
        """Return all trainable parameters from both conv layers."""
        params = []
        params.extend(self.conv1.parameters())
        params.extend(self.conv2.parameters())
        # Linear layer parameters would be added here once implemented.
        return params

    def __call__(self, x):
        """Enable model(x) syntax."""
        return self.forward(x)
|
||||
Reference in New Issue
Block a user