""" Checkpoint 5: Learning (After Module 6 - Spatial) Question: "Can I process spatial data like images with convolutional operations?" """ import numpy as np import pytest def test_checkpoint_05_learning(): """ Checkpoint 5: Learning Validates that students can apply spatial operations like convolution to process image-like data efficiently - the foundation of computer vision and spatial pattern recognition. """ print("\nšŸ‘ļø Checkpoint 5: Learning") print("=" * 50) try: from tinytorch.core.tensor import Tensor from tinytorch.core.spatial import Conv2D, MaxPool2D from tinytorch.core.activations import ReLU except ImportError as e: pytest.fail(f"āŒ Cannot import required classes - complete Modules 2-6 first: {e}") # Test 1: Basic convolution operation print("šŸ” Testing convolution operation...") conv = Conv2D(in_channels=1, out_channels=3, kernel_size=3) # Create a simple "image" (single channel, 5x5 pixels) image = Tensor(np.random.randn(1, 1, 5, 5)) # batch=1, channels=1, height=5, width=5 conv_output = conv(image) expected_shape = (1, 3, 3, 3) # Output size depends on kernel size and padding assert conv_output.shape == expected_shape, f"Convolution output should be {expected_shape}, got {conv_output.shape}" print(f"āœ… Convolution: {image.shape} → {conv_output.shape}") # Test 2: Pooling operation print("šŸ“‰ Testing pooling operation...") pool = MaxPool2D(kernel_size=2) # Create larger feature map for pooling feature_map = Tensor(np.random.randn(1, 3, 4, 4)) pooled = pool(feature_map) expected_pool_shape = (1, 3, 2, 2) # Pooling reduces spatial dimensions assert pooled.shape == expected_pool_shape, f"Pooling output should be {expected_pool_shape}, got {pooled.shape}" print(f"āœ… Pooling: {feature_map.shape} → {pooled.shape}") # Test 3: CNN building block (Conv + ReLU + Pool) print("šŸ—ļø Testing CNN building block...") relu = ReLU() # Simulate a small CNN layer input_image = Tensor(np.random.randn(2, 1, 8, 8)) # 2 images, 1 channel, 8x8 # CNN forward pass: Conv → ReLU → Pool conv_out = conv(input_image) activated = relu(conv_out) final_output = pool(activated) print(f"āœ… CNN block: {input_image.shape} → Conv → ReLU → Pool → {final_output.shape}") # Test 4: Multi-channel processing print("šŸŽØ Testing multi-channel processing...") # RGB image processing rgb_conv = Conv2D(in_channels=3, out_channels=16, kernel_size=3) rgb_image = Tensor(np.random.randn(1, 3, 32, 32)) # RGB image 32x32 rgb_features = rgb_conv(rgb_image) expected_rgb_shape = (1, 16, 30, 30) # 16 feature maps assert rgb_features.shape == expected_rgb_shape, f"RGB processing should output {expected_rgb_shape}, got {rgb_features.shape}" print(f"āœ… Multi-channel: {rgb_image.shape} → {rgb_features.shape}") # Test 5: Spatial hierarchy (multiple conv layers) print("šŸ”ļø Testing spatial hierarchy...") conv1 = Conv2D(in_channels=1, out_channels=8, kernel_size=3) conv2 = Conv2D(in_channels=8, out_channels=16, kernel_size=3) pool = MaxPool2D(kernel_size=2) # Build spatial hierarchy x = Tensor(np.random.randn(1, 1, 16, 16)) # Layer 1: Conv → ReLU → Pool h1 = relu(conv1(x)) p1 = pool(h1) # Layer 2: Conv → ReLU → Pool h2 = relu(conv2(p1)) p2 = pool(h2) print(f"āœ… Spatial hierarchy: {x.shape} → {h1.shape} → {p1.shape} → {h2.shape} → {p2.shape}") # Test 6: Feature map visualization concept print("šŸ–¼ļø Testing feature map properties...") # Test that convolution preserves important properties test_image = Tensor(np.ones((1, 1, 5, 5))) # All ones image test_conv = Conv2D(in_channels=1, out_channels=1, kernel_size=3) # Set simple kernel for predictable output test_conv.weight.data = np.ones((1, 1, 3, 3)) * 0.1 # Simple averaging kernel test_conv.bias.data = np.zeros((1,)) result = test_conv(test_image) # All outputs should be similar since input was uniform output_std = np.std(result.data) assert output_std < 0.1, f"Uniform input should produce low variance output, got std={output_std}" print(f"āœ… Feature extraction: uniform input → low variance output (std={output_std:.4f})") # Test 7: Edge detection demonstration print("šŸ” Testing edge detection capability...") # Create a simple edge pattern edge_image = Tensor(np.array([ [[[0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1], [0, 0, 0, 1, 1]]]], dtype=np.float32)) # Simple edge detection kernel edge_conv = Conv2D(in_channels=1, out_channels=1, kernel_size=3) edge_conv.weight.data = np.array([[[[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]]], dtype=np.float32) edge_conv.bias.data = np.zeros((1,), dtype=np.float32) edge_response = edge_conv(edge_image) # Should detect the vertical edge assert edge_response.shape[2:] == (3, 3), f"Edge detection output should be 3x3, got {edge_response.shape[2:]}" print(f"āœ… Edge detection: {edge_image.shape} → detected features → {edge_response.shape}") print("\nšŸŽ‰ Learning Complete!") print("šŸ“ You can now process spatial data like images with convolutional operations") print("šŸ”§ Built capabilities: Convolution, pooling, CNN blocks, multi-channel processing") print("🧠 Breakthrough: You can now extract spatial features from images!") print("šŸŽÆ Next: Build attention mechanisms for sequence processing") if __name__ == "__main__": test_checkpoint_05_learning()