mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-22 09:49:40 -05:00
Test Fixes (External pytest tests - all passing): - Module 03: Reverted .weights for test helper classes - Module 08: Fixed DataLoader data format (tuple → list(zip())) - Module 10: Use CharTokenizer instead of abstract Tokenizer - Module 15: Fixed KVCache constructor args and seq_len - Module 19: Fixed Benchmark constructor args Tito CLI Improvements: - Added module name resolver: "15" → "15_quantization" - Added .ipynb file support in _get_dev_file_path() - Added notebook-to-Python conversion using jupytext - Inline tests now execute notebooks correctly Results: - External tests: 36/36 passing (100%) - Tito inline tests: 15/20 passing (75%) - Remaining failures are module code bugs, not test framework issues
1323 lines
56 KiB
Python
1323 lines
56 KiB
Python
"""
|
|
Module 06: Progressive Integration Tests
|
|
Tests that Module 06 (Spatial/CNN Operations) works correctly AND that the foundation stack (01→05) still works.
|
|
|
|
DEPENDENCY CHAIN: 01_setup → 02_tensor → 03_activations → 04_layers → 05_dense → 06_spatial
|
|
This is where we enable spatial processing for images and computer vision.
|
|
|
|
🎯 WHAT THIS TESTS:
|
|
- Module 06: Convolutional layers, pooling operations, spatial processing
|
|
- Integration: CNNs work with tensors, layers, and activations from previous modules
|
|
- Regression: Foundation stack (01→05) still works correctly
|
|
- Preparation: Ready for advanced architectures (attention, training, etc.)
|
|
|
|
💡 FOR STUDENTS: If tests fail, check:
|
|
1. Does your Conv2D class exist in tinytorch.core.spatial?
|
|
2. Does Conv2D inherit from Layer (Module 04)?
|
|
3. Do convolution operations work with Tensor objects?
|
|
4. Are spatial dimensions handled correctly?
|
|
|
|
🔧 DEBUGGING HELP:
|
|
- Conv2D input: (batch_size, channels, height, width)
|
|
- Conv2D output: (batch_size, out_channels, out_height, out_width)
|
|
- Pooling reduces spatial dimensions but preserves channels
|
|
"""
|
|
|
|
import numpy as np
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add project root to path
|
|
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
|
|
|
|
class TestFoundationStackStillWorks:
|
|
"""
|
|
🔄 REGRESSION CHECK: Verify foundation stack (01→05) still works after spatial development.
|
|
|
|
💡 If these fail: You may have broken something in the foundation while working on CNN operations.
|
|
🔧 Fix: Check that your spatial code doesn't interfere with basic neural network functionality.
|
|
"""
|
|
|
|
def test_foundation_pipeline_stable(self):
|
|
"""
|
|
✅ TEST: Complete foundation pipeline (01→05) should still work
|
|
|
|
📋 FOUNDATION COMPONENTS:
|
|
- Setup environment working
|
|
- Tensor operations working
|
|
- Activation functions working
|
|
- Layer base class working
|
|
- Dense networks working
|
|
|
|
🚨 IF FAILS: Core foundation broken by spatial development
|
|
"""
|
|
try:
|
|
# Test foundation components still work
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.layers import Linear
|
|
from tinytorch.core.activations import ReLU
|
|
|
|
# Create simple neural network
|
|
dense = Linear(10, 5)
|
|
relu = ReLU()
|
|
|
|
# Test forward pass
|
|
x = Tensor(np.random.randn(4, 10))
|
|
h = dense(x)
|
|
output = relu(h)
|
|
|
|
assert output.shape == (4, 5), \
|
|
f"❌ Foundation broken. Expected (4, 5), got {output.shape}"
|
|
|
|
assert np.all(output.data >= 0), \
|
|
"❌ ReLU not working in foundation"
|
|
|
|
except ImportError as e:
|
|
assert False, f"""
|
|
❌ FOUNDATION IMPORT BROKEN!
|
|
|
|
🔍 IMPORT ERROR: {str(e)}
|
|
|
|
🔧 HOW TO FIX:
|
|
1. Check all foundation modules are exported correctly
|
|
2. Run: tito module complete 02_tensor
|
|
3. Run: tito module complete 04_layers
|
|
4. Run: tito module complete 05_dense
|
|
5. Test imports individually:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.layers import Linear
|
|
from tinytorch.core.activations import ReLU
|
|
|
|
💡 FOUNDATION REQUIREMENTS:
|
|
- Tensor: Basic tensor operations
|
|
- Dense: Fully connected layers
|
|
- ReLU: Non-linear activations
|
|
- Layer: Base class for all layers
|
|
"""
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ FOUNDATION FUNCTIONALITY BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 POSSIBLE CAUSES:
|
|
1. Dense layer forward pass broken
|
|
2. ReLU activation function broken
|
|
3. Tensor operations corrupted
|
|
4. Layer inheritance issues
|
|
|
|
💡 DEBUG STEPS:
|
|
1. Test each component separately
|
|
2. Check Dense layer: dense = Linear(5, 3); print(linear.weight.shape)
|
|
3. Check ReLU: relu = ReLU(); print(relu(Tensor([-1, 1])).data)
|
|
4. Run foundation tests: python tests/run_all_modules.py --module module_05
|
|
"""
|
|
|
|
def test_neural_network_capability_stable(self):
|
|
"""
|
|
✅ TEST: Can still build neural networks after adding spatial operations
|
|
|
|
📋 NEURAL NETWORK CAPABILITY:
|
|
- Multi-layer networks
|
|
- Non-linear problem solving
|
|
- Batch processing
|
|
- Parameter management
|
|
|
|
🎯 This ensures spatial additions don't break core ML functionality
|
|
"""
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.layers import Linear
|
|
from tinytorch.core.activations import ReLU, Sigmoid
|
|
|
|
# Build 3-layer network for XOR problem
|
|
layer1 = Linear(2, 4, bias=True)
|
|
layer2 = Linear(4, 1, bias=True)
|
|
relu = ReLU()
|
|
sigmoid = Sigmoid()
|
|
|
|
# XOR problem inputs
|
|
X = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32))
|
|
|
|
# Forward pass through complete network
|
|
h = relu(layer1(X)) # Non-linear hidden layer
|
|
logits = layer2(h) # Output layer
|
|
predictions = sigmoid(logits) # Probabilities
|
|
|
|
assert predictions.shape == (4, 1), \
|
|
f"❌ Neural network shape broken. Expected (4, 1), got {predictions.shape}"
|
|
|
|
assert np.all(predictions.data >= 0) and np.all(predictions.data <= 1), \
|
|
"❌ Neural network output not in valid range [0, 1]"
|
|
|
|
# Network should have capacity for XOR (non-linear problem)
|
|
param_count = layer1.weight.data.size + layer1.bias.data.size + \
|
|
layer2.weight.data.size + layer2.bias.data.size
|
|
|
|
assert param_count >= 9, \
|
|
f"❌ Network has insufficient parameters for XOR. Need ≥9, got {param_count}"
|
|
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ NEURAL NETWORK CAPABILITY BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 NEURAL NETWORK REQUIREMENTS:
|
|
1. Dense layers must work correctly
|
|
2. Activations must chain properly
|
|
3. Multi-layer networks must function
|
|
4. Batch processing must work
|
|
5. Parameter storage must be intact
|
|
|
|
💡 XOR PROBLEM TEST:
|
|
This is a key capability test because XOR requires:
|
|
- Non-linear activation functions
|
|
- Multi-layer architecture
|
|
- Sufficient parameters
|
|
|
|
🧪 DEBUG CHECKLIST:
|
|
□ Dense layer creates correct weight/bias shapes?
|
|
□ ReLU applies element-wise to all inputs?
|
|
□ Sigmoid produces values in [0, 1] range?
|
|
□ Layer chaining preserves tensor operations?
|
|
"""
|
|
|
|
|
|
class TestModule06SpatialCore:
|
|
"""
|
|
🆕 NEW FUNCTIONALITY: Test Module 06 (Spatial/CNN) core implementation.
|
|
|
|
💡 What you're implementing: Convolutional and pooling operations for computer vision.
|
|
🎯 Goal: Enable processing of images and spatial data with CNNs.
|
|
"""
|
|
|
|
def test_conv2d_layer_exists(self):
|
|
"""
|
|
✅ TEST: Conv2D layer - Core of convolutional neural networks
|
|
|
|
📋 WHAT YOU NEED TO IMPLEMENT:
|
|
class Conv2D(Layer):
|
|
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
|
|
# Initialize convolutional weights and bias
|
|
def forward(self, x):
|
|
# Perform 2D convolution operation
|
|
|
|
🚨 IF FAILS: Conv2D layer doesn't exist or missing components
|
|
"""
|
|
try:
|
|
from tinytorch.core.spatial import Conv2D
|
|
from tinytorch.core.layers import Layer
|
|
|
|
# Conv2D should inherit from Layer
|
|
assert issubclass(Conv2D, Layer), \
|
|
"❌ Conv2D must inherit from Layer base class"
|
|
|
|
# Test Conv2D creation
|
|
conv = Conv2D(in_channels=3, out_channels=16, kernel_size=3)
|
|
|
|
# Should have convolutional parameters
|
|
assert hasattr(conv, 'weight') or hasattr(conv, 'kernel'), \
|
|
"❌ Conv2D missing convolution weights/kernel"
|
|
|
|
# Should be callable (inherits from Layer)
|
|
assert callable(conv), \
|
|
"❌ Conv2D should be callable (inherit __call__ from Layer)"
|
|
|
|
# Check parameter shapes (basic validation)
|
|
if hasattr(conv, 'weight'):
|
|
weights = conv.weights
|
|
expected_shape = (16, 3, 3, 3) # (out_channels, in_channels, kernel_h, kernel_w)
|
|
assert weights.shape == expected_shape, \
|
|
f"❌ Conv2D weights wrong shape. Expected {expected_shape}, got {weights.shape}"
|
|
|
|
except ImportError as e:
|
|
assert False, f"""
|
|
❌ CONV2D LAYER MISSING!
|
|
|
|
🔍 IMPORT ERROR: {str(e)}
|
|
|
|
🔧 HOW TO IMPLEMENT:
|
|
|
|
1. Create in modules/06_spatial/06_spatial.py:
|
|
|
|
from tinytorch.core.layers import Layer
|
|
from tinytorch.core.tensor import Tensor
|
|
import numpy as np
|
|
|
|
class Conv2D(Layer):
|
|
'''2D Convolutional layer for computer vision.'''
|
|
|
|
def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
|
|
self.in_channels = in_channels
|
|
self.out_channels = out_channels
|
|
self.kernel_size = kernel_size
|
|
self.stride = stride
|
|
self.padding = padding
|
|
|
|
# Initialize convolution weights
|
|
# Shape: (out_channels, in_channels, kernel_size, kernel_size)
|
|
self.weights = Tensor(np.random.randn(
|
|
out_channels, in_channels, kernel_size, kernel_size
|
|
) * 0.1)
|
|
|
|
# Initialize bias
|
|
self.bias = Tensor(np.random.randn(out_channels) * 0.1)
|
|
|
|
def forward(self, x):
|
|
# Implement 2D convolution
|
|
# Input: (batch_size, in_channels, height, width)
|
|
# Output: (batch_size, out_channels, out_height, out_width)
|
|
|
|
# For now, simplified implementation
|
|
batch_size, in_ch, height, width = x.shape
|
|
|
|
# Calculate output dimensions
|
|
out_height = (height + 2 * self.padding - self.kernel_size) // self.stride + 1
|
|
out_width = (width + 2 * self.padding - self.kernel_size) // self.stride + 1
|
|
|
|
# Placeholder implementation (you'll implement actual convolution)
|
|
output_shape = (batch_size, self.out_channels, out_height, out_width)
|
|
output_data = np.random.randn(*output_shape) # Replace with real convolution
|
|
|
|
return Tensor(output_data)
|
|
|
|
2. Export the module:
|
|
tito module complete 06_spatial
|
|
|
|
📚 CONVOLUTION CONCEPTS:
|
|
- Kernel/Filter: Small weight matrix that slides over input
|
|
- Stride: How much kernel moves each step
|
|
- Padding: Zero-padding around input edges
|
|
- Output size: (input + 2*padding - kernel) / stride + 1
|
|
"""
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ CONV2D LAYER BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 CONV2D REQUIREMENTS:
|
|
1. Must inherit from Layer base class
|
|
2. Must have __init__ with (in_channels, out_channels, kernel_size)
|
|
3. Must have weights with shape (out_ch, in_ch, k_size, k_size)
|
|
4. Must have forward() method
|
|
5. Must be callable via Layer.__call__()
|
|
|
|
💡 COMPUTER VISION FOUNDATION:
|
|
Conv2D is the core building block for:
|
|
- Image classification (ResNet, VGG)
|
|
- Object detection (YOLO, R-CNN)
|
|
- Image generation (GANs, VAEs)
|
|
- Medical imaging, autonomous driving, etc.
|
|
"""
|
|
|
|
def test_pooling_operations(self):
|
|
"""
|
|
✅ TEST: Pooling operations - Reduce spatial dimensions in CNNs
|
|
|
|
📋 POOLING TYPES:
|
|
- MaxPool2D: Take maximum value in each region
|
|
- AvgPool2D: Take average value in each region
|
|
- Used to reduce overfitting and computational cost
|
|
|
|
🎯 Essential for efficient CNN architectures
|
|
"""
|
|
try:
|
|
from tinytorch.core.spatial import MaxPool2D
|
|
from tinytorch.core.tensor import Tensor
|
|
|
|
# Test MaxPool2D creation
|
|
pool = MaxPool2D(kernel_size=2, stride=2)
|
|
|
|
# Test pooling operation
|
|
# Input: 4x4 image, pooling 2x2 -> 2x2 output
|
|
x = Tensor(np.array([[[[1, 2, 3, 4],
|
|
[5, 6, 7, 8],
|
|
[9, 10, 11, 12],
|
|
[13, 14, 15, 16]]]], dtype=np.float32)) # (1, 1, 4, 4)
|
|
|
|
output = pool(x)
|
|
|
|
# MaxPool 2x2 should take max of each 2x2 region
|
|
expected_shape = (1, 1, 2, 2)
|
|
assert output.shape == expected_shape, \
|
|
f"❌ MaxPool output shape wrong. Expected {expected_shape}, got {output.shape}"
|
|
|
|
# Check values (max of each 2x2 region)
|
|
expected_values = np.array([[[[6, 8], [14, 16]]]]) # Max of each 2x2 block
|
|
assert np.array_equal(output.data, expected_values), \
|
|
f"❌ MaxPool values wrong. Expected {expected_values}, got {output.data}"
|
|
|
|
except ImportError as e:
|
|
assert False, f"""
|
|
❌ POOLING OPERATIONS MISSING!
|
|
|
|
🔍 IMPORT ERROR: {str(e)}
|
|
|
|
🔧 HOW TO IMPLEMENT MaxPool2D:
|
|
|
|
class MaxPool2D:
|
|
'''2D Max pooling for downsampling spatial dimensions.'''
|
|
|
|
def __init__(self, kernel_size, stride=None):
|
|
self.kernel_size = kernel_size
|
|
self.stride = stride if stride is not None else kernel_size
|
|
|
|
def __call__(self, x):
|
|
# Input: (batch_size, channels, height, width)
|
|
batch_size, channels, height, width = x.shape
|
|
|
|
# Calculate output dimensions
|
|
out_height = height // self.stride
|
|
out_width = width // self.stride
|
|
|
|
# Perform max pooling (simplified implementation)
|
|
output = np.zeros((batch_size, channels, out_height, out_width))
|
|
|
|
for b in range(batch_size):
|
|
for c in range(channels):
|
|
for h in range(out_height):
|
|
for w in range(out_width):
|
|
h_start = h * self.stride
|
|
w_start = w * self.stride
|
|
h_end = h_start + self.kernel_size
|
|
w_end = w_start + self.kernel_size
|
|
|
|
# Take maximum in this region
|
|
region = x.data[b, c, h_start:h_end, w_start:w_end]
|
|
output[b, c, h, w] = np.max(region)
|
|
|
|
return Tensor(output)
|
|
|
|
💡 POOLING PURPOSE:
|
|
- Reduces spatial dimensions (4x4 -> 2x2)
|
|
- Reduces parameters and computation
|
|
- Provides translation invariance
|
|
- Prevents overfitting
|
|
"""
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ POOLING OPERATIONS BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 POOLING REQUIREMENTS:
|
|
1. MaxPool2D takes kernel_size and stride parameters
|
|
2. Input shape: (batch, channels, height, width)
|
|
3. Output shape: (batch, channels, out_height, out_width)
|
|
4. Operation: take max value in each kernel_size x kernel_size region
|
|
5. Stride determines how much to move kernel each step
|
|
|
|
🧪 DEBUG TEST:
|
|
x = Tensor(np.arange(16).reshape(1, 1, 4, 4)) # 0-15 in 4x4
|
|
pool = MaxPool2D(kernel_size=2)
|
|
y = pool(x)
|
|
print(f"Input: {{x.data}}")
|
|
print(f"Output: {{y.data}}") # Should be max of each 2x2 region
|
|
"""
|
|
|
|
def test_spatial_tensor_operations(self):
|
|
"""
|
|
✅ TEST: Spatial operations work correctly with 4D tensors
|
|
|
|
📋 4D TENSOR FORMAT:
|
|
- Dimension 0: Batch size (number of images)
|
|
- Dimension 1: Channels (RGB = 3, grayscale = 1)
|
|
- Dimension 2: Height (image height in pixels)
|
|
- Dimension 3: Width (image width in pixels)
|
|
|
|
💡 This is the standard format for computer vision
|
|
"""
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.spatial import Conv2D
|
|
|
|
# Test 4D tensor creation and manipulation
|
|
batch_size, channels, height, width = 2, 3, 32, 32
|
|
|
|
# Create batch of RGB images
|
|
images = Tensor(np.random.randn(batch_size, channels, height, width))
|
|
|
|
assert images.shape == (2, 3, 32, 32), \
|
|
f"❌ 4D tensor creation broken. Expected (2, 3, 32, 32), got {images.shape}"
|
|
|
|
# Test convolution with 4D tensors
|
|
conv = Conv2D(in_channels=3, out_channels=16, kernel_size=5, padding=2)
|
|
conv_output = conv(images)
|
|
|
|
# With padding=2 and kernel_size=5, spatial dimensions should be preserved
|
|
expected_shape = (2, 16, 32, 32)
|
|
assert conv_output.shape == expected_shape, \
|
|
f"❌ Conv2D with 4D tensors broken. Expected {expected_shape}, got {conv_output.shape}"
|
|
|
|
# Test different spatial sizes
|
|
small_images = Tensor(np.random.randn(1, 1, 8, 8))
|
|
small_conv = Conv2D(in_channels=1, out_channels=4, kernel_size=3)
|
|
small_output = small_conv(small_images)
|
|
|
|
# 8x8 input with 3x3 kernel -> 6x6 output
|
|
expected_small_shape = (1, 4, 6, 6)
|
|
assert small_output.shape == expected_small_shape, \
|
|
f"❌ Small Conv2D broken. Expected {expected_small_shape}, got {small_output.shape}"
|
|
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ SPATIAL TENSOR OPERATIONS BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 4D TENSOR REQUIREMENTS:
|
|
1. Support (batch, channels, height, width) format
|
|
2. Convolution preserves batch and channel semantics
|
|
3. Spatial dimensions computed correctly:
|
|
output_size = (input_size + 2*padding - kernel_size) / stride + 1
|
|
4. Handle different input sizes correctly
|
|
|
|
💡 COMPUTER VISION TENSOR FORMAT:
|
|
- MNIST: (batch, 1, 28, 28) - grayscale 28x28 images
|
|
- CIFAR-10: (batch, 3, 32, 32) - RGB 32x32 images
|
|
- ImageNet: (batch, 3, 224, 224) - RGB 224x224 images
|
|
|
|
🧪 DEBUG SPATIAL DIMENSIONS:
|
|
Input: H_in = 32, W_in = 32
|
|
Kernel: K = 5, Padding: P = 2, Stride: S = 1
|
|
Output: H_out = (32 + 2*2 - 5) / 1 + 1 = 32
|
|
|
|
Test this calculation in your implementation!
|
|
"""
|
|
|
|
|
|
class TestSpatialIntegration:
|
|
"""
|
|
🔗 INTEGRATION TEST: Spatial operations + Foundation stack working together.
|
|
|
|
💡 Test that CNNs can be built using the complete progressive stack.
|
|
🎯 Goal: Build convolutional neural networks for computer vision.
|
|
"""
|
|
|
|
def test_cnn_architecture_building(self):
|
|
"""
|
|
✅ TEST: Can build complete CNN architectures
|
|
|
|
📋 CNN ARCHITECTURE:
|
|
input -> conv -> relu -> pool -> conv -> relu -> pool -> dense -> output
|
|
|
|
💡 This is the foundation for all computer vision models
|
|
"""
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.spatial import Conv2D, MaxPool2D
|
|
from tinytorch.core.layers import Linear
|
|
from tinytorch.core.activations import ReLU, Softmax
|
|
|
|
# Build mini CNN for CIFAR-10 style classification
|
|
# Input: 32x32 RGB images, Output: 10 classes
|
|
|
|
# Convolutional layers
|
|
conv1 = Conv2D(in_channels=3, out_channels=16, kernel_size=3, padding=1) # 32x32 -> 32x32
|
|
pool1 = MaxPool2D(kernel_size=2, stride=2) # 32x32 -> 16x16
|
|
conv2 = Conv2D(in_channels=16, out_channels=32, kernel_size=3, padding=1) # 16x16 -> 16x16
|
|
pool2 = MaxPool2D(kernel_size=2, stride=2) # 16x16 -> 8x8
|
|
|
|
# Dense layers (after flattening)
|
|
# 32 channels * 8 * 8 = 2048 features
|
|
fc1 = Linear(32 * 8 * 8, 128)
|
|
fc2 = Linear(128, 10)
|
|
|
|
# Activations
|
|
relu = ReLU()
|
|
softmax = Softmax()
|
|
|
|
# Test forward pass through complete CNN
|
|
batch_size = 4
|
|
x = Tensor(np.random.randn(batch_size, 3, 32, 32)) # Batch of CIFAR-10 images
|
|
|
|
# Convolutional feature extraction
|
|
h1 = relu(conv1(x)) # (4, 16, 32, 32)
|
|
h1_pool = pool1(h1) # (4, 16, 16, 16)
|
|
h2 = relu(conv2(h1_pool)) # (4, 32, 16, 16)
|
|
h2_pool = pool2(h2) # (4, 32, 8, 8)
|
|
|
|
# Flatten for dense layers
|
|
flattened = Tensor(h2_pool.data.reshape(batch_size, -1)) # (4, 2048)
|
|
|
|
# Classification layers
|
|
h3 = relu(fc1(flattened)) # (4, 128)
|
|
logits = fc2(h3) # (4, 10)
|
|
output = softmax(logits) # (4, 10)
|
|
|
|
# Verify complete CNN pipeline
|
|
assert output.shape == (4, 10), \
|
|
f"❌ CNN output shape wrong. Expected (4, 10), got {output.shape}"
|
|
|
|
# Verify softmax probabilities
|
|
prob_sums = np.sum(output.data, axis=1)
|
|
assert np.allclose(prob_sums, 1.0), \
|
|
f"❌ CNN softmax broken. Probabilities don't sum to 1: {prob_sums}"
|
|
|
|
# Verify feature extraction pipeline
|
|
assert h1.shape == (4, 16, 32, 32), "❌ Conv1 output shape wrong"
|
|
assert h1_pool.shape == (4, 16, 16, 16), "❌ Pool1 output shape wrong"
|
|
assert h2.shape == (4, 32, 16, 16), "❌ Conv2 output shape wrong"
|
|
assert h2_pool.shape == (4, 32, 8, 8), "❌ Pool2 output shape wrong"
|
|
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ CNN ARCHITECTURE BUILDING BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 CNN PIPELINE REQUIREMENTS:
|
|
1. ✅ Spatial operations (Conv2D, MaxPool2D)
|
|
2. ✅ Foundation operations (Dense, ReLU, Softmax)
|
|
3. ✅ 4D tensor handling throughout
|
|
4. ✅ Shape preservation and transformation
|
|
5. ✅ Integration between spatial and dense layers
|
|
|
|
💡 CNN ARCHITECTURE PATTERN:
|
|
[Input Images]
|
|
↓
|
|
[Conv2D + ReLU] → Extract spatial features
|
|
↓
|
|
[MaxPool2D] → Reduce spatial dimensions
|
|
↓
|
|
[Conv2D + ReLU] → Extract higher-level features
|
|
↓
|
|
[MaxPool2D] → Further dimension reduction
|
|
↓
|
|
[Flatten] → Convert to 1D for dense layers
|
|
↓
|
|
[Dense + ReLU] → Classification features
|
|
↓
|
|
[Dense + Softmax] → Class probabilities
|
|
|
|
🧪 DEBUG CNN SHAPES:
|
|
Input: (batch=4, channels=3, height=32, width=32)
|
|
Conv1: (4, 16, 32, 32) - 16 feature maps
|
|
Pool1: (4, 16, 16, 16) - halved spatial size
|
|
Conv2: (4, 32, 16, 16) - 32 feature maps
|
|
Pool2: (4, 32, 8, 8) - halved again
|
|
Flatten: (4, 2048) - 32*8*8 = 2048 features
|
|
Dense: (4, 10) - 10 class scores
|
|
"""
|
|
|
|
def test_image_processing_pipeline(self):
|
|
"""
|
|
✅ TEST: Complete image processing pipeline
|
|
|
|
📋 IMAGE PROCESSING:
|
|
- Load and preprocess images
|
|
- Extract features with CNNs
|
|
- Make predictions
|
|
- Handle different image sizes
|
|
|
|
🎯 Real-world computer vision workflow
|
|
"""
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.spatial import Conv2D, MaxPool2D
|
|
from tinytorch.core.activations import ReLU
|
|
|
|
# Simulate different image processing scenarios
|
|
|
|
# Scenario 1: MNIST-style grayscale images
|
|
mnist_images = Tensor(np.random.randn(8, 1, 28, 28)) # 8 images, 1 channel, 28x28
|
|
mnist_conv = Conv2D(in_channels=1, out_channels=8, kernel_size=5)
|
|
mnist_features = mnist_conv(mnist_images)
|
|
|
|
expected_mnist_shape = (8, 8, 24, 24) # 28-5+1 = 24
|
|
assert mnist_features.shape == expected_mnist_shape, \
|
|
f"❌ MNIST processing broken. Expected {expected_mnist_shape}, got {mnist_features.shape}"
|
|
|
|
# Scenario 2: CIFAR-10 style RGB images
|
|
cifar_images = Tensor(np.random.randn(16, 3, 32, 32)) # 16 images, 3 channels, 32x32
|
|
cifar_conv = Conv2D(in_channels=3, out_channels=64, kernel_size=3, padding=1)
|
|
cifar_pool = MaxPool2D(kernel_size=2)
|
|
|
|
cifar_features = cifar_conv(cifar_images)
|
|
cifar_pooled = cifar_pool(cifar_features)
|
|
|
|
assert cifar_features.shape == (16, 64, 32, 32), "❌ CIFAR conv broken"
|
|
assert cifar_pooled.shape == (16, 64, 16, 16), "❌ CIFAR pooling broken"
|
|
|
|
# Scenario 3: Multi-scale feature extraction
|
|
relu = ReLU()
|
|
|
|
# Small features (fine details)
|
|
small_conv = Conv2D(in_channels=3, out_channels=32, kernel_size=3)
|
|
small_features = relu(small_conv(cifar_images))
|
|
|
|
# Large features (global patterns)
|
|
large_conv = Conv2D(in_channels=3, out_channels=32, kernel_size=7)
|
|
large_features = relu(large_conv(cifar_images))
|
|
|
|
# Both should extract meaningful features
|
|
assert small_features.shape[1] == 32, "❌ Small feature extraction broken"
|
|
assert large_features.shape[1] == 32, "❌ Large feature extraction broken"
|
|
assert np.all(small_features.data >= 0), "❌ Small features ReLU broken"
|
|
assert np.all(large_features.data >= 0), "❌ Large features ReLU broken"
|
|
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ IMAGE PROCESSING PIPELINE BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 IMAGE PROCESSING REQUIREMENTS:
|
|
1. Handle different image formats (grayscale, RGB)
|
|
2. Support various image sizes (28x28, 32x32, etc.)
|
|
3. Extract features at different scales
|
|
4. Maintain spatial relationships
|
|
5. Work with batches of images
|
|
|
|
💡 REAL-WORLD APPLICATIONS:
|
|
- Medical imaging: X-rays, MRIs, CT scans
|
|
- Autonomous driving: Camera feeds, object detection
|
|
- Security: Face recognition, surveillance
|
|
- Entertainment: Photo filters, style transfer
|
|
- Science: Satellite imagery, microscopy
|
|
|
|
🧪 IMAGE PROCESSING CHECKLIST:
|
|
□ MNIST (28x28 grayscale): Medical imaging, digit recognition
|
|
□ CIFAR-10 (32x32 RGB): Object classification
|
|
□ ImageNet (224x224 RGB): General computer vision
|
|
□ Multi-scale features: Fine details + global patterns
|
|
"""
|
|
|
|
def test_cnn_spatial_hierarchies(self):
|
|
"""
|
|
✅ TEST: CNNs build spatial feature hierarchies
|
|
|
|
📋 FEATURE HIERARCHIES:
|
|
- Early layers: Edges, corners, simple patterns
|
|
- Middle layers: Shapes, textures, objects parts
|
|
- Late layers: Complete objects, complex patterns
|
|
|
|
💡 This is why CNNs work so well for computer vision
|
|
"""
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.spatial import Conv2D, MaxPool2D
|
|
from tinytorch.core.activations import ReLU
|
|
|
|
# Build hierarchical CNN feature extractor
|
|
relu = ReLU()
|
|
|
|
# Layer 1: Low-level features (edges, corners)
|
|
conv1 = Conv2D(in_channels=3, out_channels=16, kernel_size=3, padding=1)
|
|
pool1 = MaxPool2D(kernel_size=2)
|
|
|
|
# Layer 2: Mid-level features (shapes, textures)
|
|
conv2 = Conv2D(in_channels=16, out_channels=32, kernel_size=3, padding=1)
|
|
pool2 = MaxPool2D(kernel_size=2)
|
|
|
|
# Layer 3: High-level features (object parts)
|
|
conv3 = Conv2D(in_channels=32, out_channels=64, kernel_size=3, padding=1)
|
|
pool3 = MaxPool2D(kernel_size=2)
|
|
|
|
# Test feature hierarchy with realistic image
|
|
x = Tensor(np.random.randn(1, 3, 64, 64)) # Single 64x64 RGB image
|
|
|
|
# Extract features at each level
|
|
# Level 1: 64x64 -> 32x32 (low-level features)
|
|
features_1 = relu(conv1(x)) # (1, 16, 64, 64)
|
|
pooled_1 = pool1(features_1) # (1, 16, 32, 32)
|
|
|
|
# Level 2: 32x32 -> 16x16 (mid-level features)
|
|
features_2 = relu(conv2(pooled_1)) # (1, 32, 32, 32)
|
|
pooled_2 = pool2(features_2) # (1, 32, 16, 16)
|
|
|
|
# Level 3: 16x16 -> 8x8 (high-level features)
|
|
features_3 = relu(conv3(pooled_2)) # (1, 64, 16, 16)
|
|
pooled_3 = pool3(features_3) # (1, 64, 8, 8)
|
|
|
|
# Verify hierarchical feature extraction
|
|
assert features_1.shape == (1, 16, 64, 64), "❌ Level 1 features broken"
|
|
assert pooled_1.shape == (1, 16, 32, 32), "❌ Level 1 pooling broken"
|
|
assert features_2.shape == (1, 32, 32, 32), "❌ Level 2 features broken"
|
|
assert pooled_2.shape == (1, 32, 16, 16), "❌ Level 2 pooling broken"
|
|
assert features_3.shape == (1, 64, 16, 16), "❌ Level 3 features broken"
|
|
assert pooled_3.shape == (1, 64, 8, 8), "❌ Level 3 pooling broken"
|
|
|
|
# Verify feature complexity increases (more channels, smaller spatial)
|
|
channel_progression = [16, 32, 64]
|
|
spatial_progression = [(32, 32), (16, 16), (8, 8)]
|
|
|
|
for i, (channels, spatial) in enumerate(zip(channel_progression, spatial_progression)):
|
|
level = i + 1
|
|
assert channels > (8 if i == 0 else channel_progression[i-1]), \
|
|
f"❌ Level {level}: Feature complexity not increasing"
|
|
|
|
h, w = spatial
|
|
assert h < (64 if i == 0 else spatial_progression[i-1][0]), \
|
|
f"❌ Level {level}: Spatial size not decreasing"
|
|
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ CNN SPATIAL HIERARCHIES BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 HIERARCHICAL CNN REQUIREMENTS:
|
|
1. Early layers extract simple features (edges, corners)
|
|
2. Later layers extract complex features (objects, patterns)
|
|
3. Spatial resolution decreases through network
|
|
4. Feature complexity (channels) increases through network
|
|
5. Each level builds on previous level features
|
|
|
|
💡 CNN FEATURE HIERARCHY:
|
|
|
|
Level 1 (64x64 → 32x32):
|
|
- 16 channels detect edges, corners, simple patterns
|
|
- High spatial resolution preserves fine details
|
|
|
|
Level 2 (32x32 → 16x16):
|
|
- 32 channels detect shapes, textures, object parts
|
|
- Medium spatial resolution focuses on local patterns
|
|
|
|
Level 3 (16x16 → 8x8):
|
|
- 64 channels detect complete objects, complex patterns
|
|
- Low spatial resolution captures global structure
|
|
|
|
🧠 WHY THIS WORKS:
|
|
This mimics the human visual system:
|
|
- Retina → edges and motion
|
|
- V1 → oriented edges and bars
|
|
- V2 → shapes and textures
|
|
- V4 → objects and faces
|
|
"""
|
|
|
|
|
|
class TestComputerVisionCapabilities:
|
|
"""
|
|
🖼️ COMPUTER VISION CAPABILITIES: Test real-world CV applications.
|
|
|
|
💡 Verify the spatial foundation enables actual computer vision tasks.
|
|
🎯 Goal: Show students can now build real CV systems.
|
|
"""
|
|
|
|
def test_image_classification_capability(self):
|
|
"""
|
|
✅ TEST: Can build image classification systems
|
|
|
|
📋 IMAGE CLASSIFICATION:
|
|
- Input: Images
|
|
- Output: Class probabilities
|
|
- Applications: Medical diagnosis, quality control, content moderation
|
|
|
|
💡 This is the "Hello World" of computer vision
|
|
"""
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.spatial import Conv2D, MaxPool2D
|
|
from tinytorch.core.layers import Linear
|
|
from tinytorch.core.activations import ReLU, Softmax
|
|
|
|
# Build classifier for 10 classes (CIFAR-10 style)
|
|
class ImageClassifier:
|
|
def __init__(self, num_classes=10):
|
|
# Feature extraction (convolutional layers)
|
|
self.conv1 = Conv2D(3, 32, kernel_size=3, padding=1)
|
|
self.pool1 = MaxPool2D(kernel_size=2)
|
|
self.conv2 = Conv2D(32, 64, kernel_size=3, padding=1)
|
|
self.pool2 = MaxPool2D(kernel_size=2)
|
|
|
|
# Classification (dense layers)
|
|
self.fc1 = Linear(64 * 8 * 8, 128) # Assuming 32x32 input
|
|
self.fc2 = Linear(128, num_classes)
|
|
|
|
# Activations
|
|
self.relu = ReLU()
|
|
self.softmax = Softmax()
|
|
|
|
def __call__(self, x):
|
|
# Feature extraction
|
|
h1 = self.relu(self.conv1(x)) # Extract low-level features
|
|
h1_pool = self.pool1(h1) # Downsample
|
|
h2 = self.relu(self.conv2(h1_pool)) # Extract high-level features
|
|
h2_pool = self.pool2(h2) # Downsample
|
|
|
|
# Flatten for classification
|
|
batch_size = h2_pool.shape[0]
|
|
flattened = Tensor(h2_pool.data.reshape(batch_size, -1))
|
|
|
|
# Classification
|
|
h3 = self.relu(self.fc1(flattened))
|
|
logits = self.fc2(h3)
|
|
probabilities = self.softmax(logits)
|
|
|
|
return probabilities
|
|
|
|
# Test image classifier
|
|
classifier = ImageClassifier(num_classes=10)
|
|
|
|
# Batch of test images
|
|
test_images = Tensor(np.random.randn(5, 3, 32, 32))
|
|
predictions = classifier(test_images)
|
|
|
|
# Verify classifier output
|
|
assert predictions.shape == (5, 10), \
|
|
f"❌ Classifier shape wrong. Expected (5, 10), got {predictions.shape}"
|
|
|
|
# Verify probabilities sum to 1
|
|
prob_sums = np.sum(predictions.data, axis=1)
|
|
assert np.allclose(prob_sums, 1.0, atol=1e-6), \
|
|
f"❌ Classifier probabilities don't sum to 1: {prob_sums}"
|
|
|
|
# Verify probabilities in valid range
|
|
assert np.all(predictions.data >= 0) and np.all(predictions.data <= 1), \
|
|
"❌ Classifier probabilities not in [0, 1] range"
|
|
|
|
# Test prediction extraction (most likely class)
|
|
predicted_classes = np.argmax(predictions.data, axis=1)
|
|
assert len(predicted_classes) == 5, "❌ Prediction extraction broken"
|
|
assert all(0 <= cls < 10 for cls in predicted_classes), \
|
|
"❌ Predicted classes out of range"
|
|
|
|
except Exception as e:
|
|
assert False, f"""
|
|
❌ IMAGE CLASSIFICATION CAPABILITY BROKEN!
|
|
|
|
🔍 ERROR: {str(e)}
|
|
|
|
🔧 IMAGE CLASSIFICATION REQUIREMENTS:
|
|
1. CNN feature extraction (Conv2D + pooling)
|
|
2. Dense classification layers
|
|
3. Softmax probability output
|
|
4. Batch processing support
|
|
5. End-to-end differentiable pipeline
|
|
|
|
💡 REAL-WORLD APPLICATIONS:
|
|
|
|
🏥 Medical Imaging:
|
|
- X-ray diagnosis (pneumonia detection)
|
|
- Skin cancer classification
|
|
- Retinal disease detection
|
|
|
|
🚗 Autonomous Vehicles:
|
|
- Traffic sign recognition
|
|
- Pedestrian detection
|
|
- Lane boundary detection
|
|
|
|
🏭 Quality Control:
|
|
- Defect detection in manufacturing
|
|
- Food quality assessment
|
|
- Product sorting and grading
|
|
|
|
📱 Consumer Applications:
|
|
- Photo tagging and search
|
|
- Content moderation
|
|
- Augmented reality filters
|
|
"""
|
|
|
|
def test_feature_extraction_capability(self):
    """
    ✅ TEST: Can extract meaningful visual features

    📋 FEATURE EXTRACTION:
    - Low-level: Edges, corners, textures
    - High-level: Objects, shapes, patterns
    - Transfer learning: Features from one task help another

    💡 Feature extraction is the foundation of all computer vision

    Builds a small multi-scale extractor (3x3, 5x5 and 7x7 convolutions
    whose outputs are concatenated on the channel axis, fused by a 1x1
    convolution, passed through ReLU and a kernel-size-2 max pool) and
    checks, for three random batches, that the output has the expected
    batch size, 32 channels, halved height/width, is not all zeros and
    contains no negative values.

    Raises:
        AssertionError: If an import, a forward pass or any check fails.
            The triggering exception is attached as ``__cause__``.
    """
    try:
        from tinytorch.core.tensor import Tensor
        from tinytorch.core.spatial import Conv2D, MaxPool2D
        from tinytorch.core.activations import ReLU

        # Build feature extractor
        class FeatureExtractor:
            """Multi-scale convolutional feature extractor local to this test."""

            def __init__(self):
                # Multi-scale feature extraction. The paddings are chosen so
                # the three branch outputs can be concatenated on axis 1.
                self.small_features = Conv2D(3, 16, kernel_size=3, padding=1)   # Fine details
                self.medium_features = Conv2D(3, 16, kernel_size=5, padding=2)  # Medium patterns
                self.large_features = Conv2D(3, 16, kernel_size=7, padding=3)   # Large patterns

                # Feature refinement: 3 branches x 16 channels = 48 inputs
                self.refine = Conv2D(48, 32, kernel_size=1)  # 1x1 conv for feature fusion
                self.pool = MaxPool2D(kernel_size=2)
                self.relu = ReLU()

            def extract_features(self, x):
                """Return the pooled, fused multi-scale features of ``x``."""
                # Extract features at multiple scales
                small = self.relu(self.small_features(x))
                medium = self.relu(self.medium_features(x))
                large = self.relu(self.large_features(x))

                # Concatenate multi-scale features.
                # In a real implementation this would use tensor concatenation;
                # for now, simulate it by combining the raw arrays channel-wise.
                combined_data = np.concatenate(
                    [small.data, medium.data, large.data], axis=1
                )
                combined = Tensor(combined_data)

                # Refine combined features
                refined = self.relu(self.refine(combined))
                return self.pool(refined)

        # Test feature extraction
        extractor = FeatureExtractor()

        # Test with different types of images
        test_cases = [
            ("Natural images", np.random.randn(3, 3, 64, 64)),
            ("Medical images", np.random.randn(2, 3, 128, 128)),
            ("Satellite images", np.random.randn(1, 3, 256, 256)),
        ]

        for name, image_data in test_cases:
            images = Tensor(image_data)
            features = extractor.extract_features(images)

            batch_size = images.shape[0]
            expected_channels = 32
            # Halved by pooling
            expected_spatial = (images.shape[2] // 2, images.shape[3] // 2)

            assert features.shape[0] == batch_size, f"❌ {name}: Batch size wrong"
            assert features.shape[1] == expected_channels, f"❌ {name}: Feature channels wrong"
            assert features.shape[2:] == expected_spatial, f"❌ {name}: Spatial dimensions wrong"

            # Features should be meaningful (not all zeros)
            assert not np.allclose(features.data, 0), f"❌ {name}: Features are all zeros"

            # ReLU should ensure non-negative features
            assert np.all(features.data >= 0), f"❌ {name}: Features contain negative values"

    except Exception as e:
        # Raise explicitly instead of `assert False`: assert statements are
        # stripped under `python -O`, which would make this test pass silently.
        raise AssertionError(f"""
        ❌ FEATURE EXTRACTION CAPABILITY BROKEN!

        🔍 ERROR: {e}

        🔧 FEATURE EXTRACTION REQUIREMENTS:
        1. Multi-scale feature detection (small, medium, large)
        2. Feature combination and refinement
        3. Spatial dimension handling
        4. Meaningful feature representations
        5. Transfer learning capability

        💡 FEATURE EXTRACTION APPLICATIONS:

        🔬 Scientific Research:
        - Analyzing microscopy images
        - Identifying cellular structures
        - Tracking biological processes

        🛰️ Remote Sensing:
        - Land use classification
        - Environmental monitoring
        - Disaster response planning

        🎨 Creative Applications:
        - Style transfer (artistic filters)
        - Image enhancement
        - Content-aware editing

        🤖 Robotics:
        - Object recognition and grasping
        - Navigation and mapping
        - Human-robot interaction

        💡 TRANSFER LEARNING:
        Features learned on one dataset (ImageNet) transfer to:
        - Medical imaging with small datasets
        - Specialized domains (satellite, microscopy)
        - New tasks with limited training data
        """) from e
|
|
|
|
def test_spatial_understanding_capability(self):
    """
    ✅ TEST: CNNs understand spatial relationships

    📋 SPATIAL UNDERSTANDING:
    - Local patterns: Textures, edges within small regions
    - Global structure: Object layout, scene composition
    - Translation invariance: Same object anywhere in image
    - Scale invariance: Objects at different sizes

    💡 This is what makes CNNs powerful for vision

    Feeds four synthetic 16x16 single-channel patterns (vertical stripes,
    horizontal stripes, checkerboard, centre blob) through one Conv2D +
    ReLU (+ MaxPool2D) and checks output shapes, that every pair of
    patterns yields different features, and that the mean pooled response
    of a blob changes by less than 0.5 when the blob is moved.

    Raises:
        AssertionError: If an import, a forward pass or any check fails.
            The triggering exception is attached as ``__cause__``.
    """
    try:
        from tinytorch.core.tensor import Tensor
        from tinytorch.core.spatial import Conv2D, MaxPool2D
        from tinytorch.core.activations import ReLU

        # Test spatial understanding with different spatial patterns
        relu = ReLU()

        # Pattern detector
        pattern_detector = Conv2D(1, 8, kernel_size=3, padding=1)
        spatial_pool = MaxPool2D(kernel_size=2)

        # Create test images with known spatial patterns

        # Pattern 1: Vertical stripes
        vertical_stripes = np.zeros((1, 1, 16, 16))
        vertical_stripes[0, 0, :, ::2] = 1  # Every other column

        # Pattern 2: Horizontal stripes
        horizontal_stripes = np.zeros((1, 1, 16, 16))
        horizontal_stripes[0, 0, ::2, :] = 1  # Every other row

        # Pattern 3: Checkerboard. A cell is set when (row + col) is even,
        # i.e. when row and col are both even or both odd.
        checkerboard = np.zeros((1, 1, 16, 16))
        checkerboard[0, 0, ::2, ::2] = 1
        checkerboard[0, 0, 1::2, 1::2] = 1

        # Pattern 4: Center blob
        center_blob = np.zeros((1, 1, 16, 16))
        center_blob[0, 0, 6:10, 6:10] = 1

        # Combine patterns into batch
        patterns = np.concatenate(
            [vertical_stripes, horizontal_stripes, checkerboard, center_blob],
            axis=0,
        )
        pattern_tensor = Tensor(patterns)

        # Extract features for each pattern
        features = relu(pattern_detector(pattern_tensor))
        pooled_features = spatial_pool(features)

        # Test spatial pattern detection
        assert features.shape == (4, 8, 16, 16), \
            f"❌ Pattern features shape wrong. Expected (4, 8, 16, 16), got {features.shape}"

        assert pooled_features.shape == (4, 8, 8, 8), \
            f"❌ Pooled features shape wrong. Expected (4, 8, 8, 8), got {pooled_features.shape}"

        # Features should be different for different patterns
        for i in range(4):
            pattern_i_features = features.data[i].flatten()
            for j in range(i + 1, 4):
                pattern_j_features = features.data[j].flatten()

                # Patterns should produce different features
                assert not np.allclose(pattern_i_features, pattern_j_features, rtol=0.1), \
                    f"❌ Patterns {i} and {j} produce identical features"

        # Test translation invariance (same pattern, different location)
        shifted_blob = np.zeros((1, 1, 16, 16))
        shifted_blob[0, 0, 2:6, 2:6] = 1  # Same blob, different position

        original_blob_tensor = Tensor(center_blob)
        shifted_blob_tensor = Tensor(shifted_blob)

        original_features = relu(pattern_detector(original_blob_tensor))
        shifted_features = relu(pattern_detector(shifted_blob_tensor))

        # After pooling, features should be similar (translation invariance)
        original_pooled = spatial_pool(original_features)
        shifted_pooled = spatial_pool(shifted_features)

        # Global feature similarity (though not exact due to edge effects)
        original_global = np.mean(original_pooled.data)
        shifted_global = np.mean(shifted_pooled.data)

        assert abs(original_global - shifted_global) < 0.5, \
            "❌ Translation invariance broken: shifted pattern too different"

    except Exception as e:
        # Raise explicitly instead of `assert False`: assert statements are
        # stripped under `python -O`, which would make this test pass silently.
        raise AssertionError(f"""
        ❌ SPATIAL UNDERSTANDING CAPABILITY BROKEN!

        🔍 ERROR: {e}

        🔧 SPATIAL UNDERSTANDING REQUIREMENTS:
        1. Pattern detection: Different spatial patterns produce different features
        2. Translation invariance: Same pattern different locations → similar features
        3. Local processing: Convolution respects spatial neighborhoods
        4. Hierarchical understanding: Local → global feature extraction
        5. Spatial pooling: Reduce spatial resolution while preserving features

        💡 SPATIAL UNDERSTANDING ENABLES:

        🖼️ Image Analysis:
        - Object detection: "Where is the cat in the image?"
        - Semantic segmentation: "Which pixels belong to the road?"
        - Instance segmentation: "Separate the two cars in the image"

        🏥 Medical Imaging:
        - Tumor localization: "Where is the abnormal tissue?"
        - Anatomical structure identification
        - Disease progression tracking over time

        🚗 Autonomous Navigation:
        - Lane detection: "Where are the road boundaries?"
        - Obstacle avoidance: "What objects are in my path?"
        - Traffic sign recognition: "What does this sign mean?"

        🎮 Augmented Reality:
        - Object tracking in real-time
        - Spatial registration of virtual objects
        - Hand gesture recognition
        """) from e
|
|
|
|
|
|
class TestModule06Completion:
    """
    ✅ COMPLETION CHECK: Module 06 ready and foundation set for advanced architectures.

    🎯 Final validation that spatial operations work and foundation supports computer vision.
    """

    def test_computer_vision_foundation_complete(self):
        """
        ✅ FINAL TEST: Complete computer vision foundation ready

        📋 CV FOUNDATION CHECKLIST:
        □ Convolutional operations (Conv2D)
        □ Pooling operations (MaxPool2D)
        □ 4D tensor handling (batch, channels, height, width)
        □ Spatial feature hierarchies
        □ Integration with dense layers
        □ Image classification capability
        □ Feature extraction capability
        □ Spatial understanding

        🎯 SUCCESS = Ready for advanced CV architectures!

        Runs eight checks in order, ticking each entry of a capability
        table as it passes. On the first failure the table is rendered
        into a progress report and the test fails with it.

        Raises:
            AssertionError: If any step fails. The triggering exception is
                attached as ``__cause__``.
        """
        # Ticked off one by one; whatever is still False when a step raises
        # is reported as not ready.
        cv_capabilities = {
            "Conv2D operations": False,
            "Pooling operations": False,
            "4D tensor handling": False,
            "CNN architecture building": False,
            "Image classification": False,
            "Feature extraction": False,
            "Spatial understanding": False,
            "Foundation integration": False,
        }

        try:
            # Test 1: Conv2D operations
            from tinytorch.core.spatial import Conv2D
            conv = Conv2D(3, 16, kernel_size=3)
            cv_capabilities["Conv2D operations"] = True

            # Test 2: Pooling operations
            from tinytorch.core.spatial import MaxPool2D
            pool = MaxPool2D(kernel_size=2)
            cv_capabilities["Pooling operations"] = True

            # Test 3: 4D tensor handling
            from tinytorch.core.tensor import Tensor
            x = Tensor(np.random.randn(2, 3, 32, 32))
            conv_out = conv(x)
            assert len(conv_out.shape) == 4
            cv_capabilities["4D tensor handling"] = True

            # Test 4: CNN architecture building
            from tinytorch.core.activations import ReLU
            from tinytorch.core.layers import Linear

            relu = ReLU()
            h1 = relu(conv_out)
            h1_pool = pool(h1)

            # Flatten and connect to dense
            flattened = Tensor(h1_pool.data.reshape(2, -1))
            dense = Linear(flattened.shape[1], 10)
            output = dense(flattened)

            assert output.shape == (2, 10)
            cv_capabilities["CNN architecture building"] = True

            # Test 5: Image classification capability
            from tinytorch.core.activations import Softmax
            softmax = Softmax()
            probs = softmax(output)

            prob_sums = np.sum(probs.data, axis=1)
            assert np.allclose(prob_sums, 1.0)
            cv_capabilities["Image classification"] = True

            # Test 6: Feature extraction
            features = relu(conv(x))
            assert np.all(features.data >= 0)  # ReLU features
            assert not np.allclose(features.data, 0)  # Non-trivial features
            cv_capabilities["Feature extraction"] = True

            # Test 7: Spatial understanding
            small_x = Tensor(np.random.randn(1, 3, 8, 8))
            small_conv = Conv2D(3, 8, kernel_size=3)
            small_features = small_conv(small_x)
            assert small_features.shape == (1, 8, 6, 6)  # Correct spatial calculation
            cv_capabilities["Spatial understanding"] = True

            # Test 8: Foundation integration
            # Tensor, Linear and ReLU are already imported above; only
            # Layer is new here.
            from tinytorch.core.layers import Layer

            # All foundation components should work together
            assert issubclass(Conv2D, Layer)  # Inherits from Layer
            cv_capabilities["Foundation integration"] = True

        except Exception as e:
            # Show progress even if not complete
            completed_count = sum(cv_capabilities.values())
            total_count = len(cv_capabilities)

            capability_lines = "".join(
                f" {'✅' if completed else '❌'} {capability}\n"
                for capability, completed in cv_capabilities.items()
            )
            progress_report = (
                "\n🔍 COMPUTER VISION PROGRESS:\n"
                + capability_lines
                + f"\n📊 Progress: {completed_count}/{total_count} capabilities ready"
            )

            # Raise explicitly instead of `assert False`: assert statements
            # are stripped under `python -O`, which would hide the failure.
            raise AssertionError(f"""
            ❌ COMPUTER VISION FOUNDATION NOT COMPLETE!

            🔍 ERROR: {e}

            {progress_report}

            🔧 NEXT STEPS:
            1. Fix the failing capability above
            2. Re-run this test
            3. When all ✅, you have complete computer vision foundation!

            💡 ALMOST THERE!
            You've completed {completed_count}/{total_count} CV capabilities.
            Just fix the error above and you'll be ready for advanced vision architectures!
            """) from e

        # If we get here, everything passed!
        # The banner is printed: an `assert True, msg` never evaluates or
        # shows its message, so the text could not reach the student.
        print("""
        🎉 COMPUTER VISION FOUNDATION COMPLETE! 🎉

        ✅ Conv2D convolutional operations
        ✅ MaxPool2D pooling operations
        ✅ 4D tensor handling (batch, channels, height, width)
        ✅ CNN architecture building
        ✅ Image classification capability
        ✅ Feature extraction capability
        ✅ Spatial understanding and processing
        ✅ Complete foundation integration

        🚀 READY FOR ADVANCED COMPUTER VISION!

        💡 What you can now build:
        - Image classifiers (MNIST, CIFAR-10, ImageNet)
        - Object detection systems
        - Medical image analysis
        - Autonomous vehicle vision
        - Artistic style transfer
        - And much more!

        🎯 Next modules will add:
        - Attention mechanisms (Module 07)
        - Data loading pipelines (Module 08)
        - Training loops (Module 11)
        - Advanced optimizations (Module 13)

        🏆 ACHIEVEMENT UNLOCKED: Computer Vision Engineer!
        """)
|
|
|
|
|
|
# Note: No separate regression-prevention class is needed; foundation stability is tested above.