#!/usr/bin/env python
"""
Shape Validation Tests for TinyTorch
=====================================

Comprehensive shape validation ensuring all operations produce the
expected dimensions. Uses pytest style: one test per specific behavior
for clear reporting.

Run with: pytest tests/integration/test_shapes.py -v
"""

import sys
import os
import numpy as np
import pytest

# Add project root to path
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
sys.path.insert(0, project_root)

from tinytorch.core.tensor import Tensor
from tinytorch.core.layers import Linear
from tinytorch.core.activations import ReLU, Sigmoid, Tanh, Softmax
from tinytorch.core.spatial import Conv2d, MaxPool2d, AvgPool2d
from tinytorch.core.transformers import TransformerBlock, LayerNorm
from tinytorch.core.embeddings import Embedding, PositionalEncoding


class Sequential:
    """Simple sequential container for testing."""

    def __init__(self, layers):
        self.layers = layers

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        params = []
        for layer in self.layers:
            if hasattr(layer, 'parameters'):
                params.extend(layer.parameters())
        return params


class F:
    """Functional interface for testing."""

    @staticmethod
    def relu(x):
        return ReLU()(x)

    @staticmethod
    def sigmoid(x):
        return Sigmoid()(x)

    @staticmethod
    def tanh(x):
        return Tanh()(x)

    @staticmethod
    def softmax(x, dim=-1):
        # `dim` is accepted for PyTorch-style call sites; Softmax applies
        # along the last axis, which is what every test here uses.
        return Softmax()(x)

    @staticmethod
    def max_pool2d(x, kernel_size):
        return MaxPool2d(kernel_size)(x)

    @staticmethod
    def avg_pool2d(x, kernel_size):
        return AvgPool2d(kernel_size)(x)

    @staticmethod
    def flatten(x, start_dim=1):
        # Collapse all dimensions from start_dim onward into one.
        shape = x.shape
        new_shape = shape[:start_dim] + (int(np.prod(shape[start_dim:])),)
        return x.reshape(*new_shape)


# ============== Linear Layer Shape Tests ==============

def test_linear_basic_shape():
    """Linear layer produces correct output shape."""
    layer = Linear(10, 5)
    x = Tensor(np.random.randn(3, 10))
    y = layer(x)
    assert y.shape == (3, 5), f"Expected (3, 5), got {y.shape}"


def test_linear_single_sample():
    """Linear handles a single sample (batch=1)."""
    layer = Linear(10, 5)
    x = Tensor(np.random.randn(1, 10))
    y = layer(x)
    assert y.shape == (1, 5), f"Expected (1, 5), got {y.shape}"


def test_linear_large_batch():
    """Linear handles a large batch size."""
    layer = Linear(10, 5)
    x = Tensor(np.random.randn(32, 10))
    y = layer(x)
    assert y.shape == (32, 5), f"Expected (32, 5), got {y.shape}"


def test_linear_chain():
    """A chain of linear layers maintains correct dimensions."""
    layer1 = Linear(784, 256)
    layer2 = Linear(256, 128)
    layer3 = Linear(128, 10)

    x = Tensor(np.random.randn(16, 784))
    x = layer1(x)
    assert x.shape == (16, 256), f"After layer1: expected (16, 256), got {x.shape}"
    x = layer2(x)
    assert x.shape == (16, 128), f"After layer2: expected (16, 128), got {x.shape}"
    x = layer3(x)
    assert x.shape == (16, 10), f"After layer3: expected (16, 10), got {x.shape}"


# ============== Conv2d Shape Tests ==============
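
# The expected sizes in this section follow standard convolution
# arithmetic; the helper below simply restates the usual formula and is
# illustrative only (it is not part of the TinyTorch API or these tests):
def _conv_out(size, kernel_size, stride=1, padding=0):
    """out = (size + 2*padding - kernel_size) // stride + 1."""
    return (size + 2 * padding - kernel_size) // stride + 1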

def test_conv2d_basic():
    """Conv2d produces correct output shape with no padding."""
    layer = Conv2d(3, 16, kernel_size=3)
    x = Tensor(np.random.randn(2, 3, 32, 32))
    y = layer(x)
    # Output: (32 - 3)//1 + 1 = 30
    assert y.shape == (2, 16, 30, 30), f"Expected (2, 16, 30, 30), got {y.shape}"


def test_conv2d_with_padding():
    """Conv2d with padding=1 preserves spatial dimensions."""
    layer = Conv2d(3, 16, kernel_size=3, padding=1)
    x = Tensor(np.random.randn(2, 3, 32, 32))
    y = layer(x)
    assert y.shape == (2, 16, 32, 32), f"Expected (2, 16, 32, 32), got {y.shape}"


def test_conv2d_with_stride():
    """Conv2d with stride=2 halves spatial dimensions."""
    layer = Conv2d(3, 16, kernel_size=3, stride=2)
    x = Tensor(np.random.randn(2, 3, 32, 32))
    y = layer(x)
    # Output: (32 - 3)//2 + 1 = 15
    assert y.shape == (2, 16, 15, 15), f"Expected (2, 16, 15, 15), got {y.shape}"


def test_conv2d_1x1():
    """1x1 convolution preserves spatial dimensions."""
    layer = Conv2d(64, 32, kernel_size=1)
    x = Tensor(np.random.randn(4, 64, 14, 14))
    y = layer(x)
    assert y.shape == (4, 32, 14, 14), f"Expected (4, 32, 14, 14), got {y.shape}"


def test_conv2d_chain():
    """A chain of conv layers (typical CNN pattern)."""
    conv1 = Conv2d(1, 32, kernel_size=3)
    conv2 = Conv2d(32, 64, kernel_size=3)

    x = Tensor(np.random.randn(4, 1, 28, 28))  # MNIST-like
    x = conv1(x)
    assert x.shape == (4, 32, 26, 26), f"After conv1: expected (4, 32, 26, 26), got {x.shape}"
    x = conv2(x)
    assert x.shape == (4, 64, 24, 24), f"After conv2: expected (4, 64, 24, 24), got {x.shape}"


# ============== Activation Shape Tests ==============

def test_relu_preserves_2d_shape():
    """ReLU preserves 2D tensor shape."""
    x = Tensor(np.random.randn(10, 20))
    y = F.relu(x)
    assert y.shape == x.shape, f"ReLU changed shape: {x.shape} → {y.shape}"


def test_relu_preserves_4d_shape():
    """ReLU preserves 4D tensor shape (conv output)."""
    x = Tensor(np.random.randn(2, 16, 32, 32))
    y = F.relu(x)
    assert y.shape == x.shape, f"ReLU changed shape: {x.shape} → {y.shape}"


def test_sigmoid_preserves_shape():
    """Sigmoid preserves tensor shape."""
    x = Tensor(np.random.randn(5, 10))
    y = F.sigmoid(x)
    assert y.shape == x.shape, f"Sigmoid changed shape: {x.shape} → {y.shape}"


def test_tanh_preserves_shape():
    """Tanh preserves tensor shape."""
    x = Tensor(np.random.randn(5, 10))
    y = F.tanh(x)
    assert y.shape == x.shape, f"Tanh changed shape: {x.shape} → {y.shape}"


def test_softmax_preserves_shape():
    """Softmax preserves tensor shape."""
    x = Tensor(np.random.randn(5, 10))
    y = F.softmax(x, dim=-1)
    assert y.shape == x.shape, f"Softmax changed shape: {x.shape} → {y.shape}"


# ============== Pooling Shape Tests ==============
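
# Non-overlapping pooling with kernel k reduces each spatial dimension to
# size // k (the same arithmetic as _conv_out with stride == kernel_size).
# Illustrative helper, assuming the default stride equals the kernel size:
def _pool_out(size, kernel_size):
    """out = size // kernel_size for non-overlapping pooling windows."""
    return size // kernel_size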

def test_maxpool2d_kernel_2():
    """MaxPool2d with kernel=2 halves spatial dimensions."""
    x = Tensor(np.random.randn(2, 16, 32, 32))
    y = F.max_pool2d(x, kernel_size=2)
    assert y.shape == (2, 16, 16, 16), f"Expected (2, 16, 16, 16), got {y.shape}"


def test_maxpool2d_kernel_4():
    """MaxPool2d with kernel=4 quarters spatial dimensions."""
    x = Tensor(np.random.randn(2, 16, 32, 32))
    y = F.max_pool2d(x, kernel_size=4)
    assert y.shape == (2, 16, 8, 8), f"Expected (2, 16, 8, 8), got {y.shape}"


def test_avgpool2d_kernel_2():
    """AvgPool2d with kernel=2 halves spatial dimensions."""
    x = Tensor(np.random.randn(2, 16, 32, 32))
    y = F.avg_pool2d(x, kernel_size=2)
    assert y.shape == (2, 16, 16, 16), f"Expected (2, 16, 16, 16), got {y.shape}"


def test_pool_after_conv():
    """Pooling after convolution (common CNN pattern)."""
    conv = Conv2d(3, 32, kernel_size=5)
    x = Tensor(np.random.randn(4, 3, 32, 32))
    x = conv(x)
    assert x.shape == (4, 32, 28, 28), f"After conv: expected (4, 32, 28, 28), got {x.shape}"
    x = F.max_pool2d(x, 2)
    assert x.shape == (4, 32, 14, 14), f"After pool: expected (4, 32, 14, 14), got {x.shape}"


# ============== Reshape Operation Tests ==============
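
# Reshape is size-preserving: every expected shape in this section has the
# same element count as its input. A minimal invariant check (illustrative
# only, not used by the tests below):
def _same_numel(old_shape, new_shape):
    """True when a reshape between the two shapes preserves element count."""
    return int(np.prod(old_shape)) == int(np.prod(new_shape))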

def test_flatten_4d():
    """Flatten a 4D tensor for the FC layer after a Conv."""
    x = Tensor(np.random.randn(4, 64, 5, 5))
    y = F.flatten(x, start_dim=1)
    assert y.shape == (4, 1600), f"Expected (4, 1600), got {y.shape}"


def test_flatten_cnn_to_fc():
    """Flatten for the CNN→FC transition."""
    x = Tensor(np.random.randn(8, 128, 7, 7))
    y = F.flatten(x, start_dim=1)
    expected = 128 * 7 * 7
    assert y.shape == (8, expected), f"Expected (8, {expected}), got {y.shape}"


def test_reshape_3d_to_2d():
    """Reshape a 3D tensor to 2D."""
    x = Tensor(np.random.randn(2, 3, 4))
    y = x.reshape(6, 4)
    assert y.shape == (6, 4), f"Expected (6, 4), got {y.shape}"


def test_reshape_to_flat():
    """Reshape to 1D (flatten completely)."""
    x = Tensor(np.random.randn(2, 3, 4))
    y = x.reshape(24)
    assert y.shape == (24,), f"Expected (24,), got {y.shape}"


def test_reshape_batch_preserve():
    """Reshape while preserving the batch dimension."""
    x = Tensor(np.random.randn(10, 3, 4))
    y = x.reshape(10, 12)
    assert y.shape == (10, 12), f"Expected (10, 12), got {y.shape}"


# ============== Transformer Component Tests ==============
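
# An embedding lookup maps integer ids of shape (batch, seq) to
# (batch, seq, embed) by indexing a (vocab, embed) table; the other
# components below are shape-preserving. A minimal numpy sketch of the
# lookup rule (illustrative only, independent of TinyTorch internals):
def _embedding_lookup_shape(vocab, embed, batch, seq):
    """Shape of table[ids] for a (vocab, embed) table and (batch, seq) ids."""
    table = np.zeros((vocab, embed))
    ids = np.random.randint(0, vocab, (batch, seq))
    return table[ids].shape  # (batch, seq, embed)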

def test_embedding_shape():
    """Embedding produces correct output shape."""
    embed = Embedding(1000, 128)
    input_ids = Tensor(np.random.randint(0, 1000, (4, 10)))
    x = embed(input_ids)
    assert x.shape == (4, 10, 128), f"Expected (4, 10, 128), got {x.shape}"


def test_positional_encoding_preserves_shape():
    """Positional encoding preserves tensor shape."""
    # PositionalEncoding(max_seq_len, embed_dim): max_seq_len (50) must cover
    # the sequence length (10), and embed_dim must match the input (128).
    pos_enc = PositionalEncoding(50, 128)
    x = Tensor(np.random.randn(4, 10, 128))
    y = pos_enc(x)
    assert y.shape == x.shape, f"PositionalEncoding changed shape: {x.shape} → {y.shape}"


def test_transformer_block_preserves_shape():
    """TransformerBlock preserves tensor shape."""
    block = TransformerBlock(128, num_heads=8)
    x = Tensor(np.random.randn(4, 10, 128))
    y = block(x)
    assert y.shape == x.shape, f"TransformerBlock changed shape: {x.shape} → {y.shape}"


def test_layernorm_preserves_shape():
    """LayerNorm preserves tensor shape."""
    ln = LayerNorm(128)
    x = Tensor(np.random.randn(4, 10, 128))
    y = ln(x)
    assert y.shape == x.shape, f"LayerNorm changed shape: {x.shape} → {y.shape}"


def test_transformer_output_projection():
    """Transformer output projection via reshape → Linear → reshape."""
    batch, seq, embed = 4, 10, 128
    vocab = 1000

    x = Tensor(np.random.randn(batch, seq, embed))
    x_2d = x.reshape(batch * seq, embed)
    assert x_2d.shape == (40, 128), f"Expected (40, 128), got {x_2d.shape}"

    proj = Linear(embed, vocab)
    logits_2d = proj(x_2d)
    assert logits_2d.shape == (40, 1000), f"Expected (40, 1000), got {logits_2d.shape}"

    logits = logits_2d.reshape(batch, seq, vocab)
    assert logits.shape == (4, 10, 1000), f"Expected (4, 10, 1000), got {logits.shape}"


# ============== Batch Size Flexibility Tests ==============

@pytest.mark.parametrize("batch_size", [1, 2, 8, 32])
def test_linear_batch_flexibility(batch_size):
    """Linear handles various batch sizes."""
    layer = Linear(100, 50)
    x = Tensor(np.random.randn(batch_size, 100))
    y = layer(x)
    assert y.shape == (batch_size, 50), f"Batch {batch_size}: expected ({batch_size}, 50), got {y.shape}"


@pytest.mark.parametrize("batch_size", [1, 2, 8, 16])
def test_conv2d_batch_flexibility(batch_size):
    """Conv2d handles various batch sizes."""
    layer = Conv2d(3, 16, kernel_size=3)
    x = Tensor(np.random.randn(batch_size, 3, 32, 32))
    y = layer(x)
    assert y.shape == (batch_size, 16, 30, 30), f"Batch {batch_size}: got {y.shape}"


@pytest.mark.parametrize("batch_size", [1, 4, 16])
def test_sequential_batch_flexibility(batch_size):
    """A Sequential model handles various batch sizes."""
    model = Sequential([
        Linear(10, 20),
        ReLU(),
        Linear(20, 5)
    ])
    x = Tensor(np.random.randn(batch_size, 10))
    y = model(x)
    assert y.shape == (batch_size, 5), f"Batch {batch_size}: expected ({batch_size}, 5), got {y.shape}"


# ============== Edge Cases ==============

def test_conv_small_spatial():
    """Conv on very small spatial dimensions."""
    x = Tensor(np.random.randn(2, 16, 3, 3))
    conv = Conv2d(16, 32, kernel_size=3)
    y = conv(x)
    assert y.shape == (2, 32, 1, 1), f"Expected (2, 32, 1, 1), got {y.shape}"


def test_flatten_already_2d():
    """Flatten on an already-2D tensor (should be a no-op)."""
    x = Tensor(np.random.randn(10, 20))
    y = F.flatten(x, start_dim=1)
    assert y.shape == (10, 20), f"Expected (10, 20), got {y.shape}"


def test_single_channel_conv():
    """Conv with a single input channel (grayscale images)."""
    conv = Conv2d(1, 8, kernel_size=3)
    x = Tensor(np.random.randn(2, 1, 28, 28))
    y = conv(x)
    assert y.shape == (2, 8, 26, 26), f"Expected (2, 8, 26, 26), got {y.shape}"


# ============== Integration Pattern Tests ==============
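
# Worked arithmetic for the traces below, composing the formulas above.
# MNIST: 28 → conv3 → 26 → pool2 → 13 → conv3 → 11 → pool2 → 5, so the
# flattened feature count is 64 * 5 * 5 = 1600. CIFAR-10 follows the same
# rules: 32 → 30 → 15 → 13 → 6, giving 64 * 6 * 6 = 2304.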

def test_mnist_cnn_dimensions():
    """Complete MNIST CNN dimension flow."""
    x = Tensor(np.random.randn(32, 1, 28, 28))  # MNIST batch

    # Conv block 1
    conv1 = Conv2d(1, 32, kernel_size=3)
    x = conv1(x)
    assert x.shape == (32, 32, 26, 26), f"After conv1: {x.shape}"
    x = F.max_pool2d(x, 2)
    assert x.shape == (32, 32, 13, 13), f"After pool1: {x.shape}"

    # Conv block 2
    conv2 = Conv2d(32, 64, kernel_size=3)
    x = conv2(x)
    assert x.shape == (32, 64, 11, 11), f"After conv2: {x.shape}"
    x = F.max_pool2d(x, 2)
    assert x.shape == (32, 64, 5, 5), f"After pool2: {x.shape}"

    # Flatten for FC
    x = F.flatten(x, start_dim=1)
    assert x.shape == (32, 1600), f"After flatten: {x.shape}"

    # FC layers
    fc1 = Linear(1600, 128)
    x = fc1(x)
    assert x.shape == (32, 128), f"After fc1: {x.shape}"

    fc2 = Linear(128, 10)
    x = fc2(x)
    assert x.shape == (32, 10), f"Final output: {x.shape}"


def test_cifar10_cnn_dimensions():
    """Complete CIFAR-10 CNN dimension flow."""
    x = Tensor(np.random.randn(16, 3, 32, 32))  # CIFAR-10 batch

    # Conv block 1
    conv1 = Conv2d(3, 32, kernel_size=3)
    x = conv1(x)
    assert x.shape == (16, 32, 30, 30), f"After conv1: {x.shape}"
    x = F.max_pool2d(x, 2)
    assert x.shape == (16, 32, 15, 15), f"After pool1: {x.shape}"

    # Conv block 2
    conv2 = Conv2d(32, 64, kernel_size=3)
    x = conv2(x)
    assert x.shape == (16, 64, 13, 13), f"After conv2: {x.shape}"
    x = F.max_pool2d(x, 2)
    assert x.shape == (16, 64, 6, 6), f"After pool2: {x.shape}"

    # Flatten and FC
    x = F.flatten(x, start_dim=1)
    assert x.shape == (16, 2304), f"After flatten: {x.shape}"

    fc = Linear(2304, 10)
    x = fc(x)
    assert x.shape == (16, 10), f"Final output: {x.shape}"


if __name__ == "__main__":
    # When run directly, delegate to pytest and propagate its exit code.
    import subprocess
    result = subprocess.run(["pytest", __file__, "-v"], capture_output=True, text=True)
    print(result.stdout)
    if result.stderr:
        print(result.stderr)
    sys.exit(result.returncode)