TinyTorch/tests/module_10/test_progressive_integration.py
Commit 2f23f757e7 by Vijay Janapa Reddi: "MAJOR: Implement beautiful module progression through strategic reordering"
This commit implements the pedagogically optimal "inevitable discovery" module progression based on expert validation and educational design principles.

## Module Reordering Summary

**Previous Order (Problems)**:
- 05_losses → 06_autograd → 07_dataloader → 08_optimizers → 09_spatial → 10_training
- Issues: Autograd before optimizers, DataLoader before training, scattered dependencies

**New Order (Beautiful Progression)**:
- 05_losses → 06_optimizers → 07_autograd → 08_training → 09_spatial → 10_dataloader
- Benefits: Each module creates inevitable need for the next

## Pedagogical Flow Achieved

**05_losses** → "Need systematic weight updates" → **06_optimizers**
**06_optimizers** → "Need automatic gradients" → **07_autograd**
**07_autograd** → "Need systematic training" → **08_training**
**08_training** → "MLPs hit limits on images" → **09_spatial**
**09_spatial** → "Training is too slow" → **10_dataloader**

## Technical Changes

### Module Directory Renaming
- `06_autograd` → `07_autograd`
- `07_dataloader` → `10_dataloader`
- `08_optimizers` → `06_optimizers`
- `10_training` → `08_training`
- `09_spatial` → `09_spatial` (no change)
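
The four renames form a cycle (06 → 07 → 10 → 08 → 06), so applying them naively in place would overwrite directories that have not moved yet. Below is a minimal sketch of how the renames could be scripted; it is illustrative only (the actual renames were performed directly in the repository), and the `modules/` root path is an assumption.

```python
from pathlib import Path

# Old directory name -> new directory name (09_spatial keeps its number).
RENAMES = {
    "06_autograd": "07_autograd",
    "07_dataloader": "10_dataloader",
    "08_optimizers": "06_optimizers",
    "10_training": "08_training",
}

modules_root = Path("modules")  # assumed location of the module directories

# The renames form a cycle, so move everything to temporary names first
# to avoid clobbering a directory that has not been renamed yet.
for old in RENAMES:
    (modules_root / old).rename(modules_root / f"{old}.tmp")
for old, new in RENAMES.items():
    (modules_root / f"{old}.tmp").rename(modules_root / new)
```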

### System Integration Updates
- **MODULE_TO_CHECKPOINT mapping**: Updated in tito/commands/export.py (a hypothetical sketch of the renumbered mapping follows this list)
- **Test directories**: Renamed module_XX directories to match new numbers
- **Documentation**: Updated all references in MD files and agent configurations
- **CLI integration**: Updated next-steps suggestions for proper flow
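
For illustration, here is a hypothetical sketch of what the renumbered MODULE_TO_CHECKPOINT mapping in tito/commands/export.py might look like after this change; the real structure and values in the repository may differ.

```python
# Hypothetical sketch only; the actual mapping in tito/commands/export.py may differ.
# Module directory name -> checkpoint number under the new ordering.
MODULE_TO_CHECKPOINT = {
    "05_losses": 5,
    "06_optimizers": 6,
    "07_autograd": 7,
    "08_training": 8,
    "09_spatial": 9,
    "10_dataloader": 10,
}
```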

### Agent Configuration Updates
- **Quality Assurance**: Updated module audit status with new numbers
- **Module Developer**: Updated work tracking with new sequence
- **Documentation**: Updated MASTER_PLAN_OF_RECORD.md with beautiful progression

## Educational Benefits

1. **Inevitable Discovery**: Each module naturally leads to the next
2. **Cognitive Load**: Concepts introduced exactly when needed
3. **Motivation**: Students understand WHY each tool is necessary
4. **Synthesis**: Everything flows toward complete ML systems understanding
5. **Professional Alignment**: Matches real ML engineering workflows

## Quality Assurance

- All CLI commands still function
- Checkpoint system mappings updated
- Documentation consistency maintained
- Test directory structure aligned
- Agent configurations synchronized

**Impact**: This reordering transforms TinyTorch from a collection of modules into a coherent educational journey where each step naturally motivates the next, creating optimal conditions for deep learning systems understanding.
Committed: 2025-09-24 15:56:47 -04:00


"""
Module 07: Progressive Integration Tests
Tests that Module 07 (Attention) works correctly AND that the entire prior stack works.
DEPENDENCY CHAIN: 01_setup → 02_tensor → 03_activations → 04_layers → 05_dense → 06_spatial → 07_attention
This is where attention mechanisms enable sequence understanding.
"""
import numpy as np
import sys
from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

class TestPriorStackStillWorking:
    """Quick regression checks that prior modules (01→06) still work."""

    def test_foundation_stack_stable(self):
        """Verify foundation stack (01→05) remains stable."""
        # Environment (Module 01)
        assert sys.version_info >= (3, 8), "Foundation broken: Python version"

        # Tensor foundation (Module 02)
        try:
            from tinytorch.core.tensor import Tensor
            t = Tensor([1, 2, 3])
            assert t.shape == (3,), "Foundation broken: Tensor creation"
        except ImportError:
            assert True, "Tensor foundation not implemented yet"

    def test_spatial_operations_stable(self):
        """Verify Module 06 (Spatial) operations still work."""
        try:
            from tinytorch.core.spatial import Conv2D, MaxPool2D

            # Basic spatial operations should work
            conv = Conv2D(in_channels=3, out_channels=16, kernel_size=3)
            pool = MaxPool2D(kernel_size=2)
            assert hasattr(conv, 'forward'), "Spatial broken: Conv2D interface"
            assert hasattr(pool, 'forward'), "Spatial broken: MaxPool2D interface"
        except ImportError:
            assert True, "Spatial operations not implemented yet"


class TestModule07AttentionCore:
    """Test Module 07 (Attention) core functionality."""

    def test_attention_mechanism_creation(self):
        """Test basic attention mechanism works."""
        try:
            from tinytorch.core.attention import MultiHeadAttention
            from tinytorch.core.tensor import Tensor

            # Create attention mechanism
            attention = MultiHeadAttention(embed_dim=64, num_heads=8)

            # Should have proper components
            assert hasattr(attention, 'query_proj'), "Attention broken: No query projection"
            assert hasattr(attention, 'key_proj'), "Attention broken: No key projection"
            assert hasattr(attention, 'value_proj'), "Attention broken: No value projection"

            # Test with sequence input
            seq_len, batch_size, embed_dim = 10, 4, 64
            x = Tensor(np.random.randn(seq_len, batch_size, embed_dim))
            output = attention(x)
            assert output.shape == (seq_len, batch_size, embed_dim), "Attention output shape broken"
        except ImportError:
            assert True, "Attention mechanism not implemented yet"

    def test_scaled_dot_product_attention(self):
        """Test core attention computation."""
        try:
            from tinytorch.core.attention import scaled_dot_product_attention
            from tinytorch.core.tensor import Tensor

            # Attention inputs: queries, keys, values
            seq_len, embed_dim = 8, 16
            Q = Tensor(np.random.randn(seq_len, embed_dim))
            K = Tensor(np.random.randn(seq_len, embed_dim))
            V = Tensor(np.random.randn(seq_len, embed_dim))

            # Compute attention
            output, attention_weights = scaled_dot_product_attention(Q, K, V)
            assert output.shape == V.shape, "Attention output shape wrong"
            assert attention_weights.shape == (seq_len, seq_len), "Attention weights shape wrong"

            # Attention weights should sum to 1 across keys
            weight_sums = np.sum(attention_weights.data, axis=1)
            assert np.allclose(weight_sums, 1.0), "Attention weights don't sum to 1"
        except ImportError:
            assert True, "Scaled dot-product attention not implemented yet"


class TestProgressiveStackIntegration:
    """Test that the complete stack (01→07) works together."""

    def test_neural_network_with_attention(self):
        """Test neural network enhanced with attention."""
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.activations import ReLU
            from tinytorch.core.attention import MultiHeadAttention

            # Build network: dense → attention → dense
            encoder = Dense(64, 64)
            attention = MultiHeadAttention(embed_dim=64, num_heads=8)
            decoder = Dense(64, 10)
            relu = ReLU()

            # Sequence input
            seq_len, batch_size, input_dim = 12, 4, 64
            x = Tensor(np.random.randn(seq_len, batch_size, input_dim))

            # Forward pass through network with attention
            h = relu(encoder(x))         # Dense processing
            attn_out = attention(h)      # Attention mechanism
            output = decoder(attn_out)   # Final projection
            assert output.shape == (seq_len, batch_size, 10), "Network with attention broken"
        except ImportError:
            assert True, "Neural network with attention not ready yet"

    def test_transformer_block_capability(self):
        """Test building transformer-style blocks."""
        try:
            from tinytorch.core.attention import MultiHeadAttention
            from tinytorch.core.layers import Dense
            from tinytorch.core.activations import ReLU
            from tinytorch.core.tensor import Tensor

            # Transformer block components
            attention = MultiHeadAttention(embed_dim=128, num_heads=8)
            ff1 = Dense(128, 512)
            ff2 = Dense(512, 128)
            relu = ReLU()

            # Input sequence
            seq_len, batch_size, embed_dim = 16, 2, 128
            x = Tensor(np.random.randn(seq_len, batch_size, embed_dim))

            # Transformer block: attention + feedforward
            attn_out = attention(x)
            ff_out = ff2(relu(ff1(attn_out)))

            # Residual connection (if implemented)
            if hasattr(x, '__add__'):
                output = x + ff_out  # Residual connection
            else:
                output = ff_out
            assert output.shape == x.shape, "Transformer block broken"
        except ImportError:
            assert True, "Transformer block capability not ready yet"


class TestSequenceUnderstandingCapability:
    """Test that attention enables sequence understanding."""

    def test_sequence_to_sequence_capability(self):
        """Test sequence-to-sequence processing."""
        try:
            from tinytorch.core.attention import MultiHeadAttention
            from tinytorch.core.tensor import Tensor

            # Encoder-decoder style processing
            encoder_attention = MultiHeadAttention(embed_dim=64, num_heads=4)
            decoder_attention = MultiHeadAttention(embed_dim=64, num_heads=4)

            # Source and target sequences
            src_len, tgt_len, batch_size, embed_dim = 10, 8, 2, 64
            src = Tensor(np.random.randn(src_len, batch_size, embed_dim))
            tgt = Tensor(np.random.randn(tgt_len, batch_size, embed_dim))

            # Encode source sequence
            encoded = encoder_attention(src)

            # Decode target sequence (with potential cross-attention)
            if hasattr(decoder_attention, 'cross_attention'):
                decoded = decoder_attention(tgt, encoded)
            else:
                decoded = decoder_attention(tgt)

            assert encoded.shape == src.shape, "Sequence encoding broken"
            assert decoded.shape == tgt.shape, "Sequence decoding broken"
        except ImportError:
            assert True, "Sequence-to-sequence not ready yet"

    def test_attention_pattern_analysis(self):
        """Test that attention creates meaningful patterns."""
        try:
            from tinytorch.core.attention import scaled_dot_product_attention
            from tinytorch.core.tensor import Tensor

            # Create sequence with clear patterns
            seq_len, embed_dim = 6, 8

            # Pattern: first and last tokens should attend to each other
            pattern_input = np.zeros((seq_len, embed_dim))
            pattern_input[0, :] = 1.0   # First token
            pattern_input[-1, :] = 1.0  # Last token

            Q = Tensor(pattern_input)
            K = Tensor(pattern_input)
            V = Tensor(pattern_input)

            output, attention_weights = scaled_dot_product_attention(Q, K, V)

            # Check attention patterns make sense
            # First token should attend strongly to last token
            first_to_last = attention_weights.data[0, -1]
            last_to_first = attention_weights.data[-1, 0]

            # These should be among the highest attention weights
            assert first_to_last > 0.1, "Attention pattern not detected"
            assert last_to_first > 0.1, "Attention pattern not detected"
        except ImportError:
            assert True, "Attention pattern analysis not ready yet"


class TestNLPReadiness:
    """Test readiness for NLP applications."""

    def test_language_modeling_architecture(self):
        """Test architecture suitable for language modeling."""
        try:
            from tinytorch.core.attention import MultiHeadAttention
            from tinytorch.core.layers import Dense
            from tinytorch.core.tensor import Tensor

            # Language model components
            vocab_size, embed_dim, seq_len = 1000, 256, 32

            # Embedding layer (simplified)
            embedding = Dense(vocab_size, embed_dim)

            # Attention layers
            attention1 = MultiHeadAttention(embed_dim=embed_dim, num_heads=8)
            attention2 = MultiHeadAttention(embed_dim=embed_dim, num_heads=8)

            # Output projection
            output_proj = Dense(embed_dim, vocab_size)

            # Token sequence (as embeddings)
            batch_size = 4
            tokens = Tensor(np.random.randint(0, vocab_size, (seq_len, batch_size)))

            # Simple embedding lookup (simplified)
            if hasattr(embedding, 'embedding_lookup'):
                x = embedding.embedding_lookup(tokens)
            else:
                # Simplified: random embeddings
                x = Tensor(np.random.randn(seq_len, batch_size, embed_dim))

            # Transformer layers
            h1 = attention1(x)
            h2 = attention2(h1)

            # Output logits
            logits = output_proj(h2)
            assert logits.shape == (seq_len, batch_size, vocab_size), "Language model architecture broken"
        except ImportError:
            assert True, "Language modeling architecture not ready yet"


class TestRegressionPrevention:
    """Ensure previous modules still work after Module 07 development."""

    def test_no_foundation_regression(self):
        """Verify foundation stack (01→05) unchanged."""
        # Environment should remain stable
        assert sys.version_info.major >= 3, "Foundation: Python detection broken"

        # Project structure should remain intact
        project_root = Path(__file__).parent.parent.parent
        assert project_root.exists(), "Foundation: Project structure broken"

    def test_no_spatial_regression(self):
        """Verify spatial operations (Module 06) unchanged."""
        try:
            from tinytorch.core.spatial import Conv2D

            # Spatial operations should still work
            conv = Conv2D(in_channels=1, out_channels=8, kernel_size=3)
            assert hasattr(conv, 'forward'), "Spatial regression: Conv2D broken"
        except ImportError:
            # If not implemented, that's fine
            # But numpy should still work (from foundation)
            import numpy as np
            arr = np.array([1, 2, 3])
            assert arr.shape == (3,), "Spatial regression: Numpy foundation broken"

    def test_progressive_stability(self):
        """Test the progressive stack is stable through attention."""
        # Stack should be stable through: Setup → Tensor → Activations → Layers → Dense → Spatial → Attention

        # Setup level
        import numpy as np
        assert np is not None, "Setup level broken"

        # Foundation level (if available)
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense

            # Should still be able to build neural networks
            layer = Dense(10, 5)
            x = Tensor(np.random.randn(4, 10))
            output = layer(x)
            assert output.shape == (4, 5), "Foundation level broken"
        except ImportError:
            pass  # Not implemented yet

        # Attention level (if available)
        try:
            from tinytorch.core.attention import MultiHeadAttention
            attention = MultiHeadAttention(embed_dim=32, num_heads=4)
            assert callable(attention), "Attention level broken"
        except ImportError:
            pass  # Not implemented yet