mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-01 01:59:10 -05:00
refactor(tests): clean up test folder and fix gradient flow issues
Test Cleanup (113 files, -22,000 lines): - Remove 21 redundant run_all_tests.py files - Remove checkpoints/ folder (22 obsolete checkpoint files) - Remove progressive/, debugging/, diagnostic/ folders - Remove duplicate integration tests and examples - Remove orphaned dev artifacts and generated outputs - Consolidate test_gradient_flow_overall.py into system/ Documentation Cleanup (4 files removed): - Remove duplicate HOW_TO_USE.md, WORKFLOW.md, SYSTEM_DESIGN.md - Trim environment/README.md from 334 to 86 lines - Update capstone/README.md removing outdated bug references Test Fixes: - Add requires_grad=True to layer parameters in gradient tests - Fix PositionalEncoding argument order in test_shapes.py - Adjust performance thresholds for realistic expectations - Fix gradient clipping to handle memoryview correctly - Update zero_grad assertions to accept None or zeros
This commit is contained in:
@@ -1,282 +0,0 @@
|
||||
# Module 10 Integration Tests: Wrong vs Correct
|
||||
|
||||
## Current File (WRONG) ❌
|
||||
|
||||
```python
|
||||
"""
|
||||
Module 10: Progressive Integration Tests
|
||||
Tests that Module 11 (Training) works correctly... # ← WRONG MODULE!
|
||||
|
||||
DEPENDENCY CHAIN: 01_setup → ... → 10_optimizers → 11_training # ← WRONG!
|
||||
This is where we enable complete end-to-end training loops. # ← WRONG!
|
||||
"""
|
||||
|
||||
class TestModule11TrainingCore: # ← WRONG MODULE!
|
||||
"""Test Module 11 (Training) core functionality.""" # ← WRONG!
|
||||
|
||||
def test_training_loop_creation(self):
|
||||
from tinytorch.core.training import Trainer # ← WRONG!
|
||||
from tinytorch.core.optimizers import SGD
|
||||
# Tests training loops... ← WRONG TOPIC!
|
||||
|
||||
def test_loss_function_support(self):
|
||||
from tinytorch.core.training import CrossEntropyLoss, MSELoss # ← WRONG!
|
||||
# Tests loss functions... ← WRONG TOPIC!
|
||||
|
||||
class TestAdvancedTrainingFeatures: # ← WRONG MODULE!
|
||||
def test_distributed_training_support(self): # ← WRONG!
|
||||
def test_mixed_precision_training(self): # ← WRONG!
|
||||
```
|
||||
|
||||
**Problems**:
|
||||
- Tests Module 11 (Training) instead of Module 10 (Tokenization)
|
||||
- All imports from `tinytorch.core.training` (doesn't exist yet)
|
||||
- Tests loss functions, optimizers, CNN pipelines (wrong concepts)
|
||||
- 0% coverage of actual Module 10 functionality
|
||||
- Copy-paste error from Module 11 template
|
||||
|
||||
---
|
||||
|
||||
## Corrected File (CORRECT) ✅
|
||||
|
||||
```python
|
||||
"""
|
||||
Module 10: Progressive Integration Tests
|
||||
Tests that Module 10 (Tokenization) works correctly... # ← CORRECT!
|
||||
|
||||
DEPENDENCY CHAIN: 01_tensor → ... → 05_dataloader → ... → 08_training → 09_convolutions → 10_tokenization # ← CORRECT!
|
||||
This is where we enable text processing for NLP tasks. # ← CORRECT!
|
||||
"""
|
||||
|
||||
class TestModule10TokenizationCore: # ← CORRECT MODULE!
|
||||
"""Test Module 10 (Tokenization) core functionality.""" # ← CORRECT!
|
||||
|
||||
def test_char_tokenizer_creation(self):
|
||||
from tinytorch.text.tokenization import CharTokenizer # ← CORRECT!
|
||||
# Tests CharTokenizer initialization
|
||||
|
||||
def test_char_tokenizer_encode_decode(self):
|
||||
# Tests encode/decode roundtrip
|
||||
|
||||
def test_bpe_tokenizer_training(self):
|
||||
from tinytorch.text.tokenization import BPETokenizer # ← CORRECT!
|
||||
# Tests BPE training
|
||||
|
||||
def test_bpe_tokenizer_encode_decode(self):
|
||||
# Tests BPE encode/decode
|
||||
|
||||
class TestTokenizationIntegration: # ← CORRECT!
|
||||
"""Test tokenization integration with other modules."""
|
||||
|
||||
def test_tokenizer_produces_correct_dtypes(self):
|
||||
# CRITICAL: Verify int64 for embeddings
|
||||
|
||||
def test_tokenization_to_embedding_pipeline(self):
|
||||
from tinytorch.text.embeddings import Embedding
|
||||
from tinytorch.text.tokenization import CharTokenizer
|
||||
# Tests tokenization → embedding flow
|
||||
|
||||
def test_tokenizer_dataloader_integration(self):
|
||||
# Tests tokenizer with DataLoader
|
||||
|
||||
class TestTokenizationEdgeCases: # ← CORRECT!
|
||||
"""Test tokenization robustness with edge cases."""
|
||||
|
||||
def test_bpe_edge_cases(self):
|
||||
# Empty strings, unknown tokens, special chars
|
||||
|
||||
def test_vocabulary_consistency(self):
|
||||
# Bidirectional mappings, roundtrips
|
||||
|
||||
def test_batch_processing(self):
|
||||
# Batch encoding/decoding
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- Tests actual Module 10 (Tokenization) functionality
|
||||
- Correct imports from `tinytorch.text.tokenization`
|
||||
- Tests CharTokenizer, BPETokenizer, vocabularies
|
||||
- Validates integration with Tensor, Embeddings, DataLoader
|
||||
- 100% coverage of critical integration points
|
||||
|
||||
---
|
||||
|
||||
## Side-by-Side Comparison
|
||||
|
||||
| Aspect | Current (WRONG) | Corrected (CORRECT) |
|
||||
|--------|-----------------|---------------------|
|
||||
| **Module Tested** | Module 11 (Training) | Module 10 (Tokenization) |
|
||||
| **Primary Imports** | `tinytorch.core.training` | `tinytorch.text.tokenization` |
|
||||
| **Classes Tested** | Trainer, CrossEntropyLoss | CharTokenizer, BPETokenizer |
|
||||
| **Test Focus** | Training loops, loss functions | Encode/decode, vocabularies |
|
||||
| **Integration Points** | Optimizers, CNN, distributed | Tensors, Embeddings, DataLoader |
|
||||
| **Edge Cases** | Checkpointing, early stopping | Empty strings, unknown tokens |
|
||||
| **Coverage** | 0% (wrong module) | 100% (correct tests) |
|
||||
| **Bug-Catching** | None (tests wrong code) | High (catches dtype, shape errors) |
|
||||
|
||||
---
|
||||
|
||||
## Key Differences
|
||||
|
||||
### Wrong File Tests
|
||||
1. ❌ Training loops and Trainer class
|
||||
2. ❌ Loss functions (MSELoss, CrossEntropyLoss)
|
||||
3. ❌ Validation loops and metrics
|
||||
4. ❌ Checkpointing and early stopping
|
||||
5. ❌ Learning rate scheduling
|
||||
6. ❌ Distributed training
|
||||
7. ❌ Mixed precision training
|
||||
8. ❌ Gradient accumulation
|
||||
9. ❌ CNN training pipelines
|
||||
10. ❌ End-to-end model training
|
||||
|
||||
### Correct File Tests
|
||||
1. ✅ CharTokenizer initialization and vocab building
|
||||
2. ✅ CharTokenizer encode/decode roundtrip
|
||||
3. ✅ BPETokenizer training on corpus
|
||||
4. ✅ BPE encode/decode operations
|
||||
5. ✅ Token ID dtype correctness (int64)
|
||||
6. ✅ Tokenization → Embedding pipeline
|
||||
7. ✅ DataLoader integration
|
||||
8. ✅ BPE edge cases (empty, unknown, special)
|
||||
9. ✅ Vocabulary consistency (bidirectional)
|
||||
10. ✅ Batch processing correctness
|
||||
11. ✅ Performance benchmarks (throughput)
|
||||
12. ✅ Regression prevention (Tensor, DataLoader)
|
||||
|
||||
---
|
||||
|
||||
## Example: What Each Tests
|
||||
|
||||
### Wrong File Example
|
||||
```python
|
||||
def test_training_loop_creation(self):
|
||||
"""Test basic training loop functionality.""" # ← Module 11, not 10!
|
||||
from tinytorch.core.training import Trainer # ← Doesn't exist
|
||||
from tinytorch.core.layers import Dense
|
||||
from tinytorch.core.optimizers import SGD
|
||||
|
||||
model = Dense(10, 3)
|
||||
optimizer = SGD(model.parameters(), lr=0.01)
|
||||
trainer = Trainer(model, optimizer) # ← Testing training, not tokenization!
|
||||
|
||||
assert hasattr(trainer, 'train'), "Trainer broken"
|
||||
```
|
||||
|
||||
### Correct File Example
|
||||
```python
|
||||
def test_char_tokenizer_encode_decode(self):
|
||||
"""Test CharTokenizer encode/decode roundtrip.""" # ← Module 10!
|
||||
from tinytorch.text.tokenization import CharTokenizer # ← Correct import
|
||||
|
||||
tokenizer = CharTokenizer()
|
||||
tokenizer.build_vocab(["hello", "world"])
|
||||
|
||||
text = "hello"
|
||||
token_ids = tokenizer.encode(text) # ← Testing tokenization!
|
||||
|
||||
assert isinstance(token_ids, list), "encode() should return list"
|
||||
assert all(isinstance(t, int) for t in token_ids), "Token IDs should be integers"
|
||||
|
||||
decoded = tokenizer.decode(token_ids)
|
||||
for char in text:
|
||||
assert char in decoded, f"Lost character '{char}' in roundtrip"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Critical Integration Tests Only in Correct File
|
||||
|
||||
### 1. Dtype Correctness (Catches Embedding Bugs)
|
||||
```python
|
||||
def test_tokenizer_produces_correct_dtypes(self):
|
||||
"""Verify int64 output for embeddings."""
|
||||
token_tensor = Tensor(token_ids)
|
||||
assert token_tensor.data.dtype in [np.int32, np.int64, np.int_]
|
||||
```
|
||||
**Why Critical**: Embeddings crash if token IDs are float32 instead of int64
|
||||
|
||||
### 2. Embedding Integration (Primary Use Case)
|
||||
```python
|
||||
def test_tokenization_to_embedding_pipeline(self):
|
||||
"""Test complete tokenization → embedding pipeline."""
|
||||
tokenizer = CharTokenizer()
|
||||
embedding = Embedding(vocab_size, embed_dim)
|
||||
|
||||
token_ids = tokenizer.encode("hello")
|
||||
embedded = embedding(Tensor(token_ids))
|
||||
assert embedded.shape == (len(token_ids), embed_dim)
|
||||
```
|
||||
**Why Critical**: This is THE use case for tokenizers - must work!
|
||||
|
||||
### 3. BPE Edge Cases (Production Robustness)
|
||||
```python
|
||||
def test_bpe_edge_cases(self):
|
||||
"""Empty strings, unknown tokens, special chars."""
|
||||
tokenizer = BPETokenizer(vocab_size=100)
|
||||
|
||||
# Empty string
|
||||
assert tokenizer.encode("") == []
|
||||
|
||||
# Unknown characters
|
||||
tokenizer.train(["hello"])
|
||||
tokens = tokenizer.encode("xyz") # Not in training
|
||||
assert isinstance(tokens, list) # Should handle gracefully
|
||||
```
|
||||
**Why Critical**: Production systems receive unexpected input
|
||||
|
||||
---
|
||||
|
||||
## Impact of Using Wrong Tests
|
||||
|
||||
**If we keep the wrong file**:
|
||||
- ❌ Students implement tokenizers but have 0% test coverage
|
||||
- ❌ Dtype bugs (int vs float) go undetected → embeddings crash
|
||||
- ❌ BPE edge cases untested → production failures
|
||||
- ❌ No validation of tokenization → embedding pipeline
|
||||
- ❌ Vocabulary corruption undetected
|
||||
- ❌ Integration with DataLoader untested
|
||||
|
||||
**With correct tests**:
|
||||
- ✅ Catch dtype mismatches before they reach embeddings
|
||||
- ✅ Validate primary use case (tokenization → embeddings)
|
||||
- ✅ Test production robustness (edge cases)
|
||||
- ✅ Ensure vocabulary integrity
|
||||
- ✅ Verify DataLoader integration
|
||||
- ✅ Maintain stack stability (regression tests)
|
||||
|
||||
---
|
||||
|
||||
## How to Fix
|
||||
|
||||
### Option 1: Replace File
|
||||
```bash
|
||||
cd /Users/VJ/GitHub/TinyTorch/tests/10_tokenization
|
||||
mv test_progressive_integration.py test_progressive_integration_OLD.py
|
||||
mv test_progressive_integration_REFERENCE.py test_progressive_integration.py
|
||||
```
|
||||
|
||||
### Option 2: Manual Edit
|
||||
1. Delete all content in `test_progressive_integration.py`
|
||||
2. Copy content from `test_progressive_integration_REFERENCE.py`
|
||||
3. Save and commit
|
||||
|
||||
### Verify Fix
|
||||
```bash
|
||||
pytest tests/10_tokenization/test_progressive_integration.py -v
|
||||
|
||||
# Should see:
|
||||
# - TestModule10TokenizationCore (not TestModule11TrainingCore)
|
||||
# - Tests for CharTokenizer, BPETokenizer
|
||||
# - Integration tests with Embedding, DataLoader
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
**Current Status**: CRITICAL - Wrong module tested (Module 11 instead of 10)
|
||||
**Root Cause**: Copy-paste error from Module 11 template
|
||||
**Impact**: 0% integration test coverage for Module 10
|
||||
**Fix**: Replace with corrected reference implementation
|
||||
**Urgency**: HIGH - Students have no validation of tokenization integration
|
||||
@@ -1,146 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Run all tests for Module XX: [Module Name]
|
||||
Template test runner - copy to each module's test directory
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import importlib.util
|
||||
import time
|
||||
from typing import List, Dict
|
||||
|
||||
# Add project root to path
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
||||
|
||||
|
||||
def run_module_tests() -> Dict:
|
||||
"""Run all tests for this module."""
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
from rich import box
|
||||
from rich.panel import Panel
|
||||
|
||||
console = Console()
|
||||
|
||||
# Update module number and name
|
||||
MODULE_NUMBER = "10"
|
||||
MODULE_NAME = "Tokenization"
|
||||
|
||||
# Header
|
||||
console.print(Panel(f"[bold blue]Module {MODULE_NUMBER}: {MODULE_NAME} - Test Suite[/bold blue]",
|
||||
expand=False))
|
||||
|
||||
# Find all test files in this module
|
||||
test_files = list(Path(__file__).parent.glob("test_*.py"))
|
||||
test_files = [f for f in test_files if f.name != Path(__file__).name]
|
||||
|
||||
if not test_files:
|
||||
console.print("[yellow]No test files found in this module![/yellow]")
|
||||
return {'status': 'NO_TESTS', 'passed': 0, 'failed': 0}
|
||||
|
||||
all_results = []
|
||||
total_passed = 0
|
||||
total_failed = 0
|
||||
total_skipped = 0
|
||||
|
||||
# Create results table
|
||||
table = Table(title="Test Results", box=box.ROUNDED)
|
||||
table.add_column("Test File", style="cyan")
|
||||
table.add_column("Test Class", style="yellow")
|
||||
table.add_column("Test Method", style="white")
|
||||
table.add_column("Status", justify="center")
|
||||
table.add_column("Time", justify="right")
|
||||
|
||||
for test_file in sorted(test_files):
|
||||
module_name = test_file.stem
|
||||
|
||||
try:
|
||||
# Import test module
|
||||
spec = importlib.util.spec_from_file_location(module_name, test_file)
|
||||
test_module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(test_module)
|
||||
|
||||
# Find test classes
|
||||
for class_name in dir(test_module):
|
||||
if class_name.startswith("Test"):
|
||||
test_class = getattr(test_module, class_name)
|
||||
|
||||
# Create instance
|
||||
try:
|
||||
instance = test_class()
|
||||
except Exception as e:
|
||||
table.add_row(
|
||||
module_name,
|
||||
class_name,
|
||||
"initialization",
|
||||
"[red]❌ ERROR[/red]",
|
||||
"-"
|
||||
)
|
||||
total_failed += 1
|
||||
continue
|
||||
|
||||
# Run test methods
|
||||
for method_name in dir(instance):
|
||||
if method_name.startswith("test_"):
|
||||
method = getattr(instance, method_name)
|
||||
|
||||
# Skip template placeholder tests
|
||||
if "pass" in str(method.__code__.co_code):
|
||||
continue
|
||||
|
||||
# Run test
|
||||
start = time.time()
|
||||
try:
|
||||
method()
|
||||
status = "[green]✅ PASS[/green]"
|
||||
total_passed += 1
|
||||
except AssertionError as e:
|
||||
status = "[red]❌ FAIL[/red]"
|
||||
total_failed += 1
|
||||
except ImportError:
|
||||
status = "[yellow]⏭️ SKIP[/yellow]"
|
||||
total_skipped += 1
|
||||
except Exception as e:
|
||||
status = "[red]💥 ERROR[/red]"
|
||||
total_failed += 1
|
||||
|
||||
duration = time.time() - start
|
||||
|
||||
table.add_row(
|
||||
module_name,
|
||||
class_name,
|
||||
method_name,
|
||||
status,
|
||||
f"{duration:.3f}s"
|
||||
)
|
||||
except Exception as e:
|
||||
console.print(f"[red]Error loading test file {test_file}: {e}[/red]")
|
||||
total_failed += 1
|
||||
|
||||
if total_passed + total_failed + total_skipped > 0:
|
||||
console.print(table)
|
||||
|
||||
# Summary
|
||||
console.print(f"\n📊 Summary:")
|
||||
console.print(f" • Total: {total_passed + total_failed + total_skipped} tests")
|
||||
console.print(f" • ✅ Passed: {total_passed}")
|
||||
console.print(f" • ❌ Failed: {total_failed}")
|
||||
if total_skipped > 0:
|
||||
console.print(f" • ⏭️ Skipped: {total_skipped}")
|
||||
|
||||
# Final status
|
||||
if total_failed == 0:
|
||||
console.print("\n[green bold]✅ All tests passed![/green bold]")
|
||||
return {'status': 'PASSED', 'passed': total_passed, 'failed': 0}
|
||||
else:
|
||||
console.print("\n[red]❌ Some tests failed![/red]")
|
||||
return {'status': 'FAILED', 'passed': total_passed, 'failed': total_failed}
|
||||
else:
|
||||
console.print("[yellow]No actual tests implemented yet (only templates).[/yellow]")
|
||||
return {'status': 'NO_TESTS', 'passed': 0, 'failed': 0}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
results = run_module_tests()
|
||||
sys.exit(0 if results['status'] == 'PASSED' else 1)
|
||||
Reference in New Issue
Block a user