From d43b6c5e33d0558376ea032b1a0e749dd04094c7 Mon Sep 17 00:00:00 2001
From: Vijay Janapa Reddi
Date: Sun, 14 Dec 2025 13:38:00 -0500
Subject: [PATCH] fix: align test directory numbering with module numbering (15-17)

- Renamed tests/15_memoization -> tests/17_memoization
- Renamed tests/16_quantization -> tests/15_quantization
- Renamed tests/17_compression -> tests/16_compression

Also expanded test coverage:
- Module 15 (Quantization): Added 4 new tests for edge cases, ordering, negative values, QuantizedLinear
- Module 16 (Compression): Added 5 new tests for target sparsity, large weight preservation, structured pruning
- Module 17 (Memoization): Added 4 new tests for multiple tokens, multiple layers, seq_pos tracking, error handling

All 24 tests for modules 15-17 now pass.
---
 .../15_memoization/test_kv_cache_core.py    |  91 ------
 .../run_all_tests.py                        |   0
 .../test_quantization_integration.py        |   0
 .../15_quantization/test_quantizer_core.py  | 206 ++++++++++++++
 .../run_all_tests.py                        |   0
 .../test_compression_integration.py         |   0
 .../16_compression/test_compressor_core.py  | 262 ++++++++++++++++++
 .../16_quantization/test_quantizer_core.py  |  94 -------
 .../17_compression/test_compressor_core.py  | 111 --------
 .../run_all_tests.py                        |   0
 .../17_memoization/test_kv_cache_core.py    | 212 ++++++++++++++
 .../test_progressive_integration.py         |   0
 .../test_tinygpt_integration.py             |   0
 13 files changed, 680 insertions(+), 296 deletions(-)
 delete mode 100644 tinytorch/tests/15_memoization/test_kv_cache_core.py
 rename tinytorch/tests/{16_quantization => 15_quantization}/run_all_tests.py (100%)
 rename tinytorch/tests/{16_quantization => 15_quantization}/test_quantization_integration.py (100%)
 create mode 100644 tinytorch/tests/15_quantization/test_quantizer_core.py
 rename tinytorch/tests/{17_compression => 16_compression}/run_all_tests.py (100%)
 rename tinytorch/tests/{17_compression => 16_compression}/test_compression_integration.py (100%)
 create mode 100644 tinytorch/tests/16_compression/test_compressor_core.py
 delete mode 100644 tinytorch/tests/16_quantization/test_quantizer_core.py
 delete mode 100644 tinytorch/tests/17_compression/test_compressor_core.py
 rename tinytorch/tests/{15_memoization => 17_memoization}/run_all_tests.py (100%)
 create mode 100644 tinytorch/tests/17_memoization/test_kv_cache_core.py
 rename tinytorch/tests/{15_memoization => 17_memoization}/test_progressive_integration.py (100%)
 rename tinytorch/tests/{15_memoization => 17_memoization}/test_tinygpt_integration.py (100%)

diff --git a/tinytorch/tests/15_memoization/test_kv_cache_core.py b/tinytorch/tests/15_memoization/test_kv_cache_core.py
deleted file mode 100644
index e3ea85836..000000000
--- a/tinytorch/tests/15_memoization/test_kv_cache_core.py
+++ /dev/null
@@ -1,91 +0,0 @@
-"""
-Module 15: KV Cache (Memoization) Core Tests
-=============================================
-
-These tests verify that KV caching works for efficient inference.
-
-WHY THESE TESTS MATTER:
------------------------
-KV caching is essential for efficient text generation:
-- Without cache: O(n²) per token (recompute all attention)
-- With cache: O(n) per token (reuse previous K,V)
-
-For generating 100 tokens, that's 100x speedup!
-
-WHAT WE TEST:
--------------
-1. KVCache can store key-value pairs
-2. Cache retrieval returns stored values
-3. Cache works across multiple layers
-"""
-
-import pytest
-import numpy as np
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-
-from tinytorch.core.tensor import Tensor
-
-
-class TestKVCacheBasics:
-    """Test basic KV cache functionality."""
-
-    def test_kv_cache_import(self):
-        """
-        WHAT: Verify KVCache can be imported.
-
-        WHY: Basic sanity check.
-        """
-        try:
-            from tinytorch.perf.memoization import KVCache
-            assert KVCache is not None
-        except ImportError as e:
-            pytest.skip(f"KVCache not yet exported: {e}")
-
-    def test_kv_cache_can_instantiate(self):
-        """
-        WHAT: Verify KVCache can be created.
-        """
-        try:
-            from tinytorch.perf.memoization import KVCache
-            # KVCache needs: batch_size, max_seq_len, num_layers, num_heads, head_dim
-            cache = KVCache(batch_size=1, max_seq_len=128, num_layers=2, num_heads=4, head_dim=16)
-            assert cache is not None
-        except ImportError:
-            pytest.skip("KVCache not yet exported")
-
-    def test_kv_cache_stores_and_retrieves(self):
-        """
-        WHAT: Verify cache can store and retrieve K,V tensors.
-
-        WHY: The whole point of the cache is to reuse computed values.
-        If storage/retrieval doesn't work, there's no speedup.
-        """
-        try:
-            from tinytorch.perf.memoization import KVCache
-        except ImportError:
-            pytest.skip("KVCache not yet exported")
-
-        # Create cache with proper dimensions
-        cache = KVCache(batch_size=1, max_seq_len=128, num_layers=2, num_heads=4, head_dim=16)
-
-        # Store some K,V pairs (cache expects one token at a time during generation)
-        layer_idx = 0
-        K = Tensor(np.random.randn(1, 4, 1, 16))  # (batch, heads, 1, dim) - one new token
-        V = Tensor(np.random.randn(1, 4, 1, 16))
-
-        cache.update(layer_idx, K, V)
-
-        # Retrieve
-        cached_K, cached_V = cache.get(layer_idx)
-
-        assert cached_K is not None, "Cache didn't store K"
-        assert cached_V is not None, "Cache didn't store V"
-        assert np.allclose(cached_K.data, K.data), "Retrieved K doesn't match stored"
-        assert np.allclose(cached_V.data, V.data), "Retrieved V doesn't match stored"
-
-
-if __name__ == "__main__":
-    pytest.main([__file__, "-v"])
diff --git a/tinytorch/tests/16_quantization/run_all_tests.py b/tinytorch/tests/15_quantization/run_all_tests.py
similarity index 100%
rename from tinytorch/tests/16_quantization/run_all_tests.py
rename to tinytorch/tests/15_quantization/run_all_tests.py
diff --git a/tinytorch/tests/16_quantization/test_quantization_integration.py b/tinytorch/tests/15_quantization/test_quantization_integration.py
similarity index 100%
rename from tinytorch/tests/16_quantization/test_quantization_integration.py
rename to tinytorch/tests/15_quantization/test_quantization_integration.py
diff --git a/tinytorch/tests/15_quantization/test_quantizer_core.py b/tinytorch/tests/15_quantization/test_quantizer_core.py
new file mode 100644
index 000000000..9a6a9061b
--- /dev/null
+++ b/tinytorch/tests/15_quantization/test_quantizer_core.py
@@ -0,0 +1,206 @@
+"""
+Module 15: Quantization Core Tests
+===================================
+
+These tests verify that quantization reduces model size correctly.
+
+WHY THESE TESTS MATTER:
+-----------------------
+Quantization converts FP32 (4 bytes) to INT8 (1 byte) = 4x smaller model.
+If quantization is broken:
+- Model stays big (defeats the purpose)
+- Accuracy drops too much (unusable)
+- Values overflow (numerical errors)
+
+WHAT WE TEST:
+-------------
+1. Quantization produces INT8 values
+2. Dequantization recovers approximate original values
+3. Model size actually decreases
+"""
+
+import pytest
+import numpy as np
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from tinytorch.core.tensor import Tensor
+
+
+class TestQuantizationBasics:
+    """Test basic quantization functionality."""
+
+    def test_quantizer_import(self):
+        """Verify Quantizer can be imported."""
+        try:
+            from tinytorch.perf.quantization import Quantizer
+            assert Quantizer is not None
+        except ImportError as e:
+            pytest.skip(f"Quantizer not yet exported: {e}")
+
+    def test_quantize_produces_int8(self):
+        """
+        WHAT: Verify quantization produces INT8 values in [-128, 127].
+
+        WHY: INT8 is the target representation. Values outside this
+        range would overflow and produce garbage.
+        """
+        try:
+            from tinytorch.perf.quantization import Quantizer
+        except ImportError:
+            pytest.skip("Quantizer not yet exported")
+
+        # Create FP32 tensor
+        fp32_tensor = Tensor(np.random.randn(10, 10).astype(np.float32))
+
+        # Quantize
+        q_tensor, scale, zero_point = Quantizer.quantize_tensor(fp32_tensor)
+
+        # Check INT8 range
+        assert q_tensor.data.min() >= -128, "Quantized values below INT8 min"
+        assert q_tensor.data.max() <= 127, "Quantized values above INT8 max"
+
+    def test_dequantize_recovers_approximate_values(self):
+        """
+        WHAT: Verify dequantization recovers values close to original.
+
+        WHY: Quantization is lossy, but should be approximately reversible.
+        Large errors would destroy model accuracy.
+        """
+        try:
+            from tinytorch.perf.quantization import Quantizer
+        except ImportError:
+            pytest.skip("Quantizer not yet exported")
+
+        # Create FP32 tensor with known values
+        original = Tensor(np.array([0.5, -0.5, 1.0, -1.0]).astype(np.float32))
+
+        # Round trip: quantize then dequantize
+        q_tensor, scale, zero_point = Quantizer.quantize_tensor(original)
+        recovered = Quantizer.dequantize_tensor(q_tensor, scale, zero_point)
+
+        # Should be close (within ~1% for typical values)
+        max_error = np.max(np.abs(original.data - recovered.data))
+        assert max_error < 0.1, (
+            f"Dequantization error too large: {max_error}\n"
+            f"  Original: {original.data}\n"
+            f"  Recovered: {recovered.data}"
+        )
+
+
+class TestQuantizationAdvanced:
+    """Advanced quantization tests for edge cases and accuracy."""
+
+    def test_quantize_constant_tensor(self):
+        """
+        WHAT: Verify quantization handles constant tensors (all same value).
+
+        WHY: Constant tensors are an edge case where min=max. The algorithm
+        must handle this gracefully without division by zero.
+        """
+        try:
+            from tinytorch.perf.quantization import quantize_int8
+        except ImportError:
+            pytest.skip("quantize_int8 not yet exported")
+
+        # All zeros
+        constant = Tensor(np.zeros((4, 4), dtype=np.float32))
+        q_tensor, scale, zero_point = quantize_int8(constant)
+
+        # Should produce valid output without errors
+        assert q_tensor.data.shape == constant.data.shape, "Shape changed"
+
+    def test_quantize_preserves_relative_ordering(self):
+        """
+        WHAT: Verify quantization preserves relative ordering of values.
+
+        WHY: If [0.1, 0.2, 0.3] becomes [5, 4, 6], the model's predictions
+        would be garbage. Relative ordering must be preserved.
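+
+        ILLUSTRATIVE SKETCH (assumes a standard affine INT8 scheme; the
+        module's internals may differ):
+
+            scale = (x_max - x_min) / 255            # one step per INT8 level
+            q = round(x / scale) + zero_point        # then clip to [-128, 127]
+
+        Dividing by a positive scale, rounding, and clipping are all
+        monotone, so x_i <= x_j implies q_i <= q_j, which is exactly
+        what this test checks.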
+ """ + try: + from tinytorch.perf.quantization import quantize_int8 + except ImportError: + pytest.skip("quantize_int8 not yet exported") + + # Strictly increasing values + original = Tensor(np.array([0.1, 0.2, 0.3, 0.4, 0.5], dtype=np.float32)) + q_tensor, _, _ = quantize_int8(original) + + # Quantized values should be monotonically non-decreasing + q_data = q_tensor.data.astype(np.float32) + for i in range(len(q_data) - 1): + assert q_data[i] <= q_data[i + 1], ( + f"Ordering not preserved: q[{i}]={q_data[i]} > q[{i+1}]={q_data[i+1]}" + ) + + def test_quantize_negative_values(self): + """ + WHAT: Verify quantization handles negative values correctly. + + WHY: Neural network weights are typically centered around zero + with both positive and negative values. + """ + try: + from tinytorch.perf.quantization import quantize_int8, dequantize_int8 + except ImportError: + pytest.skip("Quantization functions not yet exported") + + # Mixed positive and negative + original = Tensor(np.array([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=np.float32)) + q_tensor, scale, zero_point = quantize_int8(original) + recovered = dequantize_int8(q_tensor, scale, zero_point) + + # Original signs should be preserved after round-trip + for i in range(len(original.data)): + orig_sign = np.sign(original.data[i]) + rec_sign = np.sign(recovered.data[i]) + # Zero can go either way due to quantization noise + if orig_sign != 0: + assert orig_sign == rec_sign, ( + f"Sign not preserved for value {original.data[i]}: " + f"recovered {recovered.data[i]}" + ) + + +class TestQuantizedLinear: + """Test the QuantizedLinear layer implementation.""" + + def test_quantized_linear_forward(self): + """ + WHAT: Verify QuantizedLinear produces similar output to regular Linear. + + WHY: Quantized layers should approximate the original behavior. + Large deviations indicate incorrect implementation. 
+ """ + try: + from tinytorch.perf.quantization import QuantizedLinear + from tinytorch.core.layers import Linear + except ImportError: + pytest.skip("QuantizedLinear not yet exported") + + # Create and quantize a linear layer + linear = Linear(4, 3) + q_linear = QuantizedLinear(linear) + + # Forward pass + input_tensor = Tensor(np.random.randn(2, 4).astype(np.float32)) + original_output = linear.forward(input_tensor) + quantized_output = q_linear.forward(input_tensor) + + # Outputs should be similar (within quantization error) + # For INT8, typical error is ~1-5% of the output range + max_error = np.max(np.abs(original_output.data - quantized_output.data)) + output_range = np.max(original_output.data) - np.min(original_output.data) + + # Allow up to 10% relative error for educational implementation + assert max_error < 0.1 * output_range + 0.1, ( + f"QuantizedLinear output differs too much from Linear:\n" + f" Max error: {max_error:.4f}\n" + f" Output range: {output_range:.4f}" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tinytorch/tests/17_compression/run_all_tests.py b/tinytorch/tests/16_compression/run_all_tests.py similarity index 100% rename from tinytorch/tests/17_compression/run_all_tests.py rename to tinytorch/tests/16_compression/run_all_tests.py diff --git a/tinytorch/tests/17_compression/test_compression_integration.py b/tinytorch/tests/16_compression/test_compression_integration.py similarity index 100% rename from tinytorch/tests/17_compression/test_compression_integration.py rename to tinytorch/tests/16_compression/test_compression_integration.py diff --git a/tinytorch/tests/16_compression/test_compressor_core.py b/tinytorch/tests/16_compression/test_compressor_core.py new file mode 100644 index 000000000..f122b1c07 --- /dev/null +++ b/tinytorch/tests/16_compression/test_compressor_core.py @@ -0,0 +1,262 @@ +""" +Module 16: Compression Core Tests +=================================== + +These tests verify that model compression (pruning) works correctly. + +WHY THESE TESTS MATTER: +----------------------- +Pruning removes unnecessary weights, making models smaller and faster. +If compression is broken: +- Model doesn't get smaller (no benefit) +- Important weights get removed (accuracy crashes) +- Sparsity calculations are wrong (can't measure compression) +""" + +import pytest +import numpy as np +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from tinytorch.core.tensor import Tensor +from tinytorch.core.layers import Linear + + +class TestCompressionBasics: + """Test basic compression/pruning functionality.""" + + def test_compressor_import(self): + """Verify Compressor can be imported.""" + try: + from tinytorch.perf.compression import Compressor + assert Compressor is not None + except ImportError as e: + pytest.skip(f"Compressor not yet exported: {e}") + + def test_measure_sparsity(self): + """ + WHAT: Verify sparsity measurement works correctly. + + WHY: Sparsity = fraction of zeros. This is how we measure compression. + 50% sparsity means half the weights are zero. 
+ """ + try: + from tinytorch.perf.compression import Compressor + except ImportError: + pytest.skip("Compressor not yet exported") + + # Create a simple model with known sparsity + class SimpleModel: + def __init__(self): + # Half zeros, half ones = 50% sparsity + self.layer = Linear(4, 4, bias=False) + self.layer.weight.data = np.array([ + [0, 0, 1, 1], + [0, 0, 1, 1], + [0, 0, 1, 1], + [0, 0, 1, 1] + ], dtype=np.float32) + + def parameters(self): + return self.layer.parameters() + + model = SimpleModel() + sparsity = Compressor.measure_sparsity(model) + + # Should be ~50% + assert 0.4 < sparsity < 0.6, ( + f"Sparsity measurement wrong!\n" + f" Expected: ~0.5 (50% zeros)\n" + f" Got: {sparsity}" + ) + + def test_magnitude_prune_increases_sparsity(self): + """ + WHAT: Verify pruning increases the number of zeros. + + WHY: Pruning should set small weights to zero. + After pruning, sparsity should increase. + """ + try: + from tinytorch.perf.compression import Compressor + except ImportError: + pytest.skip("Compressor not yet exported") + + # Create model with random weights (low sparsity) + class SimpleModel: + def __init__(self): + self.layer = Linear(10, 10, bias=False) + + def parameters(self): + return self.layer.parameters() + + model = SimpleModel() + initial_sparsity = Compressor.measure_sparsity(model) + + # Apply pruning + Compressor.magnitude_prune(model, sparsity=0.5) + + final_sparsity = Compressor.measure_sparsity(model) + + assert final_sparsity > initial_sparsity, ( + f"Pruning didn't increase sparsity!\n" + f" Before: {initial_sparsity}\n" + f" After: {final_sparsity}" + ) + + +class TestCompressionAdvanced: + """Advanced compression tests for accuracy and edge cases.""" + + def test_sparsity_achieves_target(self): + """ + WHAT: Verify magnitude pruning achieves approximately target sparsity. + + WHY: If we request 80% sparsity, we should get close to 80% zeros. + Large deviations indicate the pruning algorithm is broken. + """ + try: + from tinytorch.perf.compression import measure_sparsity, magnitude_prune + except ImportError: + pytest.skip("Compression functions not yet exported") + + # Create model + class SimpleModel: + def __init__(self): + self.layer1 = Linear(100, 50, bias=False) + self.layer2 = Linear(50, 25, bias=False) + + def parameters(self): + return self.layer1.parameters() + self.layer2.parameters() + + model = SimpleModel() + target_sparsity = 0.8 # 80% + + # Apply pruning + magnitude_prune(model, sparsity=target_sparsity) + achieved_sparsity = measure_sparsity(model) + + # Should be within 5% of target (sparsity is in percentage) + assert abs(achieved_sparsity - target_sparsity * 100) < 5, ( + f"Sparsity target not achieved!\n" + f" Target: {target_sparsity * 100}%\n" + f" Achieved: {achieved_sparsity:.1f}%" + ) + + def test_pruning_preserves_large_weights(self): + """ + WHAT: Verify that large magnitude weights are preserved during pruning. + + WHY: Magnitude pruning should keep the largest weights. If large + weights are removed, model accuracy would collapse. 
+ """ + try: + from tinytorch.perf.compression import magnitude_prune + except ImportError: + pytest.skip("magnitude_prune not yet exported") + + # Create model with one very large weight + class SimpleModel: + def __init__(self): + self.layer = Linear(4, 4, bias=False) + # Set one weight to be much larger than others + self.layer.weight.data = np.array([ + [0.01, 0.02, 0.01, 0.02], + [0.01, 100.0, 0.01, 0.02], # 100.0 is the largest + [0.01, 0.02, 0.01, 0.02], + [0.01, 0.02, 0.01, 0.02] + ], dtype=np.float32) + + def parameters(self): + return self.layer.parameters() + + model = SimpleModel() + + # Prune 90% of weights + magnitude_prune(model, sparsity=0.9) + + # The largest weight should still be there + assert model.layer.weight.data[1, 1] == 100.0, ( + "Large weight was incorrectly pruned!\n" + f" Expected: 100.0\n" + f" Got: {model.layer.weight.data[1, 1]}" + ) + + def test_zero_sparsity_no_change(self): + """ + WHAT: Verify that 0% sparsity doesn't change the model. + + WHY: This is an edge case - requesting no pruning should leave + all weights unchanged. + """ + try: + from tinytorch.perf.compression import magnitude_prune + except ImportError: + pytest.skip("magnitude_prune not yet exported") + + class SimpleModel: + def __init__(self): + self.layer = Linear(4, 4, bias=False) + + def parameters(self): + return self.layer.parameters() + + model = SimpleModel() + original_weights = model.layer.weight.data.copy() + + # Prune 0% (should be no change) + magnitude_prune(model, sparsity=0.0) + + assert np.allclose(model.layer.weight.data, original_weights), ( + "0% sparsity changed weights when it shouldn't!" + ) + + +class TestStructuredPruning: + """Test structured pruning (removing entire neurons/channels).""" + + def test_structured_prune_import(self): + """Verify structured_prune function can be imported.""" + try: + from tinytorch.perf.compression import structured_prune + assert structured_prune is not None + except ImportError: + pytest.skip("structured_prune not yet exported") + + def test_structured_prune_reduces_effective_neurons(self): + """ + WHAT: Verify structured pruning removes entire rows/columns. + + WHY: Unlike magnitude pruning which creates sparse matrices, + structured pruning removes whole neurons for actual speedups. 
+ """ + try: + from tinytorch.perf.compression import structured_prune + from tinytorch.core.layers import Sequential + except ImportError: + pytest.skip("structured_prune not yet exported") + + # Create model using Sequential (required for structured_prune) + layer = Linear(10, 10, bias=False) + model = Sequential(layer) + + # Apply 50% structured pruning + structured_prune(model, prune_ratio=0.5) + + # Check that some entire columns are now all zeros + # structured_prune zeros out columns (output channels) + weights = layer.weight.data + zero_cols = np.sum(np.all(weights == 0, axis=0)) + + # At least some columns should be completely zeroed + assert zero_cols >= 1, ( + f"Structured pruning didn't zero out entire columns!\n" + f" Expected: At least 1 zero column\n" + f" Got: {zero_cols} zero columns" + ) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tinytorch/tests/16_quantization/test_quantizer_core.py b/tinytorch/tests/16_quantization/test_quantizer_core.py deleted file mode 100644 index 41d52b2c7..000000000 --- a/tinytorch/tests/16_quantization/test_quantizer_core.py +++ /dev/null @@ -1,94 +0,0 @@ -""" -Module 16: Quantization Core Tests -=================================== - -These tests verify that quantization reduces model size correctly. - -WHY THESE TESTS MATTER: ------------------------ -Quantization converts FP32 (4 bytes) to INT8 (1 byte) = 4x smaller model. -If quantization is broken: -- Model stays big (defeats the purpose) -- Accuracy drops too much (unusable) -- Values overflow (numerical errors) - -WHAT WE TEST: -------------- -1. Quantization produces INT8 values -2. Dequantization recovers approximate original values -3. Model size actually decreases -""" - -import pytest -import numpy as np -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) - -from tinytorch.core.tensor import Tensor - - -class TestQuantizationBasics: - """Test basic quantization functionality.""" - - def test_quantizer_import(self): - """Verify Quantizer can be imported.""" - try: - from tinytorch.perf.quantization import Quantizer - assert Quantizer is not None - except ImportError as e: - pytest.skip(f"Quantizer not yet exported: {e}") - - def test_quantize_produces_int8(self): - """ - WHAT: Verify quantization produces INT8 values in [-128, 127]. - - WHY: INT8 is the target representation. Values outside this - range would overflow and produce garbage. - """ - try: - from tinytorch.perf.quantization import Quantizer - except ImportError: - pytest.skip("Quantizer not yet exported") - - # Create FP32 tensor - fp32_tensor = Tensor(np.random.randn(10, 10).astype(np.float32)) - - # Quantize - q_tensor, scale, zero_point = Quantizer.quantize_tensor(fp32_tensor) - - # Check INT8 range - assert q_tensor.data.min() >= -128, "Quantized values below INT8 min" - assert q_tensor.data.max() <= 127, "Quantized values above INT8 max" - - def test_dequantize_recovers_approximate_values(self): - """ - WHAT: Verify dequantization recovers values close to original. - - WHY: Quantization is lossy, but should be approximately reversible. - Large errors would destroy model accuracy. 
- """ - try: - from tinytorch.perf.quantization import Quantizer - except ImportError: - pytest.skip("Quantizer not yet exported") - - # Create FP32 tensor with known values - original = Tensor(np.array([0.5, -0.5, 1.0, -1.0]).astype(np.float32)) - - # Round trip: quantize then dequantize - q_tensor, scale, zero_point = Quantizer.quantize_tensor(original) - recovered = Quantizer.dequantize_tensor(q_tensor, scale, zero_point) - - # Should be close (within ~1% for typical values) - max_error = np.max(np.abs(original.data - recovered.data)) - assert max_error < 0.1, ( - f"Dequantization error too large: {max_error}\n" - f" Original: {original.data}\n" - f" Recovered: {recovered.data}" - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/tinytorch/tests/17_compression/test_compressor_core.py b/tinytorch/tests/17_compression/test_compressor_core.py deleted file mode 100644 index 9b9fc0bca..000000000 --- a/tinytorch/tests/17_compression/test_compressor_core.py +++ /dev/null @@ -1,111 +0,0 @@ -""" -Module 17: Compression Core Tests -=================================== - -These tests verify that model compression (pruning) works correctly. - -WHY THESE TESTS MATTER: ------------------------ -Pruning removes unnecessary weights, making models smaller and faster. -If compression is broken: -- Model doesn't get smaller (no benefit) -- Important weights get removed (accuracy crashes) -- Sparsity calculations are wrong (can't measure compression) -""" - -import pytest -import numpy as np -import sys -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent.parent)) - -from tinytorch.core.tensor import Tensor -from tinytorch.core.layers import Linear - - -class TestCompressionBasics: - """Test basic compression/pruning functionality.""" - - def test_compressor_import(self): - """Verify Compressor can be imported.""" - try: - from tinytorch.perf.compression import Compressor - assert Compressor is not None - except ImportError as e: - pytest.skip(f"Compressor not yet exported: {e}") - - def test_measure_sparsity(self): - """ - WHAT: Verify sparsity measurement works correctly. - - WHY: Sparsity = fraction of zeros. This is how we measure compression. - 50% sparsity means half the weights are zero. - """ - try: - from tinytorch.perf.compression import Compressor - except ImportError: - pytest.skip("Compressor not yet exported") - - # Create a simple model with known sparsity - class SimpleModel: - def __init__(self): - # Half zeros, half ones = 50% sparsity - self.layer = Linear(4, 4, bias=False) - self.layer.weight.data = np.array([ - [0, 0, 1, 1], - [0, 0, 1, 1], - [0, 0, 1, 1], - [0, 0, 1, 1] - ], dtype=np.float32) - - def parameters(self): - return self.layer.parameters() - - model = SimpleModel() - sparsity = Compressor.measure_sparsity(model) - - # Should be ~50% - assert 0.4 < sparsity < 0.6, ( - f"Sparsity measurement wrong!\n" - f" Expected: ~0.5 (50% zeros)\n" - f" Got: {sparsity}" - ) - - def test_magnitude_prune_increases_sparsity(self): - """ - WHAT: Verify pruning increases the number of zeros. - - WHY: Pruning should set small weights to zero. - After pruning, sparsity should increase. 
- """ - try: - from tinytorch.perf.compression import Compressor - except ImportError: - pytest.skip("Compressor not yet exported") - - # Create model with random weights (low sparsity) - class SimpleModel: - def __init__(self): - self.layer = Linear(10, 10, bias=False) - - def parameters(self): - return self.layer.parameters() - - model = SimpleModel() - initial_sparsity = Compressor.measure_sparsity(model) - - # Apply pruning - Compressor.magnitude_prune(model, sparsity=0.5) - - final_sparsity = Compressor.measure_sparsity(model) - - assert final_sparsity > initial_sparsity, ( - f"Pruning didn't increase sparsity!\n" - f" Before: {initial_sparsity}\n" - f" After: {final_sparsity}" - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/tinytorch/tests/15_memoization/run_all_tests.py b/tinytorch/tests/17_memoization/run_all_tests.py similarity index 100% rename from tinytorch/tests/15_memoization/run_all_tests.py rename to tinytorch/tests/17_memoization/run_all_tests.py diff --git a/tinytorch/tests/17_memoization/test_kv_cache_core.py b/tinytorch/tests/17_memoization/test_kv_cache_core.py new file mode 100644 index 000000000..c767e5988 --- /dev/null +++ b/tinytorch/tests/17_memoization/test_kv_cache_core.py @@ -0,0 +1,212 @@ +""" +Module 17: KV Cache (Memoization) Core Tests +============================================= + +These tests verify that KV caching works for efficient inference. + +WHY THESE TESTS MATTER: +----------------------- +KV caching is essential for efficient text generation: +- Without cache: O(n²) per token (recompute all attention) +- With cache: O(n) per token (reuse previous K,V) + +For generating 100 tokens, that's 100x speedup! + +WHAT WE TEST: +------------- +1. KVCache can store key-value pairs +2. Cache retrieval returns stored values +3. Cache works across multiple layers +""" + +import pytest +import numpy as np +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).parent.parent.parent)) + +from tinytorch.core.tensor import Tensor + + +class TestKVCacheBasics: + """Test basic KV cache functionality.""" + + def test_kv_cache_import(self): + """ + WHAT: Verify KVCache can be imported. + + WHY: Basic sanity check. + """ + try: + from tinytorch.perf.memoization import KVCache + assert KVCache is not None + except ImportError as e: + pytest.skip(f"KVCache not yet exported: {e}") + + def test_kv_cache_can_instantiate(self): + """ + WHAT: Verify KVCache can be created. + """ + try: + from tinytorch.perf.memoization import KVCache + # KVCache needs: batch_size, max_seq_len, num_layers, num_heads, head_dim + cache = KVCache(batch_size=1, max_seq_len=128, num_layers=2, num_heads=4, head_dim=16) + assert cache is not None + except ImportError: + pytest.skip("KVCache not yet exported") + + def test_kv_cache_stores_and_retrieves(self): + """ + WHAT: Verify cache can store and retrieve K,V tensors. + + WHY: The whole point of the cache is to reuse computed values. + If storage/retrieval doesn't work, there's no speedup. 
+ """ + try: + from tinytorch.perf.memoization import KVCache + except ImportError: + pytest.skip("KVCache not yet exported") + + # Create cache with proper dimensions + cache = KVCache(batch_size=1, max_seq_len=128, num_layers=2, num_heads=4, head_dim=16) + + # Store some K,V pairs (cache expects one token at a time during generation) + layer_idx = 0 + K = Tensor(np.random.randn(1, 4, 1, 16)) # (batch, heads, 1, dim) - one new token + V = Tensor(np.random.randn(1, 4, 1, 16)) + + cache.update(layer_idx, K, V) + cache.advance() # Must advance after update to make values retrievable + + # Retrieve + cached_K, cached_V = cache.get(layer_idx) + + assert cached_K is not None, "Cache didn't store K" + assert cached_V is not None, "Cache didn't store V" + assert cached_K.shape == K.shape, f"K shape mismatch: {cached_K.shape} vs {K.shape}" + assert np.allclose(cached_K.data, K.data), "Retrieved K doesn't match stored" + assert np.allclose(cached_V.data, V.data), "Retrieved V doesn't match stored" + + +class TestKVCacheAdvanced: + """Advanced KV cache tests for multiple tokens and layers.""" + + def test_kv_cache_multiple_tokens(self): + """ + WHAT: Verify cache can accumulate multiple tokens. + + WHY: During generation, we add one token at a time. The cache must + correctly accumulate all previous K,V pairs. + """ + try: + from tinytorch.perf.memoization import KVCache + except ImportError: + pytest.skip("KVCache not yet exported") + + cache = KVCache(batch_size=1, max_seq_len=10, num_layers=1, num_heads=2, head_dim=8) + + # Add 3 tokens + for token_idx in range(3): + K = Tensor(np.full((1, 2, 1, 8), token_idx, dtype=np.float32)) + V = Tensor(np.full((1, 2, 1, 8), token_idx + 10, dtype=np.float32)) + cache.update(layer_idx=0, key=K, value=V) + cache.advance() + + # Retrieve should give all 3 tokens + cached_K, cached_V = cache.get(layer_idx=0) + + assert cached_K.shape == (1, 2, 3, 8), f"Expected (1,2,3,8), got {cached_K.shape}" + assert cached_V.shape == (1, 2, 3, 8), f"Expected (1,2,3,8), got {cached_V.shape}" + + # Verify values are in order + assert cached_K.data[0, 0, 0, 0] == 0, "First token K wrong" + assert cached_K.data[0, 0, 1, 0] == 1, "Second token K wrong" + assert cached_K.data[0, 0, 2, 0] == 2, "Third token K wrong" + + def test_kv_cache_multiple_layers(self): + """ + WHAT: Verify cache works correctly across multiple transformer layers. + + WHY: Real transformers have multiple layers, each with its own K,V cache. 
+ """ + try: + from tinytorch.perf.memoization import KVCache + except ImportError: + pytest.skip("KVCache not yet exported") + + num_layers = 4 + cache = KVCache(batch_size=1, max_seq_len=10, num_layers=num_layers, num_heads=2, head_dim=8) + + # Update each layer with different values + for layer_idx in range(num_layers): + K = Tensor(np.full((1, 2, 1, 8), layer_idx * 10, dtype=np.float32)) + V = Tensor(np.full((1, 2, 1, 8), layer_idx * 10 + 1, dtype=np.float32)) + cache.update(layer_idx, K, V) + + cache.advance() + + # Verify each layer has correct values + for layer_idx in range(num_layers): + cached_K, cached_V = cache.get(layer_idx) + expected_k_val = layer_idx * 10 + expected_v_val = layer_idx * 10 + 1 + + assert cached_K.data[0, 0, 0, 0] == expected_k_val, ( + f"Layer {layer_idx} K wrong: expected {expected_k_val}, got {cached_K.data[0,0,0,0]}" + ) + assert cached_V.data[0, 0, 0, 0] == expected_v_val, ( + f"Layer {layer_idx} V wrong: expected {expected_v_val}, got {cached_V.data[0,0,0,0]}" + ) + + def test_kv_cache_seq_pos_tracking(self): + """ + WHAT: Verify seq_pos counter tracks correctly. + + WHY: seq_pos determines where in the cache to write next and how + much valid data to return. Incorrect tracking breaks generation. + """ + try: + from tinytorch.perf.memoization import KVCache + except ImportError: + pytest.skip("KVCache not yet exported") + + cache = KVCache(batch_size=1, max_seq_len=100, num_layers=1, num_heads=2, head_dim=8) + + # Initially at 0 + assert cache.seq_pos == 0, "Initial seq_pos should be 0" + + # Add tokens and check seq_pos + for expected_pos in range(1, 6): + K = Tensor(np.zeros((1, 2, 1, 8))) + V = Tensor(np.zeros((1, 2, 1, 8))) + cache.update(0, K, V) + cache.advance() + assert cache.seq_pos == expected_pos, ( + f"seq_pos should be {expected_pos}, got {cache.seq_pos}" + ) + + def test_kv_cache_raises_on_invalid_layer(self): + """ + WHAT: Verify cache raises error for invalid layer index. + + WHY: Trying to access a non-existent layer is a programming error + that should be caught early. + """ + try: + from tinytorch.perf.memoization import KVCache + except ImportError: + pytest.skip("KVCache not yet exported") + + cache = KVCache(batch_size=1, max_seq_len=10, num_layers=2, num_heads=2, head_dim=8) + + K = Tensor(np.zeros((1, 2, 1, 8))) + V = Tensor(np.zeros((1, 2, 1, 8))) + + # Valid layers are 0 and 1 + with pytest.raises(ValueError): + cache.update(layer_idx=5, key=K, value=V) # Invalid layer + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tinytorch/tests/15_memoization/test_progressive_integration.py b/tinytorch/tests/17_memoization/test_progressive_integration.py similarity index 100% rename from tinytorch/tests/15_memoization/test_progressive_integration.py rename to tinytorch/tests/17_memoization/test_progressive_integration.py diff --git a/tinytorch/tests/15_memoization/test_tinygpt_integration.py b/tinytorch/tests/17_memoization/test_tinygpt_integration.py similarity index 100% rename from tinytorch/tests/15_memoization/test_tinygpt_integration.py rename to tinytorch/tests/17_memoization/test_tinygpt_integration.py