#!/usr/bin/env python3 """ Real Performance Analysis for TinyTorch Optimization Modules =========================================================== This script tests whether TinyTorch's optimization claims are real or hallucinated. We measure actual performance improvements with scientific rigor. """ import time import numpy as np import statistics import sys import os def measure_performance(func, *args, runs=5): """Measure function performance with multiple runs.""" times = [] for _ in range(runs): start = time.perf_counter() result = func(*args) end = time.perf_counter() times.append(end - start) return { 'mean': statistics.mean(times), 'std': statistics.stdev(times) if len(times) > 1 else 0, 'times': times, 'result': result } def test_matrix_multiplication_optimization(): """Test real speedups from Module 16: Acceleration.""" print("\n🧪 MODULE 16: MATRIX MULTIPLICATION OPTIMIZATION") print("=" * 60) def naive_matmul(A, B): """O(n³) triple nested loops.""" n, k = A.shape k2, m = B.shape C = np.zeros((n, m), dtype=np.float32) for i in range(n): for j in range(m): for idx in range(k): C[i, j] += A[i, idx] * B[idx, j] return C def numpy_matmul(A, B): """Optimized NumPy implementation.""" return np.dot(A, B) # Test data size = 64 # Small for quick testing np.random.seed(42) A = np.random.randn(size, size).astype(np.float32) B = np.random.randn(size, size).astype(np.float32) print(f"Testing {size}×{size} matrix multiplication...") # Measure performance naive_perf = measure_performance(naive_matmul, A, B) numpy_perf = measure_performance(numpy_matmul, A, B) speedup = naive_perf['mean'] / numpy_perf['mean'] # Check accuracy naive_result = naive_perf['result'] numpy_result = numpy_perf['result'] max_diff = np.max(np.abs(naive_result - numpy_result)) accuracy_ok = max_diff < 1e-4 print(f" Naive implementation: {naive_perf['mean']*1000:.2f} ± {naive_perf['std']*1000:.2f} ms") print(f" NumPy implementation: {numpy_perf['mean']*1000:.2f} ± {numpy_perf['std']*1000:.2f} ms") print(f" Speedup: {speedup:.1f}×") print(f" Max difference: {max_diff:.2e}") print(f" Accuracy: {'✅ preserved' if accuracy_ok else '❌ lost'}") success = speedup > 2.0 and accuracy_ok print(f" Result: {'✅ REAL IMPROVEMENT' if success else '⚠️ MINIMAL IMPROVEMENT'}") return speedup, accuracy_ok def test_attention_complexity(): """Test O(n²) vs O(n) attention complexity from Module 19: Caching.""" print("\n🧪 MODULE 19: ATTENTION COMPLEXITY OPTIMIZATION") print("=" * 60) def standard_attention_generation(Q, K, V, seq_len): """Standard O(n²) attention for autoregressive generation.""" outputs = [] for i in range(1, seq_len): # Recompute attention for full sequence up to position i Q_slice = Q[i:i+1] K_slice = K[:i+1] V_slice = V[:i+1] # Attention computation scores = np.dot(Q_slice, K_slice.T) / np.sqrt(Q_slice.shape[-1]) attention_weights = np.exp(scores) / np.sum(np.exp(scores), axis=-1, keepdims=True) output = np.dot(attention_weights, V_slice) outputs.append(output[0]) return np.array(outputs) def cached_attention_generation(Q, K, V, seq_len): """Cached O(n) attention for autoregressive generation.""" outputs = [] K_cache = [K[0]] # Initialize cache V_cache = [V[0]] for i in range(1, seq_len): # Add new K,V to cache K_cache.append(K[i]) V_cache.append(V[i]) # Compute attention using cached K,V K_combined = np.array(K_cache) V_combined = np.array(V_cache) scores = np.dot(Q[i:i+1], K_combined.T) / np.sqrt(Q.shape[-1]) attention_weights = np.exp(scores) / np.sum(np.exp(scores)) output = np.dot(attention_weights, V_combined) outputs.append(output) return np.array(outputs) # Test with different sequence lengths to show complexity difference seq_lengths = [16, 32, 48] # Small lengths for quick testing d_model = 64 print("Testing attention complexity scaling:") for seq_len in seq_lengths: np.random.seed(42) Q = np.random.randn(seq_len, d_model).astype(np.float32) K = np.random.randn(seq_len, d_model).astype(np.float32) V = np.random.randn(seq_len, d_model).astype(np.float32) standard_perf = measure_performance(standard_attention_generation, Q, K, V, seq_len, runs=3) cached_perf = measure_performance(cached_attention_generation, Q, K, V, seq_len, runs=3) speedup = standard_perf['mean'] / cached_perf['mean'] print(f" Seq len {seq_len}: Standard {standard_perf['mean']*1000:.1f}ms, Cached {cached_perf['mean']*1000:.1f}ms, Speedup {speedup:.1f}×") return speedup def test_quantization_benefits(): """Test INT8 vs FP32 performance from Module 17: Quantization.""" print("\n🧪 MODULE 17: QUANTIZATION PERFORMANCE") print("=" * 60) def fp32_operations(data): """Standard FP32 operations.""" result = data.copy() # Simulate typical neural network operations result = np.maximum(0, result) # ReLU result = np.dot(result, result.T) # Matrix multiply result = np.tanh(result) # Activation return result def int8_operations(data): """Simulated INT8 operations.""" # Quantize to INT8 range scale = np.max(np.abs(data)) / 127.0 quantized = np.round(data / scale).astype(np.int8) # Operations in INT8 (simulated) result = np.maximum(0, quantized) # ReLU result = np.dot(result.astype(np.int16), result.astype(np.int16).T) # Matrix multiply with wider accumulator # Dequantize result = result.astype(np.float32) * (scale * scale) result = np.tanh(result) # Final activation in FP32 return result # Test data size = 128 np.random.seed(42) data = np.random.randn(size, size).astype(np.float32) * 0.1 print(f"Testing {size}×{size} quantized operations...") fp32_perf = measure_performance(fp32_operations, data) int8_perf = measure_performance(int8_operations, data) speedup = fp32_perf['mean'] / int8_perf['mean'] # Check accuracy loss fp32_result = fp32_perf['result'] int8_result = int8_perf['result'] max_diff = np.max(np.abs(fp32_result - int8_result)) relative_error = max_diff / (np.max(np.abs(fp32_result)) + 1e-8) accuracy_acceptable = relative_error < 0.05 # 5% relative error acceptable print(f" FP32 operations: {fp32_perf['mean']*1000:.2f} ± {fp32_perf['std']*1000:.2f} ms") print(f" INT8 operations: {int8_perf['mean']*1000:.2f} ± {int8_perf['std']*1000:.2f} ms") print(f" Speedup: {speedup:.1f}×") print(f" Max difference: {max_diff:.2e}") print(f" Relative error: {relative_error:.1%}") print(f" Accuracy: {'✅ acceptable' if accuracy_acceptable else '❌ too much loss'}") success = speedup > 1.0 and accuracy_acceptable print(f" Result: {'✅ QUANTIZATION BENEFICIAL' if success else '⚠️ NO CLEAR BENEFIT'}") return speedup, accuracy_acceptable def main(): """Run comprehensive performance analysis.""" print("🔥 TinyTorch Performance Analysis: Real Numbers Only") print("===================================================") print("Testing whether optimization modules deliver real improvements.") print("No hallucinations - only measured performance data.") results = {} # Test each optimization module try: matmul_speedup, matmul_accuracy = test_matrix_multiplication_optimization() results['matrix_multiplication'] = {'speedup': matmul_speedup, 'accuracy': matmul_accuracy} except Exception as e: print(f"❌ Matrix multiplication test failed: {e}") results['matrix_multiplication'] = None try: attention_speedup = test_attention_complexity() results['attention_caching'] = {'speedup': attention_speedup} except Exception as e: print(f"❌ Attention caching test failed: {e}") results['attention_caching'] = None try: quant_speedup, quant_accuracy = test_quantization_benefits() results['quantization'] = {'speedup': quant_speedup, 'accuracy': quant_accuracy} except Exception as e: print(f"❌ Quantization test failed: {e}") results['quantization'] = None # Summary print("\n" + "="*60) print("📋 FINAL PERFORMANCE ANALYSIS SUMMARY") print("="*60) successful_optimizations = 0 total_tests = 0 for test_name, result in results.items(): total_tests += 1 if result is not None: speedup = result.get('speedup', 0) accuracy = result.get('accuracy', True) if speedup > 1.5 and accuracy: successful_optimizations += 1 print(f"✅ {test_name.replace('_', ' ').title()}: {speedup:.1f}× speedup with good accuracy") elif speedup > 1.0: print(f"⚠️ {test_name.replace('_', ' ').title()}: {speedup:.1f}× speedup (modest improvement)") else: print(f"❌ {test_name.replace('_', ' ').title()}: {speedup:.1f}× (no improvement)") else: print(f"❌ {test_name.replace('_', ' ').title()}: Test failed") print(f"\n🎯 BOTTOM LINE: {successful_optimizations}/{total_tests} optimizations show significant real improvements") if successful_optimizations >= 2: print("✅ TinyTorch optimization modules deliver measurable performance benefits!") print(" Students will see real speedups when implementing these techniques.") elif successful_optimizations >= 1: print("⚠️ TinyTorch shows some optimization benefits but room for improvement.") print(" Some modules deliver real speedups, others need work.") else: print("❌ TinyTorch optimization modules don't show clear performance benefits.") print(" Claims of speedups are not supported by measurements.") return results if __name__ == "__main__": main()