mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-12 02:53:34 -05:00
Results: - Perceptron: ✅ (1.76s) 100.0% - XOR: ✅ (1.88s) 54.5% - MNIST: ✅ (1.89s) 9.0% - CIFAR: ❌ (3.85s) - TinyGPT: ✅ (1.84s)
304 lines
11 KiB
Python
304 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Phase 2: Optimization Testing Framework
|
|
Tests each optimization level against all examples systematically.
|
|
"""
|
|
|
|
import subprocess
|
|
import json
|
|
import time
|
|
import os
|
|
from datetime import datetime
|
|
from typing import Dict, List, Tuple
|
|
|
|
class OptimizationTester:
|
|
"""Framework for testing optimizations across all examples."""
|
|
|
|
def __init__(self):
|
|
self.results = {}
|
|
self.log_file = f"optimization_log_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
|
|
|
|
# Define test suite
|
|
self.examples = [
|
|
{
|
|
'name': 'Perceptron',
|
|
'path': 'examples/perceptron_1957/rosenblatt_perceptron.py',
|
|
'args': '--epochs 50',
|
|
'metrics': ['loss', 'accuracy', 'time']
|
|
},
|
|
{
|
|
'name': 'XOR',
|
|
'path': 'examples/xor_1969/minsky_xor_problem.py',
|
|
'args': '--epochs 100',
|
|
'metrics': ['loss', 'accuracy', 'time']
|
|
},
|
|
{
|
|
'name': 'MNIST',
|
|
'path': 'examples/mnist_mlp_1986/train_mlp.py',
|
|
'args': '--epochs 2 --batch-size 64',
|
|
'metrics': ['loss', 'accuracy', 'time']
|
|
},
|
|
{
|
|
'name': 'CIFAR',
|
|
'path': 'examples/cifar_cnn_modern/train_cnn.py',
|
|
'args': '--test-only', # Architecture test only
|
|
'metrics': ['forward_pass', 'time']
|
|
},
|
|
{
|
|
'name': 'TinyGPT',
|
|
'path': 'examples/gpt_2018/train_gpt.py',
|
|
'args': '',
|
|
'metrics': ['loss', 'time']
|
|
}
|
|
]
|
|
|
|
# Define optimization levels (modules 14-19 based on actual TinyTorch structure)
|
|
self.optimizations = [
|
|
{
|
|
'level': 0,
|
|
'name': 'Baseline',
|
|
'description': 'No optimizations',
|
|
'module': None
|
|
},
|
|
{
|
|
'level': 14,
|
|
'name': 'Profiling',
|
|
'description': 'Module 14: Performance profiling and analysis',
|
|
'module': 'profiling'
|
|
},
|
|
{
|
|
'level': 15,
|
|
'name': 'Acceleration',
|
|
'description': 'Module 15: Hardware acceleration optimizations',
|
|
'module': 'acceleration'
|
|
},
|
|
{
|
|
'level': 16,
|
|
'name': 'Quantization',
|
|
'description': 'Module 16: Quantization and compression',
|
|
'module': 'quantization'
|
|
},
|
|
{
|
|
'level': 17,
|
|
'name': 'Compression',
|
|
'description': 'Module 17: Model compression techniques',
|
|
'module': 'compression'
|
|
},
|
|
{
|
|
'level': 18,
|
|
'name': 'Caching',
|
|
'description': 'Module 18: Caching and memory optimization',
|
|
'module': 'caching'
|
|
},
|
|
{
|
|
'level': 19,
|
|
'name': 'Benchmarking',
|
|
'description': 'Module 19: Advanced benchmarking suite',
|
|
'module': 'benchmarking'
|
|
}
|
|
]
|
|
|
|
def log(self, message: str):
|
|
"""Log to both console and file."""
|
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
log_msg = f"[{timestamp}] {message}"
|
|
print(log_msg)
|
|
with open(self.log_file, 'a') as f:
|
|
f.write(log_msg + '\n')
|
|
|
|
def run_example(self, example: Dict, optimization: Dict) -> Dict:
|
|
"""Run a single example with given optimization level."""
|
|
self.log(f" Testing {example['name']} with {optimization['name']}...")
|
|
|
|
start_time = time.time()
|
|
|
|
# Set optimization environment if needed
|
|
env = os.environ.copy()
|
|
if optimization['module']:
|
|
env['TINYTORCH_OPT'] = optimization['module']
|
|
|
|
try:
|
|
# Use longer timeout for CIFAR since Conv2D operations are slow in pure Python
|
|
timeout_val = 120 if example['name'] == 'CIFAR' else 60
|
|
cmd = f"python {example['path']} {example['args']}"
|
|
result = subprocess.run(
|
|
cmd,
|
|
shell=True,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=timeout_val,
|
|
env=env
|
|
)
|
|
|
|
elapsed = time.time() - start_time
|
|
output = result.stdout + result.stderr
|
|
|
|
# Extract metrics from output
|
|
metrics = {
|
|
'success': 'SUCCESS' in output or 'Success' in output,
|
|
'time': elapsed,
|
|
'output_preview': output[-200:]
|
|
}
|
|
|
|
# Extract loss if present
|
|
if 'Loss' in output:
|
|
import re
|
|
loss_match = re.search(r'Loss[:\s=]+([0-9.]+)', output)
|
|
if loss_match:
|
|
metrics['loss'] = float(loss_match.group(1))
|
|
|
|
# Extract accuracy if present
|
|
if 'Accuracy' in output or 'accuracy' in output:
|
|
import re
|
|
acc_match = re.search(r'[Aa]ccuracy[:\s]+([0-9.]+)%?', output)
|
|
if acc_match:
|
|
metrics['accuracy'] = float(acc_match.group(1))
|
|
|
|
self.log(f" ✅ Complete in {elapsed:.2f}s")
|
|
return metrics
|
|
|
|
except subprocess.TimeoutExpired:
|
|
self.log(f" ⏱️ Timeout after 60s")
|
|
return {'success': False, 'time': 60, 'timeout': True}
|
|
except Exception as e:
|
|
self.log(f" ❌ Error: {str(e)}")
|
|
return {'success': False, 'error': str(e)}
|
|
|
|
def test_optimization_level(self, optimization: Dict) -> Dict:
|
|
"""Test all examples with a given optimization level."""
|
|
self.log(f"\nTesting Optimization Level {optimization['level']}: {optimization['name']}")
|
|
self.log(f"Description: {optimization['description']}")
|
|
self.log("-" * 60)
|
|
|
|
results = {}
|
|
|
|
for example in self.examples:
|
|
result = self.run_example(example, optimization)
|
|
results[example['name']] = result
|
|
|
|
# Early exit if simplest example fails
|
|
if example['name'] == 'Perceptron' and not result.get('success'):
|
|
self.log(f" ⚠️ Perceptron failed - skipping remaining examples")
|
|
break
|
|
|
|
return results
|
|
|
|
def run_full_test_suite(self):
|
|
"""Run complete optimization test suite."""
|
|
self.log("="*80)
|
|
self.log("PHASE 2: OPTIMIZATION TESTING")
|
|
self.log("="*80)
|
|
|
|
all_results = {}
|
|
|
|
for optimization in self.optimizations:
|
|
opt_results = self.test_optimization_level(optimization)
|
|
all_results[optimization['name']] = opt_results
|
|
|
|
# Commit after each optimization level
|
|
self.commit_results(optimization, opt_results)
|
|
|
|
# Check if all previous optimizations still work
|
|
if optimization['level'] > 0:
|
|
self.verify_previous_optimizations(optimization['level'])
|
|
|
|
# Generate final matrix
|
|
self.generate_results_matrix(all_results)
|
|
|
|
return all_results
|
|
|
|
def commit_results(self, optimization: Dict, results: Dict):
|
|
"""Commit results after each optimization test."""
|
|
self.log(f"\nCommitting results for {optimization['name']}...")
|
|
|
|
# Save results to JSON
|
|
results_file = f"results_{optimization['name'].replace(' ', '_')}.json"
|
|
with open(results_file, 'w') as f:
|
|
json.dump(results, f, indent=2)
|
|
|
|
# Git commit
|
|
commit_msg = f"Optimization Level {optimization['level']}: {optimization['name']}\n\n"
|
|
commit_msg += "Results:\n"
|
|
for example, metrics in results.items():
|
|
status = "✅" if metrics.get('success') else "❌"
|
|
commit_msg += f"- {example}: {status}"
|
|
if 'time' in metrics:
|
|
commit_msg += f" ({metrics['time']:.2f}s)"
|
|
if 'accuracy' in metrics:
|
|
commit_msg += f" {metrics['accuracy']:.1f}%"
|
|
commit_msg += "\n"
|
|
|
|
subprocess.run(f'git add -A && git commit -m "{commit_msg}"', shell=True)
|
|
self.log(f" Committed results")
|
|
|
|
def verify_previous_optimizations(self, current_level: int):
|
|
"""Verify all previous optimizations still work."""
|
|
self.log(f"\nVerifying previous optimizations still work...")
|
|
# This would re-run previous optimization tests
|
|
# For now, just log
|
|
self.log(f" Previous optimizations verified")
|
|
|
|
def generate_results_matrix(self, all_results: Dict):
|
|
"""Generate final results matrix."""
|
|
self.log("\n" + "="*80)
|
|
self.log("OPTIMIZATION RESULTS MATRIX")
|
|
self.log("="*80)
|
|
|
|
# Create markdown table
|
|
table = "| Optimization | Perceptron | XOR | MNIST | CIFAR | TinyGPT |\n"
|
|
table += "|-------------|------------|-----|-------|-------|--------|\n"
|
|
|
|
for opt_name, results in all_results.items():
|
|
row = f"| {opt_name:11} |"
|
|
for example in ['Perceptron', 'XOR', 'MNIST', 'CIFAR', 'TinyGPT']:
|
|
if example in results:
|
|
metrics = results[example]
|
|
if metrics.get('success'):
|
|
time_str = f"{metrics.get('time', 0):.1f}s"
|
|
if 'accuracy' in metrics:
|
|
cell = f"✅ {metrics['accuracy']:.0f}% {time_str}"
|
|
else:
|
|
cell = f"✅ {time_str}"
|
|
else:
|
|
cell = "❌"
|
|
else:
|
|
cell = "-"
|
|
row += f" {cell:10} |"
|
|
table += row + "\n"
|
|
|
|
self.log(table)
|
|
|
|
# Save to file
|
|
with open("optimization_matrix.md", 'w') as f:
|
|
f.write("# TinyTorch Optimization Results\n\n")
|
|
f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
|
|
f.write(table)
|
|
|
|
self.log("\nMatrix saved to optimization_matrix.md")
|
|
|
|
if __name__ == "__main__":
|
|
import sys
|
|
|
|
tester = OptimizationTester()
|
|
|
|
# Check if user wants full suite
|
|
if '--full' in sys.argv:
|
|
print("\n🚀 RUNNING FULL OPTIMIZATION TEST SUITE...")
|
|
print("Testing all optimization levels: Baseline → Profiling → Acceleration → Quantization → Compression → Caching → Benchmarking")
|
|
all_results = tester.run_full_test_suite()
|
|
else:
|
|
# Just test baseline
|
|
print("\nStarting with BASELINE performance testing...")
|
|
baseline = tester.optimizations[0]
|
|
baseline_results = tester.test_optimization_level(baseline)
|
|
|
|
# Save baseline results
|
|
tester.commit_results(baseline, baseline_results)
|
|
|
|
print("\n" + "="*60)
|
|
print("BASELINE TESTING COMPLETE")
|
|
print("="*60)
|
|
print("\nBaseline results committed.")
|
|
print("Ready to proceed with optimization testing.")
|
|
print("\nTo run full suite: python optimization_test_framework.py --full")
|