Files
TinyTorch/tests/module_13/test_progressive_integration.py
Vijay Janapa Reddi 86b908fe5c Add TinyTorch examples gallery and fix module integration issues
- Create professional examples directory showcasing TinyTorch as real ML framework
- Add examples: XOR, MNIST, CIFAR-10, text generation, autograd demo, optimizer comparison
- Fix import paths in exported modules (training.py, dense.py)
- Update training module with autograd integration for loss functions
- Add progressive integration tests for all 16 modules
- Document framework capabilities and usage patterns

This commit establishes the examples gallery that demonstrates TinyTorch
works like PyTorch/TensorFlow, validating the complete framework.
2025-09-21 10:00:11 -04:00

629 lines
26 KiB
Python

"""
Module 13: Progressive Integration Tests
Tests that Module 13 (Kernels) works correctly AND that the entire prior stack works.
DEPENDENCY CHAIN: 01_setup → ... → 12_compression → 13_kernels
This is where we enable high-performance computational kernels and hardware acceleration.
"""
import numpy as np
import sys
from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
class TestPriorStackStillWorking:
    """Quick regression checks that prior modules (01→12) still work.

    Each test returns early (and therefore passes) when one of the
    ``tinytorch`` modules it needs cannot be imported. Only the import
    statements are guarded: an ``ImportError`` raised later, while the
    imported components are being exercised, propagates as a test error.
    """

    def test_complete_ml_system_stable(self):
        """Verify complete ML system remains stable.

        Builds a ``Dense(10, 5)`` layer with an ``Adam`` optimizer and a
        ``Trainer``, prunes the layer weights, and runs one forward pass on a
        random ``(4, 10)`` input.
        """
        # Environment (Module 01)
        assert sys.version_info >= (3, 8), "Foundation broken: Python version"

        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import prune_weights
        except ImportError:
            return  # ML system not implemented yet

        # All ML system components should be constructible together.
        model = Dense(10, 5)
        optimizer = Adam(model.parameters(), lr=0.001)
        Trainer(model, optimizer)  # only construction is checked here

        # Compression should still work and must preserve the weight shape.
        pruned_weights = prune_weights(model.weights, sparsity=0.3)
        assert pruned_weights.shape == model.weights.shape, "Compression broken"

        # Basic ML functionality should work.
        x = Tensor(np.random.randn(4, 10))
        output = model(x)
        assert output.shape == (4, 5), "ML system broken"

    def test_efficiency_features_stable(self):
        """Verify efficiency modules (11→12) still work.

        Checks that a ``Trainer`` exposes ``train`` or ``fit`` and that
        ``quantize_weights`` keeps the shape of the weights it is given.
        """
        try:
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import quantize_weights
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense
        except ImportError:
            return  # Efficiency features not implemented yet

        model = Dense(8, 3)
        optimizer = SGD(model.parameters(), lr=0.01)
        trainer = Trainer(model, optimizer)
        assert hasattr(trainer, 'train') or hasattr(trainer, 'fit'), "Training broken"

        quantized = quantize_weights(model.weights, bits=8)
        assert quantized.shape == model.weights.shape, "Quantization broken"
class TestModule13KernelsCore:
    """Test Module 13 (Kernels) core functionality.

    Every test returns early (and therefore passes) when the ``tinytorch``
    modules it needs cannot be imported. Individual kernel features are looked
    up by name on ``tinytorch.core.kernels`` and exercised only when present,
    so a missing feature never hides a regression in one that exists.
    """

    def test_optimized_tensor_operations(self):
        """Test optimized tensor operation kernels.

        ``optimized_matmul`` and ``vectorized_add`` are each compared against
        the equivalent NumPy computation on the tensors' ``.data``.
        """
        try:
            from tinytorch.core import kernels
            from tinytorch.core.tensor import Tensor
        except ImportError:
            return  # Optimized tensor operations not implemented yet

        # Test optimized matrix multiplication
        optimized_matmul = getattr(kernels, "optimized_matmul", None)
        if optimized_matmul is not None:
            lhs = Tensor(np.random.randn(50, 30))
            rhs = Tensor(np.random.randn(30, 20))
            product = optimized_matmul(lhs, rhs)
            expected = np.dot(lhs.data, rhs.data)
            assert product.shape == (50, 20), "Optimized matmul shape broken"
            assert np.allclose(product.data, expected, rtol=1e-5), "Optimized matmul accuracy broken"

        # Test vectorized operations
        vectorized_add = getattr(kernels, "vectorized_add", None)
        if vectorized_add is not None:
            a = Tensor(np.random.randn(1000))
            b = Tensor(np.random.randn(1000))
            total = vectorized_add(a, b)
            expected = a.data + b.data
            assert total.shape == a.shape, "Vectorized add shape broken"
            assert np.allclose(total.data, expected), "Vectorized add accuracy broken"

    def test_cuda_kernels(self):
        """Test CUDA acceleration kernels.

        Runs only when ``cuda_available()`` reports True, ``CudaKernel``
        exists, and tensors expose a ``cuda`` method; otherwise the test
        passes without exercising anything.
        """
        try:
            from tinytorch.core import kernels
            from tinytorch.core.tensor import Tensor
        except ImportError:
            return  # CUDA kernels not implemented yet

        cuda_available = getattr(kernels, "cuda_available", None)
        if cuda_available is None or not cuda_available():
            return  # CUDA not available, nothing to exercise

        CudaKernel = getattr(kernels, "CudaKernel", None)
        if CudaKernel is None:
            return

        kernel = CudaKernel('matmul')
        A = Tensor(np.random.randn(100, 50))
        B = Tensor(np.random.randn(50, 25))

        # Move to CUDA (if supported)
        if hasattr(A, 'cuda'):
            result = kernel.execute(A.cuda(), B.cuda())
            assert result.shape == (100, 25), "CUDA kernel shape broken"

    def test_custom_kernel_compilation(self):
        """Test custom kernel compilation and execution.

        ``compile_kernel`` is given Python source for an element-wise multiply
        and the name of the function to compile; the compiled callable is
        checked against ``a * b``. ``KernelCompiler`` is only checked for the
        presence of its ``compile`` and ``target`` attributes.
        """
        try:
            from tinytorch.core import kernels
        except ImportError:
            return  # Custom kernel compilation not implemented yet

        # Test kernel compilation
        compile_kernel = getattr(kernels, "compile_kernel", None)
        if compile_kernel is not None:
            # Simple element-wise operation kernel. Spelled out line by line so
            # the function body keeps the indentation Python requires.
            kernel_code = (
                "\n"
                "def element_wise_multiply(a, b):\n"
                "    return a * b\n"
            )
            compiled_kernel = compile_kernel(kernel_code, 'element_wise_multiply')

            a = np.array([1, 2, 3, 4])
            b = np.array([2, 3, 4, 5])
            result = compiled_kernel(a, b)
            assert np.array_equal(result, a * b), "Custom kernel compilation broken"

        # Test kernel compiler
        KernelCompiler = getattr(kernels, "KernelCompiler", None)
        if KernelCompiler is not None:
            compiler = KernelCompiler(target='cpu', optimization_level=2)
            assert hasattr(compiler, 'compile'), "Kernel compiler broken: No compile method"
            assert hasattr(compiler, 'target'), "Kernel compiler broken: No target"
class TestProgressiveStackIntegration:
    """Test that the complete stack (01→13) works together.

    Every test returns early (and therefore passes) when the ``tinytorch``
    modules it needs cannot be imported. Optional kernel features are looked
    up by name on ``tinytorch.core.kernels`` and exercised only when present.
    """

    def test_accelerated_training_pipeline(self):
        """Test training pipeline with kernel acceleration.

        Enables kernel optimizations when ``enable_optimizations`` exists,
        then pushes the first batch of a 200-sample dataset through a
        three-layer ``Dense`` model and checks the output shape.
        """
        try:
            from tinytorch.core import kernels
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
            from tinytorch.core.data import Dataset, DataLoader
        except ImportError:
            return  # Accelerated training pipeline not ready yet

        # Enable kernel optimizations
        enable_optimizations = getattr(kernels, "enable_optimizations", None)
        if enable_optimizations is not None:
            enable_optimizations(backend='auto')

        class AcceleratedModel:
            """Three stacked Dense layers: 50 -> 100 -> 20 -> 5."""

            def __init__(self):
                self.layer1 = Dense(50, 100)
                self.layer2 = Dense(100, 20)
                self.layer3 = Dense(20, 5)

            def __call__(self, x):
                h1 = self.layer1(x)
                h2 = self.layer2(h1)
                return self.layer3(h2)

            def parameters(self):
                """Collect parameters from every layer that exposes them."""
                params = []
                for layer in (self.layer1, self.layer2, self.layer3):
                    if hasattr(layer, 'parameters'):
                        params.extend(layer.parameters())
                return params

        class PerformanceDataset(Dataset):
            """200 random 50-feature samples with integer targets in [0, 5)."""

            # NOTE(review): Dataset.__init__ is not called here, as in the
            # original; confirm the base class does not require it.
            def __init__(self):
                self.data = np.random.randn(200, 50)
                self.targets = np.random.randint(0, 5, 200)

            def __len__(self):
                return 200

            def __getitem__(self, idx):
                return Tensor(self.data[idx]), self.targets[idx]

        # Accelerated training
        model = AcceleratedModel()
        optimizer = Adam(model.parameters(), lr=0.001)
        Trainer(model, optimizer)  # only construction is checked here
        dataloader = DataLoader(PerformanceDataset(), batch_size=16)

        # Test accelerated forward pass on one batch only.
        for batch_x, _ in dataloader:
            output = model(batch_x)
            assert output.shape == (16, 5), "Accelerated training broken"
            break

    def test_large_scale_operations(self):
        """Test large-scale operations with kernel optimizations.

        Checks the output shape of a 500x300 by 300x200 ``optimized_matmul``
        and of a 32-item ``batch_operations('matmul', ...)`` call.
        """
        try:
            from tinytorch.core import kernels
            from tinytorch.core.tensor import Tensor
        except ImportError:
            return  # Large-scale operations not ready yet

        # Large-scale matrix operations
        optimized_matmul = getattr(kernels, "optimized_matmul", None)
        if optimized_matmul is not None:
            A = Tensor(np.random.randn(500, 300))
            B = Tensor(np.random.randn(300, 200))
            result = optimized_matmul(A, B)
            assert result.shape == (500, 200), "Large-scale matmul broken"

        # Batch operations
        batch_operations = getattr(kernels, "batch_operations", None)
        if batch_operations is not None:
            batch_size = 32
            matrices = [Tensor(np.random.randn(50, 30)) for _ in range(batch_size)]
            vectors = [Tensor(np.random.randn(30)) for _ in range(batch_size)]
            results = batch_operations('matmul', matrices, vectors)
            assert len(results) == batch_size, "Batch operations broken"
            for item in results:
                assert item.shape == (50,), "Batch operation result shape broken"

    def test_memory_optimized_operations(self):
        """Test memory-optimized kernel operations.

        Checks that ``in_place_operations.add_`` leaves ``a.data`` bound to
        the same object and that ``memory_pool.MemoryPool`` hands out tensors
        of the requested shape and accepts them back.
        """
        try:
            from tinytorch.core import kernels
            from tinytorch.core.tensor import Tensor
        except ImportError:
            return  # Memory-optimized operations not ready yet

        # In-place operations to save memory
        in_place_operations = getattr(kernels, "in_place_operations", None)
        if in_place_operations is not None:
            a = Tensor(np.random.randn(100, 100))
            b = Tensor(np.random.randn(100, 100))
            # Hold a reference to the original buffer: comparing saved id()
            # values could pass by accident if a freed buffer's id were reused.
            original_buffer = a.data
            in_place_operations.add_(a, b)
            assert a.data is original_buffer, "In-place operation created copy"

        # Memory pool for efficient allocation
        memory_pool = getattr(kernels, "memory_pool", None)
        if memory_pool is not None:
            pool = memory_pool.MemoryPool()
            tensor1 = pool.allocate_tensor(shape=(200, 200))
            tensor2 = pool.allocate_tensor(shape=(200, 200))
            assert tensor1.shape == (200, 200), "Memory pool allocation broken"
            assert tensor2.shape == (200, 200), "Memory pool allocation broken"
            # Release memory
            pool.release(tensor1)
            pool.release(tensor2)
class TestPerformanceOptimizations:
    """Test performance optimizations and benchmarking.

    Every test returns early (and therefore passes) when
    ``tinytorch.core.kernels`` cannot be imported. Individual features are
    looked up by name on that module and exercised only when present.
    """

    def test_kernel_benchmarking(self):
        """Test kernel performance benchmarking.

        ``benchmark_kernel`` must return a mapping with ``mean_time`` and
        ``std_time``; ``KernelProfiler.stop()`` must return a mapping with
        ``total_time`` and ``operation_count``.
        """
        try:
            from tinytorch.core import kernels
        except ImportError:
            return  # Kernel benchmarking not ready yet

        # Benchmark matrix multiplication
        benchmark_kernel = getattr(kernels, "benchmark_kernel", None)
        if benchmark_kernel is not None:
            for size in [(100, 100), (200, 200), (500, 500)]:
                A = np.random.randn(*size)
                B = np.random.randn(*size)
                results = benchmark_kernel('matmul', A, B, num_trials=5)
                assert 'mean_time' in results, "Benchmark missing timing"
                assert 'std_time' in results, "Benchmark missing std"
                assert results['mean_time'] > 0, "Benchmark timing invalid"

        # Kernel profiler
        KernelProfiler = getattr(kernels, "KernelProfiler", None)
        if KernelProfiler is not None:
            profiler = KernelProfiler()
            profiler.start()
            # Some operations to profile; the products themselves are discarded.
            for _ in range(10):
                a = np.random.randn(50, 50)
                b = np.random.randn(50, 50)
                np.dot(a, b)
            profile_results = profiler.stop()
            assert 'total_time' in profile_results, "Profiler missing total time"
            assert 'operation_count' in profile_results, "Profiler missing operation count"

    def test_auto_optimization(self):
        """Test automatic kernel optimization selection."""
        try:
            from tinytorch.core import kernels
        except ImportError:
            return  # Auto optimization not ready yet

        # Auto optimizer
        AutoOptimizer = getattr(kernels, "AutoOptimizer", None)
        if AutoOptimizer is not None:
            best_config = AutoOptimizer().detect_optimal_config()
            assert 'matmul_kernel' in best_config, "Auto optimizer missing matmul"
            assert 'device' in best_config, "Auto optimizer missing device"

        # Kernel selection
        select_best_kernel = getattr(kernels, "select_best_kernel", None)
        if select_best_kernel is not None:
            candidates = ['numpy', 'optimized_cpu', 'cuda']
            best_kernel = select_best_kernel('matmul', (100, 100), available_kernels=candidates)
            assert best_kernel in candidates, "Kernel selection invalid"

    def test_vectorization_optimizations(self):
        """Test vectorization and SIMD optimizations.

        ``vectorized_ops.add`` must match ``a + b`` and take at most twice as
        long as the NumPy addition. ``simd_support.detect_capabilities()``
        must return a dict whose ``sse``/``avx``/``avx2`` entries, when
        present, are booleans.
        """
        try:
            from tinytorch.core import kernels
        except ImportError:
            return  # Vectorization optimizations not ready yet

        # Vectorized operations
        vectorized_ops = getattr(kernels, "vectorized_ops", None)
        if vectorized_ops is not None:
            import timeit

            # Large arrays for vectorization
            a = np.random.randn(10000)
            b = np.random.randn(10000)

            # Results should be equivalent
            numpy_result = a + b
            vectorized_result = vectorized_ops.add(a, b)
            assert np.allclose(numpy_result, vectorized_result), "Vectorization accuracy broken"

            # Compare best-of-N timings: a single wall-clock sample of an
            # operation this short is dominated by timer resolution and
            # scheduling noise, which made the comparison flaky.
            numpy_time = min(timeit.repeat(lambda: a + b, number=20, repeat=5))
            vectorized_time = min(
                timeit.repeat(lambda: vectorized_ops.add(a, b), number=20, repeat=5)
            )
            # Vectorized should be competitive or faster
            assert vectorized_time <= numpy_time * 2, "Vectorization significantly slower"

        # SIMD support detection
        simd_support = getattr(kernels, "simd_support", None)
        if simd_support is not None:
            capabilities = simd_support.detect_capabilities()
            assert isinstance(capabilities, dict), "SIMD detection should return dict"
            # Common SIMD instruction sets
            for key in ('sse', 'avx', 'avx2'):
                if key in capabilities:
                    assert isinstance(capabilities[key], bool), f"SIMD {key} should be boolean"
class TestHardwareAcceleration:
    """Test hardware acceleration and device management.

    Every test returns early (and therefore passes) when the ``tinytorch``
    modules it needs cannot be imported. Optional features are looked up by
    name on ``tinytorch.core.kernels`` and exercised only when present.
    """

    def test_device_detection(self):
        """Test hardware device detection and selection.

        ``get_available_devices()`` must return a non-empty list containing a
        device whose ``type`` is ``'cpu'``. ``Device('cpu')`` must construct;
        ``Device('cuda:0')`` may raise ``RuntimeError``, which is treated as
        "CUDA not available".
        """
        try:
            from tinytorch.core import kernels
        except ImportError:
            return  # Device detection not ready yet

        # Device detection
        get_available_devices = getattr(kernels, "get_available_devices", None)
        if get_available_devices is not None:
            devices = get_available_devices()
            assert isinstance(devices, list), "Available devices should be list"
            assert len(devices) > 0, "Should detect at least CPU"
            # Should include CPU at minimum
            assert any(device.type == 'cpu' for device in devices), "CPU device not detected"

        # Device object
        Device = getattr(kernels, "Device", None)
        if Device is not None:
            cpu_device = Device('cpu')
            assert cpu_device.type == 'cpu', "CPU device creation broken"

            # Test CUDA device if available. Only construction is guarded, so
            # the handler cannot mask a failure in the assertion below.
            try:
                cuda_device = Device('cuda:0')
            except RuntimeError:
                pass  # CUDA not available on this system, which is fine
            else:
                assert cuda_device.type == 'cuda', "CUDA device creation broken"

    def test_tensor_device_movement(self):
        """Test moving tensors between devices.

        A new tensor with a ``device`` attribute must report type ``'cpu'``.
        If the tensor has ``to``, ``to('cuda')`` must either raise
        ``RuntimeError`` or return a tensor on ``'cuda'`` or ``'cpu'``.
        """
        try:
            from tinytorch.core.tensor import Tensor
            # Not used below; imported (as in the original) only so the test
            # is skipped until the kernels module provides Device.
            from tinytorch.core.kernels import Device  # noqa: F401
        except ImportError:
            return  # Tensor device movement not ready yet

        # Create tensor on CPU
        tensor = Tensor(np.random.randn(50, 50))

        # Should start on CPU
        if hasattr(tensor, 'device'):
            assert tensor.device.type == 'cpu', "Tensor not starting on CPU"

        # Test moving to different device (if available)
        if hasattr(tensor, 'to'):
            try:
                cuda_tensor = tensor.to('cuda')
            except RuntimeError:
                return  # CUDA not available for tensor movement
            if hasattr(cuda_tensor, 'device'):
                assert cuda_tensor.device.type in ('cuda', 'cpu'), "Device movement broken"

    def test_multi_gpu_support(self):
        """Test multi-GPU support and parallelization.

        With more than one GPU reported, the manager's device list must have
        one entry per GPU. ``data_parallel`` is checked on a single device
        and must return an object with a ``forward`` attribute.
        """
        try:
            from tinytorch.core import kernels
        except ImportError:
            return  # Multi-GPU support not ready yet

        # Multi-GPU manager
        MultiGPUManager = getattr(kernels, "MultiGPUManager", None)
        if MultiGPUManager is not None:
            gpu_manager = MultiGPUManager()
            available_gpus = gpu_manager.get_gpu_count()
            if available_gpus > 1:
                # Should be able to manage multiple devices
                devices = gpu_manager.get_device_list()
                assert len(devices) == available_gpus, "GPU device list incorrect"
            # Otherwise: single GPU or CPU only, nothing further to check.

        # Data parallel operations
        data_parallel = getattr(kernels, "data_parallel", None)
        if data_parallel is not None:
            try:
                from tinytorch.core.layers import Dense
            except ImportError:
                return  # Layers not implemented yet
            model = Dense(10, 5)
            parallel_model = data_parallel(model, device_ids=[0])  # Single device for testing
            assert hasattr(parallel_model, 'forward'), "Data parallel wrapper broken"
class TestRegressionPrevention:
    """Ensure previous modules still work after Module 13 development.

    The tests use the module-level ``np`` import throughout. Re-importing
    NumPy inside a method would make ``np`` local to that method and break
    every use of it that runs before the local import.
    """

    def test_no_complete_system_regression(self):
        """Verify complete ML system (01→12) unchanged.

        When the ``tinytorch`` modules cannot be imported, only a basic NumPy
        sanity check is performed.
        """
        # Core functionality should remain stable
        assert sys.version_info.major >= 3, "Foundation: Python detection broken"

        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import prune_weights
        except ImportError:
            assert np.random is not None, "System regression: Basic functionality broken"
            return

        # All components should work together
        model = Dense(8, 4)
        optimizer = Adam(model.parameters(), lr=0.001)
        Trainer(model, optimizer)  # only construction is checked here

        x = Tensor(np.random.randn(2, 8))
        output = model(x)
        assert output.shape == (2, 4), "System regression: Forward pass broken"

        # Compression should still work
        pruned = prune_weights(model.weights, sparsity=0.2)
        assert pruned.shape == model.weights.shape, "System regression: Compression broken"

    def test_no_efficiency_regression(self):
        """Verify efficiency features (11→12) unchanged.

        When the ``tinytorch`` modules cannot be imported, only a basic NumPy
        sanity check is performed.
        """
        try:
            from tinytorch.core.training import Trainer
            from tinytorch.core.compression import quantize_weights
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.layers import Dense
        except ImportError:
            # Basic functionality should work
            assert np is not None, "Efficiency regression: Basic functionality broken"
            return

        # Efficiency features should still work
        model = Dense(6, 3)
        optimizer = SGD(model.parameters(), lr=0.01)
        trainer = Trainer(model, optimizer)
        assert hasattr(trainer, 'train') or hasattr(trainer, 'fit'), "Efficiency regression: Training broken"

        # Compression should still work
        quantized = quantize_weights(model.weights, bits=8)
        assert quantized.shape == model.weights.shape, "Efficiency regression: Quantization broken"

    def test_progressive_stability(self):
        """Test the progressive stack is stable through kernel optimization.

        Checks three levels in turn: setup (NumPy), the ML system, and the
        kernel layer. The last two are skipped independently when their
        imports are unavailable.
        """
        # Stack should be stable through: Setup → ... → Compression → Kernels
        # Setup level
        assert np is not None, "Setup level broken"

        # Complete ML system level (if available)
        try:
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import Adam
            from tinytorch.core.training import Trainer
        except ImportError:
            pass  # Not implemented yet
        else:
            model = Dense(10, 5)
            optimizer = Adam(model.parameters(), lr=0.001)
            Trainer(model, optimizer)  # only construction is checked here
            x = Tensor(np.random.randn(3, 10))
            output = model(x)
            assert output.shape == (3, 5), "ML system level broken"

        # Kernel optimization level (if available)
        try:
            from tinytorch.core.kernels import optimized_matmul
        except ImportError:
            pass  # Not implemented yet
        else:
            # NOTE(review): other tests in this file pass Tensor objects to
            # optimized_matmul; raw ndarrays are kept here as in the original.
            # Confirm the kernel is meant to accept both.
            A = np.random.randn(20, 15)
            B = np.random.randn(15, 10)
            result = optimized_matmul(A, B)
            assert result.shape == (20, 10), "Kernel optimization level broken"