TinyTorch/tests/module_15/test_benchmarking_integration.py
Vijay Janapa Reddi (86b908fe5c): Add TinyTorch examples gallery and fix module integration issues
- Create professional examples directory showcasing TinyTorch as real ML framework
- Add examples: XOR, MNIST, CIFAR-10, text generation, autograd demo, optimizer comparison
- Fix import paths in exported modules (training.py, dense.py)
- Update training module with autograd integration for loss functions
- Add progressive integration tests for all 16 modules
- Document framework capabilities and usage patterns

This commit establishes the examples gallery, demonstrating that TinyTorch
works like PyTorch/TensorFlow and validating the complete framework.
2025-09-21 10:00:11 -04:00


"""
Integration Tests - Benchmarking Module
Tests real integration between benchmarking framework and other TinyTorch modules.
Uses actual TinyTorch components to verify systematic evaluation works correctly.
"""
import pytest
import numpy as np
from test_utils import setup_integration_test
# Ensure proper setup before importing
setup_integration_test()
# Import ONLY from TinyTorch package
from tinytorch.core.tensor import Tensor
from tinytorch.core.activations import ReLU, Sigmoid, Softmax
from tinytorch.core.layers import Dense
from tinytorch.core.networks import Sequential
from tinytorch.core.kernels import vectorized_relu
from tinytorch.core.benchmarking import BenchmarkScenarios, StatisticalValidator, TinyTorchPerf
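# The three benchmarking entry points exercised below, as used in this file:
#   * BenchmarkScenarios   - scenario runners called directly, e.g. single_stream(model, dataset, num_queries)
#   * TinyTorchPerf        - harness configured via set_model()/set_dataset(), then run_single_stream()/run_server()/run_offline()
#   * StatisticalValidator - validate_comparison(results_a, results_b) reports significance, p-value, effect size, recommendation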
class TestBenchmarkingIntegration:
"""Test real integration between benchmarking framework and TinyTorch components."""
def test_benchmark_scenarios_with_real_model(self):
"""Test BenchmarkScenarios works with real TinyTorch models."""
# Create real model
model = Sequential([
Dense(4, 8),
ReLU(),
Dense(8, 2),
Softmax()
])
# Create benchmark scenarios
scenarios = BenchmarkScenarios()
# Create simple dataset
dataset = [Tensor(np.random.randn(4).tolist()) for _ in range(10)]
# Test single stream scenario
results = scenarios.single_stream(model, dataset, num_queries=5)
# Verify integration
assert hasattr(results, 'latency')
assert hasattr(results, 'throughput')
assert hasattr(results, 'accuracy')
assert len(results.latency) == 5
assert results.throughput > 0
assert 0.0 <= results.accuracy <= 1.0
def test_statistical_validator_with_benchmark_results(self):
"""Test StatisticalValidator works with benchmark results."""
# Create validator
validator = StatisticalValidator()
# Create sample benchmark results
results_a = [0.01, 0.012, 0.011, 0.013, 0.009]
results_b = [0.015, 0.017, 0.016, 0.018, 0.014]
# Test statistical validation
stats = validator.validate_comparison(results_a, results_b)
# Verify statistical analysis
assert hasattr(stats, 'significant')
assert hasattr(stats, 'p_value')
assert hasattr(stats, 'effect_size')
assert hasattr(stats, 'recommendation')
# Verify reasonable values
assert isinstance(stats.significant, bool)
        assert 0.0 <= stats.p_value <= 1.0  # p-values are bounded in [0, 1]
assert isinstance(stats.effect_size, (int, float))
assert isinstance(stats.recommendation, str)
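    # Rough expectation for the sample data above (a worked check, not the validator's
    # actual algorithm): mean(results_a) = 0.011 s and mean(results_b) = 0.016 s, both with
    # sample std of roughly 0.0016 s, so a Cohen's-d-style effect size would be about
    # (0.016 - 0.011) / 0.0016 ~ 3, i.e. a large and most likely significant difference.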
def test_tinytorch_perf_with_basic_models(self):
"""Test TinyTorchPerf framework with basic models."""
# Create real model
model = Sequential([Dense(10, 5), ReLU(), Dense(5, 2)])
# Create benchmarking framework
perf = TinyTorchPerf()
perf.set_model(model)
# Create dataset
dataset = [Tensor(np.random.randn(10).tolist()) for _ in range(8)]
perf.set_dataset(dataset)
# Test benchmarking
results = perf.run_single_stream(num_queries=5)
# Verify basic benchmarking integration
assert hasattr(results, 'latency')
assert hasattr(results, 'throughput')
assert hasattr(results, 'accuracy')
assert len(results.latency) == 5
assert results.throughput > 0
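
# A minimal sketch of what a single-stream run is expected to measure: one query at a
# time, per-query latency, and throughput as queries per second of wall-clock time
# (accuracy is omitted here). This helper is illustrative only; it is not how
# TinyTorchPerf/BenchmarkScenarios are implemented, and it is not collected by pytest.
def _single_stream_sketch(model, dataset, num_queries):
    import time
    latencies = []
    start = time.perf_counter()
    for i in range(num_queries):
        sample = dataset[i % len(dataset)]   # cycle through the dataset
        t0 = time.perf_counter()
        model(sample)                        # one inference per query
        latencies.append(time.perf_counter() - t0)
    throughput = num_queries / (time.perf_counter() - start)
    return latencies, throughput
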
class TestBenchmarkingWithKernels:
"""Test benchmarking integration with optimized kernels."""
def test_benchmarking_kernel_optimized_operations(self):
"""Test benchmarking framework with kernel-optimized operations."""
# Create model using kernel operations
def kernel_model(x):
# Use kernel operations in model
return vectorized_relu(x)
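        # Note: set_model() is given a plain callable here rather than a Sequential, so the
        # harness appears to accept anything that maps an input sample to an output.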
# Create benchmarking framework
perf = TinyTorchPerf()
perf.set_model(kernel_model)
# Create dataset
dataset = [Tensor(np.random.randn(5).tolist()) for _ in range(8)]
perf.set_dataset(dataset)
# Benchmark kernel operations
results = perf.run_single_stream(num_queries=6)
# Verify kernel + benchmarking integration
assert hasattr(results, 'latency')
assert hasattr(results, 'throughput')
assert len(results.latency) == 6
assert results.throughput > 0
def test_performance_comparison_with_kernels(self):
"""Test performance comparison between standard and kernel operations."""
# Create standard model
standard_model = Sequential([Dense(4, 4), ReLU()])
# Create dataset
dataset = [Tensor(np.random.randn(4).tolist()) for _ in range(10)]
# Benchmark standard model
perf_standard = TinyTorchPerf()
perf_standard.set_model(standard_model)
perf_standard.set_dataset(dataset)
standard_results = perf_standard.run_single_stream(num_queries=5)
# Verify we can benchmark different implementations
assert hasattr(standard_results, 'latency')
assert hasattr(standard_results, 'throughput')
assert len(standard_results.latency) == 5
# Test that benchmarking framework can handle different model types
def kernel_relu_model(x):
return vectorized_relu(x)
perf_kernel = TinyTorchPerf()
perf_kernel.set_model(kernel_relu_model)
perf_kernel.set_dataset(dataset)
kernel_results = perf_kernel.run_single_stream(num_queries=5)
assert hasattr(kernel_results, 'latency')
assert hasattr(kernel_results, 'throughput')
assert len(kernel_results.latency) == 5
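
# If an actual standard-vs-kernel verdict were wanted, the two latency lists gathered
# above could be fed straight into the validator. Illustrative sketch only; the method
# and attribute names are the ones already exercised elsewhere in this file.
def _compare_latency_sketch(standard_results, kernel_results):
    validator = StatisticalValidator()
    stats = validator.validate_comparison(standard_results.latency, kernel_results.latency)
    return stats.significant, stats.effect_size, stats.recommendation
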
class TestBenchmarkingWithNetworks:
"""Test benchmarking framework with neural networks."""
def test_benchmarking_sequential_networks(self):
"""Test benchmarking with Sequential networks."""
# Create realistic network
network = Sequential([
Dense(8, 16),
ReLU(),
Dense(16, 8),
ReLU(),
Dense(8, 3),
Softmax()
])
# Create benchmarking framework
perf = TinyTorchPerf()
perf.set_model(network)
# Create dataset
dataset = [Tensor(np.random.randn(8).tolist()) for _ in range(12)]
perf.set_dataset(dataset)
# Test all benchmark scenarios
single_stream = perf.run_single_stream(num_queries=6)
server_results = perf.run_server(target_qps=10.0, duration=2.0)
offline_results = perf.run_offline(batch_size=4)
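        # The three scenarios mirror MLPerf-style inference workloads: single-stream issues
        # one query at a time (latency-bound), server simulates queries arriving at roughly
        # target_qps for `duration` seconds (latency under load), and offline processes the
        # whole dataset in batches of batch_size (throughput-bound). The exact arrival model
        # is an implementation detail of the benchmarking module.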
# Verify all scenarios work
for results in [single_stream, server_results, offline_results]:
assert hasattr(results, 'latency')
assert hasattr(results, 'throughput')
assert hasattr(results, 'accuracy')
assert len(results.latency) > 0
assert results.throughput > 0
def test_benchmarking_with_different_network_sizes(self):
"""Test benchmarking scales with network complexity."""
# Create small network
small_network = Sequential([Dense(4, 2)])
# Create large network
large_network = Sequential([
Dense(4, 32),
ReLU(),
Dense(32, 16),
ReLU(),
Dense(16, 2)
])
# Create dataset
dataset = [Tensor(np.random.randn(4).tolist()) for _ in range(10)]
# Benchmark both networks
for network in [small_network, large_network]:
perf = TinyTorchPerf()
perf.set_model(network)
perf.set_dataset(dataset)
results = perf.run_single_stream(num_queries=5)
# Verify benchmarking works regardless of network size
assert hasattr(results, 'latency')
assert hasattr(results, 'throughput')
assert len(results.latency) == 5
assert results.throughput > 0
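
# A rough sanity check for the size comparison above, assuming Dense(n_in, n_out) stores an
# n_in x n_out weight matrix plus a bias vector (illustrative; not tied to the actual Dense
# implementation). The "small" network has 4*2 + 2 = 10 parameters, the "large" one has
# 4*32+32 + 32*16+16 + 16*2+2 = 722, so its per-query latency should be noticeably higher.
def _dense_chain_param_count(layer_sizes):
    return sum(n_in * n_out + n_out for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]))
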
def test_integration_summary():
"""Summary test demonstrating complete benchmarking integration."""
print("🎯 Integration Summary: Benchmarking ↔ TinyTorch Components")
print("=" * 60)
# Create comprehensive test
print("🏗️ Testing benchmarking integration...")
# Test 1: Create model with multiple components
model = Sequential([
Dense(6, 12),
ReLU(),
Dense(12, 8),
ReLU(),
Dense(8, 3),
Softmax()
])
# Test 2: Create benchmarking framework
perf = TinyTorchPerf()
perf.set_model(model)
# Test 3: Create dataset
dataset = [Tensor(np.random.randn(6).tolist()) for _ in range(15)]
perf.set_dataset(dataset)
# Test 4: Run comprehensive benchmarking
single_stream = perf.run_single_stream(num_queries=8)
server_results = perf.run_server(target_qps=10.0, duration=2.0)
offline_results = perf.run_offline(batch_size=5)
# Test 5: Statistical validation
validator = StatisticalValidator()
# Create comparison data
results_a = single_stream.latency[:5]
results_b = [x * 1.1 for x in results_a] # Slightly slower
stats = validator.validate_comparison(results_a, results_b)
# Verify complete integration
assert hasattr(single_stream, 'latency')
assert hasattr(server_results, 'throughput')
assert hasattr(offline_results, 'accuracy')
assert hasattr(stats, 'significant')
assert hasattr(stats, 'recommendation')
print("✅ Benchmarking integration successful!")
print(f" Single stream queries: {len(single_stream.latency)}")
print(f" Server throughput: {server_results.throughput:.1f} QPS")
print(f" Offline accuracy: {offline_results.accuracy:.3f}")
print(f" Statistical comparison: {stats.recommendation}")
print(" Components: Networks → Layers → Activations → Tensors → Benchmarking")
print("🎉 Systematic ML performance evaluation ready for production!")