"""
|
|
Integration Tests - Benchmarking Module
|
|
|
|
Tests real integration between benchmarking framework and other TinyTorch modules.
|
|
Uses actual TinyTorch components to verify systematic evaluation works correctly.
|
|
"""

import pytest
import numpy as np

from test_utils import setup_integration_test

# Ensure proper setup before importing TinyTorch modules
setup_integration_test()

# Import ONLY from the TinyTorch package
from tinytorch.core.tensor import Tensor
from tinytorch.core.activations import ReLU, Sigmoid, Softmax
from tinytorch.core.layers import Dense
from tinytorch.core.networks import Sequential
from tinytorch.core.kernels import vectorized_relu
from tinytorch.core.benchmarking import BenchmarkScenarios, StatisticalValidator, TinyTorchPerf
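
# Note: the scenarios exercised below (single_stream, server, offline) appear
# to mirror MLPerf-style inference scenarios: latency-focused single queries,
# sustained load at a target QPS, and batched throughput, respectively. That
# reading is inferred from the method names and arguments, not from a
# documented API contract.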


class TestBenchmarkingIntegration:
    """Test real integration between the benchmarking framework and TinyTorch components."""

    def test_benchmark_scenarios_with_real_model(self):
        """Test that BenchmarkScenarios works with real TinyTorch models."""
        # Create a real model
        model = Sequential([
            Dense(4, 8),
            ReLU(),
            Dense(8, 2),
            Softmax()
        ])

        # Create benchmark scenarios
        scenarios = BenchmarkScenarios()

        # Create a simple dataset of random 4-feature inputs
        dataset = [Tensor(np.random.randn(4).tolist()) for _ in range(10)]

        # Run the single-stream scenario
        results = scenarios.single_stream(model, dataset, num_queries=5)

        # Verify integration: one latency measurement per query, positive
        # throughput, and accuracy reported as a fraction
        assert hasattr(results, 'latency')
        assert hasattr(results, 'throughput')
        assert hasattr(results, 'accuracy')
        assert len(results.latency) == 5
        assert results.throughput > 0
        assert 0.0 <= results.accuracy <= 1.0

    def test_statistical_validator_with_benchmark_results(self):
        """Test that StatisticalValidator works with benchmark results."""
        # Create validator
        validator = StatisticalValidator()

        # Create sample benchmark results (latencies in seconds); B is slower
        results_a = [0.01, 0.012, 0.011, 0.013, 0.009]
        results_b = [0.015, 0.017, 0.016, 0.018, 0.014]

        # Run the statistical validation
        stats = validator.validate_comparison(results_a, results_b)

        # Verify the analysis exposes the expected fields
        assert hasattr(stats, 'significant')
        assert hasattr(stats, 'p_value')
        assert hasattr(stats, 'effect_size')
        assert hasattr(stats, 'recommendation')

        # Verify reasonable values
        assert isinstance(stats.significant, bool)
        assert stats.p_value >= 0.0
        assert isinstance(stats.effect_size, (int, float))
        assert isinstance(stats.recommendation, str)
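
        # Usage sketch (hypothetical downstream code, not part of this test):
        # a caller might gate an optimization decision on both fields, e.g.
        #   if stats.significant and abs(stats.effect_size) > 0.5:
        #       print(f"Prefer A over B: {stats.recommendation}")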

    def test_tinytorch_perf_with_basic_models(self):
        """Test the TinyTorchPerf framework with basic models."""
        # Create a real model
        model = Sequential([Dense(10, 5), ReLU(), Dense(5, 2)])

        # Create the benchmarking framework and attach the model
        perf = TinyTorchPerf()
        perf.set_model(model)

        # Create a dataset of random 10-feature inputs
        dataset = [Tensor(np.random.randn(10).tolist()) for _ in range(8)]
        perf.set_dataset(dataset)

        # Run the single-stream benchmark
        results = perf.run_single_stream(num_queries=5)

        # Verify basic benchmarking integration
        assert hasattr(results, 'latency')
        assert hasattr(results, 'throughput')
        assert hasattr(results, 'accuracy')
        assert len(results.latency) == 5
        assert results.throughput > 0
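

# A minimal sketch, assuming only NumPy, of how per-query latency lists such
# as results.latency can be summarized. The percentile choices are
# illustrative assumptions, not something TinyTorchPerf prescribes.
def summarize_latency(latencies):
    """Return simple summary statistics for a list of per-query latencies."""
    arr = np.asarray(latencies, dtype=float)
    return {
        "mean": float(arr.mean()),
        "p50": float(np.percentile(arr, 50)),   # median latency
        "p95": float(np.percentile(arr, 95)),   # tail latency
    }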


class TestBenchmarkingWithKernels:
    """Test benchmarking integration with optimized kernels."""

    def test_benchmarking_kernel_optimized_operations(self):
        """Test the benchmarking framework with kernel-optimized operations."""
        # Create a "model" that uses kernel operations directly
        def kernel_model(x):
            # Use the vectorized kernel as the entire forward pass
            return vectorized_relu(x)
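
        # Note: set_model() is handed this plain function rather than a
        # Sequential instance, so the harness evidently accepts any callable
        # that maps a Tensor to a Tensor as a model.
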
        # Create the benchmarking framework
        perf = TinyTorchPerf()
        perf.set_model(kernel_model)

        # Create a dataset of random 5-feature inputs
        dataset = [Tensor(np.random.randn(5).tolist()) for _ in range(8)]
        perf.set_dataset(dataset)

        # Benchmark the kernel operations
        results = perf.run_single_stream(num_queries=6)

        # Verify kernel + benchmarking integration
        assert hasattr(results, 'latency')
        assert hasattr(results, 'throughput')
        assert len(results.latency) == 6
        assert results.throughput > 0

    def test_performance_comparison_with_kernels(self):
        """Test performance comparison between standard and kernel operations."""
        # Create a standard model
        standard_model = Sequential([Dense(4, 4), ReLU()])

        # Create a shared dataset so both implementations see identical inputs
        dataset = [Tensor(np.random.randn(4).tolist()) for _ in range(10)]

        # Benchmark the standard model
        perf_standard = TinyTorchPerf()
        perf_standard.set_model(standard_model)
        perf_standard.set_dataset(dataset)

        standard_results = perf_standard.run_single_stream(num_queries=5)

        # Verify we can benchmark the standard implementation
        assert hasattr(standard_results, 'latency')
        assert hasattr(standard_results, 'throughput')
        assert len(standard_results.latency) == 5

        # Verify the framework also handles a different model type: a bare
        # kernel function
        def kernel_relu_model(x):
            return vectorized_relu(x)

        perf_kernel = TinyTorchPerf()
        perf_kernel.set_model(kernel_relu_model)
        perf_kernel.set_dataset(dataset)

        kernel_results = perf_kernel.run_single_stream(num_queries=5)

        assert hasattr(kernel_results, 'latency')
        assert hasattr(kernel_results, 'throughput')
        assert len(kernel_results.latency) == 5


class TestBenchmarkingWithNetworks:
    """Test the benchmarking framework with neural networks."""

    def test_benchmarking_sequential_networks(self):
        """Test benchmarking with Sequential networks."""
        # Create a realistic network
        network = Sequential([
            Dense(8, 16),
            ReLU(),
            Dense(16, 8),
            ReLU(),
            Dense(8, 3),
            Softmax()
        ])

        # Create the benchmarking framework and attach the network
        perf = TinyTorchPerf()
        perf.set_model(network)

        # Create a dataset of random 8-feature inputs
        dataset = [Tensor(np.random.randn(8).tolist()) for _ in range(12)]
        perf.set_dataset(dataset)

        # Run all three benchmark scenarios
        single_stream = perf.run_single_stream(num_queries=6)
        server_results = perf.run_server(target_qps=10.0, duration=2.0)
        offline_results = perf.run_offline(batch_size=4)
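
        # Parameter reading (inferred from the names, not a documented
        # contract): target_qps=10.0 drives roughly ten queries per second
        # over a 2-second window, and batch_size=4 processes the dataset in
        # groups of four inputs.
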
        # Verify all scenarios work
        for results in [single_stream, server_results, offline_results]:
            assert hasattr(results, 'latency')
            assert hasattr(results, 'throughput')
            assert hasattr(results, 'accuracy')
            assert len(results.latency) > 0
            assert results.throughput > 0

    def test_benchmarking_with_different_network_sizes(self):
        """Test that benchmarking works regardless of network complexity."""
        # Create a small network
        small_network = Sequential([Dense(4, 2)])

        # Create a larger network
        large_network = Sequential([
            Dense(4, 32),
            ReLU(),
            Dense(32, 16),
            ReLU(),
            Dense(16, 2)
        ])

        # Create a shared dataset
        dataset = [Tensor(np.random.randn(4).tolist()) for _ in range(10)]

        # Benchmark both networks with identical settings
        for network in [small_network, large_network]:
            perf = TinyTorchPerf()
            perf.set_model(network)
            perf.set_dataset(dataset)

            results = perf.run_single_stream(num_queries=5)

            # Verify benchmarking works regardless of network size
            assert hasattr(results, 'latency')
            assert hasattr(results, 'throughput')
            assert len(results.latency) == 5
            assert results.throughput > 0


def test_integration_summary():
    """Summary test demonstrating complete benchmarking integration."""
    print("🎯 Integration Summary: Benchmarking ↔ TinyTorch Components")
    print("=" * 60)

    # Create a comprehensive end-to-end test
    print("🏗️ Testing benchmarking integration...")

    # Test 1: Create a model with multiple component types
    model = Sequential([
        Dense(6, 12),
        ReLU(),
        Dense(12, 8),
        ReLU(),
        Dense(8, 3),
        Softmax()
    ])

    # Test 2: Create the benchmarking framework
    perf = TinyTorchPerf()
    perf.set_model(model)

    # Test 3: Create a dataset of random 6-feature inputs
    dataset = [Tensor(np.random.randn(6).tolist()) for _ in range(15)]
    perf.set_dataset(dataset)

    # Test 4: Run comprehensive benchmarking across all three scenarios
    single_stream = perf.run_single_stream(num_queries=8)
    server_results = perf.run_server(target_qps=10.0, duration=2.0)
    offline_results = perf.run_offline(batch_size=5)

    # Test 5: Statistical validation on a controlled comparison
    validator = StatisticalValidator()

    # Create comparison data: variant B is exactly 10% slower than A
    results_a = single_stream.latency[:5]
    results_b = [x * 1.1 for x in results_a]

    stats = validator.validate_comparison(results_a, results_b)

    # Verify complete integration
    assert hasattr(single_stream, 'latency')
    assert hasattr(server_results, 'throughput')
    assert hasattr(offline_results, 'accuracy')
    assert hasattr(stats, 'significant')
    assert hasattr(stats, 'recommendation')

    print("✅ Benchmarking integration successful!")
    print(f" Single stream queries: {len(single_stream.latency)}")
    print(f" Server throughput: {server_results.throughput:.1f} QPS")
    print(f" Offline accuracy: {offline_results.accuracy:.3f}")
    print(f" Statistical comparison: {stats.recommendation}")
    print(" Components: Networks → Layers → Activations → Tensors → Benchmarking")
    print("🎉 Systematic ML performance evaluation ready for production!")
print("🎉 Systematic ML performance evaluation ready for production!") |