TinyTorch/tests/module_16/test_mlops_integration.py
Vijay Janapa Reddi 86b908fe5c Add TinyTorch examples gallery and fix module integration issues
- Create professional examples directory showcasing TinyTorch as real ML framework
- Add examples: XOR, MNIST, CIFAR-10, text generation, autograd demo, optimizer comparison
- Fix import paths in exported modules (training.py, dense.py)
- Update training module with autograd integration for loss functions
- Add progressive integration tests for all 16 modules
- Document framework capabilities and usage patterns

This commit establishes the examples gallery that demonstrates TinyTorch
works like PyTorch/TensorFlow, validating the complete framework.
2025-09-21 10:00:11 -04:00


"""
Integration Tests - MLOps Module
Tests real integration between MLOps pipeline and other TinyTorch modules.
Uses actual TinyTorch components to verify production monitoring works correctly.
"""
import pytest
import numpy as np
from test_utils import setup_integration_test
# Ensure proper setup before importing
setup_integration_test()
# Import ONLY from TinyTorch package
from tinytorch.core.tensor import Tensor
from tinytorch.core.layers import Dense
from tinytorch.core.networks import Sequential
from tinytorch.core.activations import ReLU, Sigmoid, Softmax  # Sigmoid is used by the trigger and monitoring tests
from tinytorch.core.mlops import (
    ModelMonitor, DriftDetector, RetrainingTrigger, MLOpsPipeline
)
# TinyTorchPerf is required by the benchmarking tests below; the module path is
# assumed here and should match wherever the benchmarking module exports it.
from tinytorch.core.benchmarking import TinyTorchPerf
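
# Summary of the MLOps surface as exercised by the assertions in this file
# (a convenience reference for readers, not an authoritative API spec):
#   - ModelMonitor.track_performance(data)    -> dict with 'accuracy', 'loss', 'timestamp'
#   - DriftDetector.detect_drift(data)        -> dict with 'drift_detected', 'drift_score', 'threshold'
#   - RetrainingTrigger.should_retrain(...)   -> bool, compared against performance_threshold
#   - MLOpsPipeline.check_system_health(...)  -> dict with 'model_performance', 'drift_status',
#                                                'retraining_needed', 'system_status'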


class TestMLOpsIntegration:
    """Test real integration between MLOps pipeline and TinyTorch components."""

    def test_model_monitor_with_real_models(self):
        """Test ModelMonitor works with real TinyTorch models."""
        # Create real model
        model = Sequential([
            Dense(4, 8),
            ReLU(),
            Dense(8, 2),
            Softmax()
        ])

        # Create model monitor
        monitor = ModelMonitor(model)

        # Test data
        test_data = [
            (Tensor([1.0, 2.0, 3.0, 4.0]), Tensor([1])),
            (Tensor([2.0, 3.0, 4.0, 5.0]), Tensor([0])),
            (Tensor([3.0, 4.0, 5.0, 6.0]), Tensor([1]))
        ]

        # Test monitoring
        performance = monitor.track_performance(test_data)

        # Verify integration
        assert 'accuracy' in performance
        assert 'loss' in performance
        assert 'timestamp' in performance
        assert 0.0 <= performance['accuracy'] <= 1.0
        assert performance['loss'] >= 0.0
        assert isinstance(performance['timestamp'], float)

    def test_drift_detector_with_real_data(self):
        """Test DriftDetector works with real tensor data."""
        # Create drift detector
        detector = DriftDetector()

        # Create baseline data
        baseline_data = [Tensor([1.0, 2.0, 3.0]) for _ in range(10)]
        detector.set_baseline(baseline_data)

        # Test with similar data (no drift expected)
        similar_data = [Tensor([1.1, 2.1, 3.1]) for _ in range(10)]
        drift_result = detector.detect_drift(similar_data)

        # Verify the structure of the drift report
        assert 'drift_detected' in drift_result
        assert 'drift_score' in drift_result
        assert 'threshold' in drift_result
        assert isinstance(drift_result['drift_detected'], bool)
        assert isinstance(drift_result['drift_score'], (int, float))
        assert isinstance(drift_result['threshold'], (int, float))

    def test_retraining_trigger_with_training_integration(self):
        """Test RetrainingTrigger works with training components."""
        # Create simple model
        model = Sequential([Dense(3, 2), Sigmoid()])

        # Create training data
        train_data = [
            (Tensor([1.0, 2.0, 3.0]), Tensor([1])),
            (Tensor([2.0, 3.0, 4.0]), Tensor([0])),
            (Tensor([3.0, 4.0, 5.0]), Tensor([1]))
        ]

        # Create retraining trigger
        trigger = RetrainingTrigger(
            model=model,
            training_data=train_data,
            performance_threshold=0.5
        )

        # Test trigger evaluation
        should_retrain = trigger.should_retrain(current_accuracy=0.3)

        # Verify trigger logic
        assert isinstance(should_retrain, bool)
        assert should_retrain is True  # Accuracy below threshold

        # Test with good performance
        should_not_retrain = trigger.should_retrain(current_accuracy=0.8)
        assert should_not_retrain is False  # Accuracy above threshold

    def test_mlops_pipeline_with_all_components(self):
        """Test complete MLOps pipeline with all TinyTorch components."""
        # Create real model
        model = Sequential([
            Dense(4, 6),
            ReLU(),
            Dense(6, 2),
            Softmax()
        ])

        # Create datasets
        train_data = [
            (Tensor([1.0, 2.0, 3.0, 4.0]), Tensor([1])),
            (Tensor([2.0, 3.0, 4.0, 5.0]), Tensor([0])),
            (Tensor([3.0, 4.0, 5.0, 6.0]), Tensor([1]))
        ]
        val_data = [
            (Tensor([1.5, 2.5, 3.5, 4.5]), Tensor([1])),
            (Tensor([2.5, 3.5, 4.5, 5.5]), Tensor([0]))
        ]
        baseline_data = [Tensor([1.0, 2.0, 3.0, 4.0]) for _ in range(5)]

        # Create MLOps pipeline
        pipeline = MLOpsPipeline(
            model=model,
            training_data=train_data,
            validation_data=val_data,
            baseline_data=baseline_data
        )

        # Test system health check
        new_data = [Tensor([1.2, 2.2, 3.2, 4.2]) for _ in range(3)]
        health = pipeline.check_system_health(new_data, current_accuracy=0.7)

        # Verify complete pipeline integration
        assert 'model_performance' in health
        assert 'drift_status' in health
        assert 'retraining_needed' in health
        assert 'system_status' in health

        # Check data types
        assert isinstance(health['model_performance'], dict)
        assert isinstance(health['drift_status'], dict)
        assert isinstance(health['retraining_needed'], bool)
        assert isinstance(health['system_status'], str)


class TestMLOpsWithBenchmarking:
    """Test MLOps integration with benchmarking framework."""

    def test_mlops_with_performance_benchmarking(self):
        """Test MLOps pipeline with performance benchmarking."""
        # Create model
        model = Sequential([Dense(4, 2), ReLU()])

        # Create MLOps pipeline
        train_data = [(Tensor([1.0, 2.0, 3.0, 4.0]), Tensor([1]))]
        val_data = [(Tensor([2.0, 3.0, 4.0, 5.0]), Tensor([0]))]
        baseline_data = [Tensor([1.0, 2.0, 3.0, 4.0])]
        pipeline = MLOpsPipeline(
            model=model,
            training_data=train_data,
            validation_data=val_data,
            baseline_data=baseline_data
        )

        # Test with benchmarking
        perf = TinyTorchPerf()
        perf.set_model(model)
        test_data = [Tensor([1.0, 2.0, 3.0, 4.0]) for _ in range(5)]
        perf.set_dataset(test_data)

        # Run benchmark
        results = perf.run_single_stream(num_queries=3)

        # Test MLOps health check
        health = pipeline.check_system_health(test_data, current_accuracy=0.8)

        # Verify benchmarking + MLOps integration
        assert hasattr(results, 'latency')
        assert hasattr(results, 'throughput')
        assert len(results.latency) == 3
        assert 'system_status' in health
        assert 'model_performance' in health
        assert health['system_status'] in ['healthy', 'degraded', 'critical']

    def test_mlops_performance_monitoring_integration(self):
        """Test MLOps performance monitoring with benchmarking."""
        # Create model
        model = Sequential([Dense(3, 2), Sigmoid()])

        # Create monitor
        monitor = ModelMonitor(model)

        # Test data
        test_data = [
            (Tensor([1.0, 2.0, 3.0]), Tensor([1])),
            (Tensor([2.0, 3.0, 4.0]), Tensor([0]))
        ]

        # Monitor performance
        performance = monitor.track_performance(test_data)

        # Test with benchmarking
        perf = TinyTorchPerf()
        perf.set_model(model)
        inference_data = [Tensor([1.0, 2.0, 3.0]) for _ in range(4)]
        perf.set_dataset(inference_data)
        benchmark_results = perf.run_single_stream(num_queries=4)

        # Verify monitoring + benchmarking integration
        assert 'accuracy' in performance
        assert 'loss' in performance
        assert hasattr(benchmark_results, 'latency')
        assert hasattr(benchmark_results, 'throughput')

        # Both should work with the same model
        assert len(benchmark_results.latency) == 4
        assert 0.0 <= performance['accuracy'] <= 1.0


class TestMLOpsWithNetworks:
    """Test MLOps integration with different network architectures."""

    def test_mlops_with_different_network_architectures(self):
        """Test MLOps pipeline with different network types."""
        # Test with different architectures
        networks = [
            Sequential([Dense(4, 2)]),                                  # Simple network
            Sequential([Dense(4, 8), ReLU(), Dense(8, 2)]),             # Deep network
            Sequential([Dense(4, 4), ReLU(), Dense(4, 2), Softmax()])   # With softmax
        ]

        for network in networks:
            # Create MLOps pipeline
            train_data = [(Tensor([1.0, 2.0, 3.0, 4.0]), Tensor([1]))]
            val_data = [(Tensor([2.0, 3.0, 4.0, 5.0]), Tensor([0]))]
            baseline_data = [Tensor([1.0, 2.0, 3.0, 4.0])]
            pipeline = MLOpsPipeline(
                model=network,
                training_data=train_data,
                validation_data=val_data,
                baseline_data=baseline_data
            )

            # Test system health
            new_data = [Tensor([1.5, 2.5, 3.5, 4.5])]
            health = pipeline.check_system_health(new_data, current_accuracy=0.7)

            # Verify each architecture works
            assert 'system_status' in health
            assert 'model_performance' in health
            assert health['system_status'] in ['healthy', 'degraded', 'critical']

    def test_mlops_scalability_with_network_complexity(self):
        """Test MLOps pipeline scales with network complexity."""
        # Create networks of different sizes
        small_network = Sequential([Dense(2, 2)])
        large_network = Sequential([
            Dense(8, 16), ReLU(),
            Dense(16, 8), ReLU(),
            Dense(8, 2)
        ])

        for network in [small_network, large_network]:
            # Create monitor
            monitor = ModelMonitor(network)

            # Test data (adjust input size for the network)
            input_size = 2 if network is small_network else 8
            test_data = [
                (Tensor(np.random.randn(input_size).tolist()), Tensor([1])),
                (Tensor(np.random.randn(input_size).tolist()), Tensor([0]))
            ]

            # Monitor performance
            performance = monitor.track_performance(test_data)

            # Verify monitoring works regardless of network size
            assert 'accuracy' in performance
            assert 'loss' in performance
            assert 0.0 <= performance['accuracy'] <= 1.0
            assert performance['loss'] >= 0.0


def test_integration_summary():
    """Summary test demonstrating complete MLOps integration."""
    print("🎯 Integration Summary: MLOps ↔ TinyTorch Components")
    print("=" * 60)

    # Create comprehensive test
    print("🏗️ Testing complete MLOps integration...")

    # Test 1: Create model with multiple components
    model = Sequential([
        Dense(6, 12),
        ReLU(),
        Dense(12, 6),
        ReLU(),
        Dense(6, 2),
        Softmax()
    ])

    # Test 2: Create datasets
    train_data = [
        (Tensor(np.random.randn(6).tolist()), Tensor([1])),
        (Tensor(np.random.randn(6).tolist()), Tensor([0])),
        (Tensor(np.random.randn(6).tolist()), Tensor([1]))
    ]
    val_data = [
        (Tensor(np.random.randn(6).tolist()), Tensor([1])),
        (Tensor(np.random.randn(6).tolist()), Tensor([0]))
    ]
    baseline_data = [Tensor(np.random.randn(6).tolist()) for _ in range(5)]

    # Test 3: Create complete MLOps pipeline
    pipeline = MLOpsPipeline(
        model=model,
        training_data=train_data,
        validation_data=val_data,
        baseline_data=baseline_data
    )

    # Test 4: Test system health monitoring
    new_data = [Tensor(np.random.randn(6).tolist()) for _ in range(3)]
    health = pipeline.check_system_health(new_data, current_accuracy=0.75)

    # Test 5: Test individual components
    monitor = ModelMonitor(model)
    performance = monitor.track_performance(val_data)

    detector = DriftDetector()
    detector.set_baseline(baseline_data)
    drift_result = detector.detect_drift(new_data)

    # Test 6: Test with benchmarking
    perf = TinyTorchPerf()
    perf.set_model(model)
    perf.set_dataset(new_data)
    benchmark_results = perf.run_single_stream(num_queries=3)

    # Verify complete integration
    assert 'system_status' in health
    assert 'model_performance' in health
    assert 'drift_status' in health
    assert 'retraining_needed' in health
    assert 'accuracy' in performance
    assert 'loss' in performance
    assert 'drift_detected' in drift_result
    assert 'drift_score' in drift_result
    assert hasattr(benchmark_results, 'latency')
    assert hasattr(benchmark_results, 'throughput')

    print("✅ MLOps integration successful!")
    print(f" System status: {health['system_status']}")
    print(f" Model accuracy: {performance['accuracy']:.3f}")
    print(f" Drift detected: {drift_result['drift_detected']}")
    print(f" Retraining needed: {health['retraining_needed']}")
    print(f" Benchmark latency: {len(benchmark_results.latency)} measurements")
    print(" Components: All TinyTorch modules → MLOps → Production System")
    print("🎉 Complete production ML system ready for deployment!")