""" Module 15: Progressive Integration Tests Tests that Module 15 (MLOps) works correctly AND that the entire TinyTorch system (01→14) still works. DEPENDENCY CHAIN: 01_setup → ... → 14_benchmarking → 15_mlops This is where we enable production deployment, monitoring, and lifecycle management for ML systems. šŸŽÆ WHAT THIS TESTS: - Module 15: Production deployment, model monitoring, lifecycle management, CI/CD for ML - Integration: MLOps works with complete ML pipeline (models, training, benchmarking) - Regression: Entire TinyTorch system (01→14) still works correctly - Preparation: Ready for capstone (Module 16: Complete ML systems) šŸ’” FOR STUDENTS: If tests fail, check: 1. Does your ModelMonitor class exist in tinytorch.core.mlops? 2. Can you deploy models with monitoring and logging? 3. Do production pipelines work with real data workflows? 4. Are monitoring metrics meaningful for production decisions? šŸ”§ DEBUGGING HELP: - MLOps includes: model versioning, deployment, monitoring, rollback, A/B testing - Monitoring tracks: accuracy drift, latency, throughput, errors, resource usage - Deployment enables: auto-scaling, load balancing, health checks, graceful updates """ import numpy as np import sys import time from pathlib import Path # Add project root to path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) class TestCompleteTinyTorchSystemStillWorks: """ šŸ”„ REGRESSION CHECK: Verify complete TinyTorch system (01→14) still works after MLOps development. šŸ’” If these fail: You may have broken something in the core system while implementing MLOps. šŸ”§ Fix: Check that MLOps code doesn't interfere with core ML functionality. """ def test_complete_ml_system_stable(self): """ āœ… TEST: Complete TinyTorch system (all modules 01→14) should still work šŸ“‹ COMPLETE SYSTEM COMPONENTS: - Foundation: Setup, tensors, activations, layers - Networks: Dense networks, spatial operations, attention - Training: Data loading, autograd, optimizers, training loops - Production: Compression, kernels, benchmarking 🚨 IF FAILS: Core TinyTorch system broken by MLOps development """ try: # Test that complete TinyTorch system still works from tinytorch.core.tensor import Tensor from tinytorch.core.spatial import Conv2D, MaxPool2D from tinytorch.core.attention import MultiHeadAttention from tinytorch.core.layers import Dense from tinytorch.core.activations import ReLU, Softmax from tinytorch.core.optimizers import Adam from tinytorch.core.training import Trainer from tinytorch.core.data import Dataset, DataLoader from tinytorch.core.compression import prune_weights from tinytorch.core.benchmarking import benchmark_model # Create sophisticated ML system (Vision + Language) class MultiModalSystem: def __init__(self): # Vision pathway self.vision_conv = Conv2D(3, 64, kernel_size=3, padding=1) self.vision_pool = MaxPool2D(kernel_size=2) self.vision_proj = Dense(64 * 16 * 16, 256) # Language pathway self.language_embed = Dense(1000, 256) # vocab_size=1000 self.attention = MultiHeadAttention(embed_dim=256, num_heads=8) # Fusion self.fusion = Dense(512, 128) self.classifier = Dense(128, 10) # Activations self.relu = ReLU() self.softmax = Softmax() def __call__(self, vision_input, language_input): # Vision processing vis_feat = self.relu(self.vision_conv(vision_input)) vis_pooled = self.vision_pool(vis_feat) vis_flat = Tensor(vis_pooled.data.reshape(vis_pooled.shape[0], -1)) vis_embed = self.vision_proj(vis_flat) # Language processing lang_embed = self.language_embed(language_input) lang_attn = 
                    lang_feat = Tensor(lang_attn.data.reshape(lang_embed.shape[0], -1))

                    # Multimodal fusion
                    combined_data = np.concatenate([vis_embed.data, lang_feat.data], axis=1)
                    combined = Tensor(combined_data)

                    # Classification
                    fused = self.relu(self.fusion(combined))
                    logits = self.classifier(fused)
                    return self.softmax(logits)

                def parameters(self):
                    params = []
                    layers = [self.vision_conv, self.vision_proj, self.language_embed,
                              self.fusion, self.classifier]
                    for layer in layers:
                        if hasattr(layer, 'parameters'):
                            params.extend(layer.parameters())
                        elif hasattr(layer, 'weights'):
                            params.append(layer.weights)
                            if hasattr(layer, 'bias') and layer.bias is not None:
                                params.append(layer.bias)
                    return params

            # Test complete system
            system = MultiModalSystem()

            # Test data
            vision_data = Tensor(np.random.randn(2, 3, 32, 32))
            language_data = Tensor(np.random.randint(0, 1000, (2, 50)))

            # Test forward pass
            predictions = system(vision_data, language_data)
            assert predictions.shape == (2, 10), \
                f"❌ Complete system shape broken. Expected (2, 10), got {predictions.shape}"

            # Test training components
            optimizer = Adam(system.parameters(), lr=0.001)
            assert hasattr(optimizer, 'step'), "❌ Training components broken"

            # Test compression
            if 'prune_weights' in locals():
                original_weights = system.vision_conv.weights.data.copy()
                pruned = prune_weights(system.vision_conv.weights, sparsity=0.2)
                assert pruned.shape == original_weights.shape, "❌ Compression broken"

            # Test benchmarking
            if 'benchmark_model' in locals():
                # Simplified benchmark for vision pathway
                benchmark_results = benchmark_model(system.vision_conv, (2, 3, 32, 32))
                assert 'latency' in benchmark_results, "❌ Benchmarking broken"

        except ImportError as e:
            assert False, f"""
            ❌ COMPLETE TINYTORCH SYSTEM IMPORTS BROKEN!

            🔍 IMPORT ERROR: {str(e)}

            🔧 COMPLETE SYSTEM REQUIREMENTS:
            ALL modules (01→14) must be working perfectly:

            Foundation (01-05):
            ✅ Setup environment and tools
            ✅ Tensor operations and mathematics
            ✅ Activation functions and non-linearity
            ✅ Layer infrastructure and inheritance
            ✅ Dense networks and neural architectures

            Advanced ML (06-08):
            ✅ Spatial processing and computer vision
            ✅ Attention mechanisms and transformers
            ✅ Data loading and preprocessing pipelines

            Training Infrastructure (09-11):
            ✅ Automatic differentiation and gradients
            ✅ Optimization algorithms (SGD, Adam)
            ✅ Training loops and learning coordination

            Production Systems (12-14):
            ✅ Model compression and efficiency
            ✅ Performance kernels and acceleration
            ✅ Benchmarking and performance analysis

            💡 SYSTEM INTEGRITY: MLOps should be PURELY ADDITIVE - it adds deployment
            and monitoring but doesn't break any existing ML functionality.
            """
        except Exception as e:
            assert False, f"""
            ❌ COMPLETE TINYTORCH SYSTEM FUNCTIONALITY BROKEN!

            🔍 ERROR: {str(e)}

            🔧 SYSTEM STABILITY REQUIREMENTS:
            1. All forward passes work correctly
            2. Training components remain functional
            3. Advanced architectures still integrate
            4. Performance tools remain operational
            5. No interference from MLOps code

            💡 PRODUCTION READINESS: The complete TinyTorch system must work flawlessly
            because MLOps will deploy and monitor these models in production environments
            where reliability is critical.

            🚨 CRITICAL ISSUE: If the core ML system is broken, MLOps cannot deploy
            reliable models to production!
""" def test_benchmarking_and_optimization_stable(self): """ āœ… TEST: Performance benchmarking and optimization should still work šŸ“‹ PERFORMANCE SYSTEM: - Model benchmarking and profiling - Performance comparison tools - Hardware analysis and optimization - Training and inference analysis šŸŽÆ MLOps needs performance data for production decisions """ try: from tinytorch.core.benchmarking import benchmark_model from tinytorch.core.layers import Dense from tinytorch.core.spatial import Conv2D from tinytorch.core.tensor import Tensor # Test that benchmarking still works models_to_benchmark = [ ("dense_model", Dense(100, 50)), ("conv_model", Conv2D(3, 16, kernel_size=3)) ] benchmark_results = {} for model_name, model in models_to_benchmark: if model_name == "dense_model": input_shape = (16, 100) else: # conv_model input_shape = (4, 3, 32, 32) # Test benchmarking results = benchmark_model(model, input_shape) benchmark_results[model_name] = results # Verify benchmark structure assert 'latency' in results, f"āŒ Benchmarking broken for {model_name}" assert 'throughput' in results, f"āŒ Benchmarking broken for {model_name}" assert results['latency'] > 0, f"āŒ Invalid latency for {model_name}" assert results['throughput'] > 0, f"āŒ Invalid throughput for {model_name}" # Verify performance comparison works dense_perf = benchmark_results["dense_model"] conv_perf = benchmark_results["conv_model"] # Should have different performance characteristics assert dense_perf['latency'] != conv_perf['latency'], \ "āŒ Performance comparison broken - models show identical performance" except Exception as e: assert False, f""" āŒ BENCHMARKING AND OPTIMIZATION BROKEN! šŸ” ERROR: {str(e)} šŸ”§ PERFORMANCE REQUIREMENTS FOR MLOPS: 1. Model benchmarking must work for deployment planning 2. Performance comparison guides model selection 3. Hardware analysis informs infrastructure decisions 4. Training metrics track system health šŸ’” MLOPS DEPENDENCY ON PERFORMANCE: MLOps uses performance data for: - Auto-scaling decisions - Resource allocation - SLA monitoring - Cost optimization - Infrastructure planning Without working performance tools, MLOps cannot make intelligent production decisions! """ class TestModule15MLOpsCore: """ šŸ†• NEW FUNCTIONALITY: Test Module 15 (MLOps) core implementation. šŸ’” What you're implementing: Production deployment, monitoring, and lifecycle management for ML systems. šŸŽÆ Goal: Enable reliable, scalable, and monitored ML systems in production. 
""" def test_model_monitoring_exists(self): """ āœ… TEST: Model monitoring - Track model performance in production šŸ“‹ WHAT YOU NEED TO IMPLEMENT: class ModelMonitor: def __init__(self, model, metrics=['accuracy', 'latency', 'throughput']): # Setup monitoring infrastructure def log_prediction(self, inputs, outputs, targets=None): # Track individual predictions def get_metrics(self): # Return current performance metrics 🚨 IF FAILS: Model monitoring doesn't exist or missing components """ try: from tinytorch.core.mlops import ModelMonitor from tinytorch.core.layers import Dense from tinytorch.core.tensor import Tensor # Test model monitoring setup model = Dense(50, 10) monitor = ModelMonitor(model, metrics=['accuracy', 'latency', 'drift']) # Should track the model assert hasattr(monitor, 'model'), \ "āŒ ModelMonitor missing 'model' attribute" assert monitor.model is model, \ "āŒ ModelMonitor not correctly tracking the model" # Should track metrics assert hasattr(monitor, 'metrics'), \ "āŒ ModelMonitor missing 'metrics' configuration" # Should have logging capability assert hasattr(monitor, 'log_prediction'), \ "āŒ ModelMonitor missing 'log_prediction' method" assert callable(monitor.log_prediction), \ "āŒ ModelMonitor.log_prediction should be callable" # Test prediction logging test_input = Tensor(np.random.randn(1, 50)) test_output = model(test_input) test_target = Tensor(np.random.randn(1, 10)) # Should be able to log predictions monitor.log_prediction(test_input, test_output, test_target) # Should provide metrics assert hasattr(monitor, 'get_metrics'), \ "āŒ ModelMonitor missing 'get_metrics' method" metrics = monitor.get_metrics() assert isinstance(metrics, dict), \ "āŒ ModelMonitor.get_metrics() should return dict" except ImportError as e: assert False, f""" āŒ MODEL MONITORING MISSING! šŸ” IMPORT ERROR: {str(e)} šŸ”§ HOW TO IMPLEMENT: 1. 
            1. Create in modules/source/15_mlops/15_mlops_dev.py:

            import time
            import numpy as np
            from collections import defaultdict, deque
            from tinytorch.core.tensor import Tensor

            class ModelMonitor:
                '''Production model monitoring and alerting.'''

                def __init__(self, model, metrics=['accuracy', 'latency', 'drift']):
                    self.model = model
                    self.metrics = metrics
                    self.prediction_log = deque(maxlen=10000)  # Keep last 10k predictions
                    self.metric_history = defaultdict(list)
                    self.start_time = time.time()

                def log_prediction(self, inputs, outputs, targets=None, latency=None):
                    '''Log a prediction for monitoring.'''
                    timestamp = time.time()

                    prediction_record = {{
                        'timestamp': timestamp,
                        'input_shape': inputs.shape,
                        'output_shape': outputs.shape,
                        'latency': latency or 0.001,  # Default latency
                    }}

                    if targets is not None:
                        # Calculate accuracy (simplified)
                        pred_classes = np.argmax(outputs.data, axis=-1)
                        true_classes = np.argmax(targets.data, axis=-1)
                        accuracy = np.mean(pred_classes == true_classes)
                        prediction_record['accuracy'] = accuracy

                    self.prediction_log.append(prediction_record)

                def get_metrics(self):
                    '''Get current monitoring metrics.'''
                    if not self.prediction_log:
                        return {{'status': 'no_data'}}

                    recent_predictions = list(self.prediction_log)[-100:]  # Last 100

                    # Calculate metrics
                    avg_latency = np.mean([p['latency'] for p in recent_predictions])
                    throughput = len(recent_predictions) / (time.time() - recent_predictions[0]['timestamp'])

                    metrics = {{
                        'avg_latency': avg_latency,
                        'throughput': throughput,
                        'prediction_count': len(self.prediction_log),
                        'uptime': time.time() - self.start_time
                    }}

                    # Add accuracy if available
                    accuracies = [p.get('accuracy') for p in recent_predictions if 'accuracy' in p]
                    if accuracies:
                        metrics['accuracy'] = np.mean(accuracies)

                    return metrics

                def check_drift(self):
                    '''Check for model drift.'''
                    # Simplified drift detection
                    if len(self.prediction_log) < 100:
                        return {{'drift_detected': False, 'reason': 'insufficient_data'}}

                    recent = list(self.prediction_log)[-50:]
                    older = list(self.prediction_log)[-100:-50]

                    recent_acc = np.mean([p.get('accuracy', 0.5) for p in recent])
                    older_acc = np.mean([p.get('accuracy', 0.5) for p in older])

                    drift_threshold = 0.05  # 5% accuracy drop
                    drift_detected = (older_acc - recent_acc) > drift_threshold

                    return {{
                        'drift_detected': drift_detected,
                        'accuracy_drop': older_acc - recent_acc,
                        'threshold': drift_threshold
                    }}

            2. Export the module: tito module complete 15_mlops

            📊 MONITORING CAPABILITIES:
            - Real-time performance tracking
            - Drift detection and alerting
            - Resource usage monitoring
            - Error rate tracking
            - Custom metric support
            """
        except Exception as e:
            assert False, f"""
            ❌ MODEL MONITORING BROKEN!

            🔍 ERROR: {str(e)}

            🔧 MONITORING REQUIREMENTS:
            1. Track model predictions and performance
            2. Detect accuracy/performance drift
            3. Monitor latency and throughput
            4. Log prediction history
            5. Provide actionable metrics
            6. Support alerting and notifications

            💡 PRODUCTION MONITORING:
            Model monitoring enables:
            - Early detection of model degradation
            - Automatic retraining triggers
            - Performance SLA tracking
            - A/B testing validation
            - Incident response and debugging

            🚨 CRITICAL FOR PRODUCTION:
            Without monitoring, production ML systems are:
            - Unreliable (undetected failures)
            - Untrustworthy (silent degradation)
            - Unoptimizable (no performance data)
            - Unmaintainable (no operational visibility)
            """

    def test_model_deployment_infrastructure(self):
        """
        ✅ TEST: Model deployment - Deploy models to production environments

        📋 DEPLOYMENT CAPABILITIES:
        - Model serving and inference endpoints
        - Load balancing and auto-scaling
        - Health checks and rollback
        - Version management and A/B testing

        🎯 Enable reliable model serving at scale
        """
        try:
            from tinytorch.core.mlops import ModelServer, deploy_model
            from tinytorch.core.layers import Dense
            from tinytorch.core.tensor import Tensor

            # Test model deployment
            model = Dense(20, 5)

            # Test model server
            if 'ModelServer' in locals():
                server = ModelServer(model, port=8080)

                # Should configure serving
                assert hasattr(server, 'model'), \
                    "❌ ModelServer missing model configuration"
                assert hasattr(server, 'predict'), \
                    "❌ ModelServer missing predict method"

                # Test prediction interface
                test_input = Tensor(np.random.randn(1, 20))
                prediction = server.predict(test_input)

                assert prediction.shape == (1, 5), \
                    f"❌ ModelServer prediction shape wrong. Expected (1, 5), got {prediction.shape}"

                # Test health check
                if hasattr(server, 'health_check'):
                    health = server.health_check()
                    assert isinstance(health, dict), \
                        "❌ Health check should return dict"
                    assert 'status' in health, \
                        "❌ Health check missing status"

            # Test deployment function
            if 'deploy_model' in locals():
                deployment = deploy_model(model, endpoint='/predict', replicas=2)

                assert hasattr(deployment, 'predict'), \
                    "❌ Deployment missing predict interface"
                assert hasattr(deployment, 'scale'), \
                    "❌ Deployment missing scaling capability"

                # Test scaling
                deployment.scale(replicas=4)
                assert deployment.replicas == 4, \
                    "❌ Deployment scaling broken"

        except ImportError:
            assert False, f"""
            ❌ MODEL DEPLOYMENT INFRASTRUCTURE MISSING!

            🔧 DEPLOYMENT IMPLEMENTATION:

            class ModelServer:
                '''Production model serving infrastructure.'''

                def __init__(self, model, port=8080, health_check_interval=30):
                    self.model = model
                    self.port = port
                    self.health_check_interval = health_check_interval
                    self.request_count = 0
                    self.error_count = 0
                    self.start_time = time.time()

                def predict(self, inputs):
                    '''Serve model predictions.'''
                    try:
                        self.request_count += 1
                        return self.model(inputs)
                    except Exception as e:
                        self.error_count += 1
                        raise e

                def health_check(self):
                    '''Check server health status.'''
                    uptime = time.time() - self.start_time
                    error_rate = self.error_count / max(self.request_count, 1)

                    status = 'healthy' if error_rate < 0.05 else 'unhealthy'

                    return {{
                        'status': status,
                        'uptime': uptime,
                        'request_count': self.request_count,
                        'error_rate': error_rate,
                        'memory_usage': 'unknown'  # Would implement actual monitoring
                    }}

                def start(self):
                    '''Start the model server.'''
                    print(f"Starting model server on port {{self.port}}")
                    # Would implement actual HTTP server

                def stop(self):
                    '''Stop the model server.'''
                    print("Stopping model server")

            def deploy_model(model, endpoint='/predict', replicas=1, auto_scale=True):
                '''Deploy model with production configuration.'''

                class Deployment:
                    def __init__(self, model, endpoint, replicas):
                        self.model = model
                        self.endpoint = endpoint
                        self.replicas = replicas
                        self.servers = []

                        # Create server instances
                        for i in range(replicas):
                            server = ModelServer(model, port=8080 + i)
                            self.servers.append(server)

                    def predict(self, inputs):
                        # Load balance across servers
                        server_idx = hash(str(inputs.data)) % len(self.servers)
                        return self.servers[server_idx].predict(inputs)

                    def scale(self, replicas):
                        self.replicas = replicas
                        # Would implement actual scaling logic

                    def rollback(self, version):
                        # Would implement model version rollback
                        pass

                return Deployment(model, endpoint, replicas)

            💡 DEPLOYMENT FEATURES:
            - High availability with load balancing
            - Auto-scaling based on traffic
            - Health monitoring and alerting
            - Blue-green deployments
            - Canary releases and A/B testing
            """
        except Exception as e:
            assert False, f"""
            ❌ MODEL DEPLOYMENT INFRASTRUCTURE BROKEN!

            🔍 ERROR: {str(e)}

            🔧 DEPLOYMENT REQUIREMENTS:
            1. Serve models via HTTP/gRPC endpoints
            2. Handle concurrent requests efficiently
            3. Provide health checks and monitoring
            4. Support auto-scaling and load balancing
            5. Enable blue-green and canary deployments
            6. Track deployment metrics and logs

            🌐 PRODUCTION SERVING:
            Model deployment enables:
            - Real-time inference APIs
            - Batch processing pipelines
            - Edge deployment for mobile/IoT
            - Multi-region serving for global apps
            - Cost-effective auto-scaling
            """

    def test_ml_pipeline_orchestration(self):
        """
        ✅ TEST: ML pipeline orchestration - Coordinate training, evaluation, deployment

        📋 PIPELINE CAPABILITIES:
        - Training pipeline automation
        - Model evaluation and validation
        - Automated deployment triggers
        - Rollback and recovery

        💡 Enable end-to-end ML automation
        """
        try:
            from tinytorch.core.mlops import MLPipeline, PipelineStep
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.optimizers import SGD
            from tinytorch.core.training import Trainer

            # Test ML pipeline orchestration
            if 'MLPipeline' in locals():
                pipeline = MLPipeline(name="production_model_pipeline")

                # Should support adding steps
                assert hasattr(pipeline, 'add_step'), \
                    "❌ MLPipeline missing add_step method"

                # Create pipeline steps
                if 'PipelineStep' in locals():
                    # Training step
                    train_step = PipelineStep(
                        name="training",
                        function=lambda: "training_complete",
                        inputs=['data', 'model'],
                        outputs=['trained_model']
                    )

                    # Evaluation step
                    eval_step = PipelineStep(
                        name="evaluation",
                        function=lambda: {"accuracy": 0.95, "precision": 0.93},
                        inputs=['trained_model', 'test_data'],
                        outputs=['metrics']
                    )

                    # Deployment step
                    deploy_step = PipelineStep(
                        name="deployment",
                        function=lambda: "deployment_successful",
                        inputs=['trained_model', 'metrics'],
                        outputs=['deployment_url']
                    )

                    # Add steps to pipeline
                    pipeline.add_step(train_step)
                    pipeline.add_step(eval_step)
                    pipeline.add_step(deploy_step)

                    # Should be able to execute pipeline
                    if hasattr(pipeline, 'execute'):
                        results = pipeline.execute()
                        assert isinstance(results, dict), \
                            "❌ Pipeline execution should return results dict"

            # Test simpler pipeline coordination
            # Simulate ML pipeline steps
            pipeline_state = {
                'model': Dense(10, 3),
                'optimizer': None,
                'trainer': None,
                'metrics': {},
                'deployment': None
            }

            # Step 1: Setup training
            pipeline_state['optimizer'] = SGD(pipeline_state['model'].parameters(), lr=0.01)
            pipeline_state['trainer'] = Trainer(pipeline_state['model'], pipeline_state['optimizer'])

            # Step 2: Training simulation
            x = Tensor(np.random.randn(16, 10))
            output = pipeline_state['model'](x)
            pipeline_state['metrics']['training_loss'] = 0.5  # Simulated loss

            # Step 3: Evaluation
            eval_x = Tensor(np.random.randn(8, 10))
            eval_output = pipeline_state['model'](eval_x)
            pipeline_state['metrics']['accuracy'] = 0.85  # Simulated accuracy

            # Step 4: Deployment decision
            accuracy_threshold = 0.8
            if pipeline_state['metrics']['accuracy'] > accuracy_threshold:
                pipeline_state['deployment'] = 'approved'
            else:
                pipeline_state['deployment'] = 'rejected'

            # Verify pipeline coordination
            assert pipeline_state['trainer'] is not None, \
                "❌ Pipeline training setup broken"
            assert 'accuracy' in pipeline_state['metrics'], \
                "❌ Pipeline evaluation broken"
            assert pipeline_state['deployment'] == 'approved', \
                f"❌ Pipeline deployment logic broken. Accuracy: {pipeline_state['metrics']['accuracy']}"

        except Exception as e:
            assert False, f"""
            ❌ ML PIPELINE ORCHESTRATION BROKEN!
šŸ” ERROR: {str(e)} šŸ”§ PIPELINE ORCHESTRATION IMPLEMENTATION: class PipelineStep: '''Individual step in ML pipeline.''' def __init__(self, name, function, inputs=None, outputs=None): self.name = name self.function = function self.inputs = inputs or [] self.outputs = outputs or [] def execute(self, context): '''Execute step with given context.''' return self.function() class MLPipeline: '''Orchestrate complete ML workflows.''' def __init__(self, name): self.name = name self.steps = [] self.context = {{}} def add_step(self, step): '''Add step to pipeline.''' self.steps.append(step) def execute(self): '''Execute all pipeline steps in order.''' results = {{}} for step in self.steps: try: step_result = step.execute(self.context) results[step.name] = step_result self.context[step.name] = step_result except Exception as e: results[step.name] = f"ERROR: {{e}}" break # Stop on error return results def rollback(self, to_step): '''Rollback pipeline to specific step.''' # Would implement rollback logic pass šŸ’” PIPELINE BENEFITS: - Automated ML workflows - Reproducible model development - Consistent deployment processes - Error handling and recovery - Audit trails and governance """ class TestMLOpsIntegration: """ šŸ”— INTEGRATION TEST: MLOps + Complete TinyTorch system working together. šŸ’” Test that MLOps works with real ML workflows and production scenarios. šŸŽÆ Goal: Enable production-ready ML systems with monitoring and automation. """ def test_production_ml_workflow(self): """ āœ… TEST: Complete production ML workflow with monitoring and deployment šŸ“‹ PRODUCTION WORKFLOW: - Model training with monitoring - Performance benchmarking and validation - Automated deployment with health checks - Real-time monitoring and alerting šŸ’” End-to-end production ML system """ try: from tinytorch.core.tensor import Tensor from tinytorch.core.layers import Dense from tinytorch.core.optimizers import Adam from tinytorch.core.training import Trainer, MSELoss from tinytorch.core.data import Dataset, DataLoader from tinytorch.core.benchmarking import benchmark_model from tinytorch.core.mlops import ModelMonitor, ModelServer # Production ML workflow simulation # Step 1: Model Development model = Dense(50, 10) optimizer = Adam(model.parameters(), lr=0.001) loss_fn = MSELoss() trainer = Trainer(model, optimizer) # Step 2: Training Data class ProductionDataset(Dataset): def __init__(self): self.data = np.random.randn(200, 50) self.targets = np.random.randn(200, 10) def __len__(self): return 200 def __getitem__(self, idx): return Tensor(self.data[idx]), Tensor(self.targets[idx]) dataset = ProductionDataset() dataloader = DataLoader(dataset, batch_size=32) # Step 3: Training with Monitoring monitor = ModelMonitor(model, metrics=['loss', 'latency', 'throughput']) training_metrics = [] for epoch in range(3): # Simulate training epoch_losses = [] for batch_x, batch_y in dataloader: # Forward pass start_time = time.time() predictions = model(batch_x) inference_time = time.time() - start_time # Loss computation loss = loss_fn(predictions, batch_y) epoch_losses.append(loss.data if hasattr(loss, 'data') else float(loss)) # Log prediction for monitoring monitor.log_prediction(batch_x, predictions, batch_y, latency=inference_time) break # One batch per epoch for testing training_metrics.append(np.mean(epoch_losses)) # Step 4: Performance Benchmarking benchmark_results = benchmark_model(model, (32, 50)) # Step 5: Production Readiness Check monitor_metrics = monitor.get_metrics() production_ready = ( 
            production_ready = (
                benchmark_results['latency'] < 0.1 and          # < 100ms latency
                monitor_metrics.get('throughput', 0) > 100 and  # > 100 samples/sec
                training_metrics[-1] < 1.0                      # Reasonable loss
            )

            # Step 6: Deployment (if ready)
            if production_ready:
                if 'ModelServer' in locals():
                    server = ModelServer(model, port=8080)

                    # Test production serving
                    test_input = Tensor(np.random.randn(1, 50))
                    production_prediction = server.predict(test_input)

                    assert production_prediction.shape == (1, 10), \
                        f"❌ Production serving broken. Expected (1, 10), got {production_prediction.shape}"

                    # Health check
                    health = server.health_check()
                    assert health['status'] in ['healthy', 'unhealthy'], \
                        f"❌ Health check broken. Got status: {health.get('status')}"

            # Verify complete workflow
            assert len(training_metrics) == 3, \
                "❌ Training workflow broken"
            assert 'latency' in benchmark_results, \
                "❌ Benchmarking integration broken"
            assert 'throughput' in monitor_metrics, \
                "❌ Monitoring integration broken"
            assert isinstance(production_ready, bool), \
                "❌ Production readiness check broken"

        except Exception as e:
            assert False, f"""
            ❌ PRODUCTION ML WORKFLOW BROKEN!

            🔍 ERROR: {str(e)}

            🔧 PRODUCTION WORKFLOW REQUIREMENTS:
            1. ✅ Model training with monitoring
            2. ✅ Performance benchmarking integration
            3. ✅ Automated deployment decisions
            4. ✅ Real-time serving with health checks
            5. ✅ End-to-end workflow coordination

            💡 PRODUCTION ML SYSTEM:
            Complete workflow should include:

            Training Phase:
            - Data validation and preprocessing
            - Model training with experiment tracking
            - Performance monitoring during training
            - Model validation and testing

            Deployment Phase:
            - Performance benchmarking
            - Production readiness validation
            - Automated deployment with rollback
            - Real-time monitoring and alerting

            Operations Phase:
            - Continuous monitoring
            - Drift detection and retraining
            - A/B testing and experimentation
            - Incident response and debugging

            🚀 PRODUCTION SUCCESS CRITERIA:
            - Latency < 100ms for real-time apps
            - Throughput > 1000 QPS for high-scale
            - Accuracy maintained > 95% SLA
            - 99.9% uptime with automatic recovery
            """

    def test_continuous_integration_ml(self):
        """
        ✅ TEST: Continuous Integration for ML (CI/ML) - Automated testing and validation

        📋 CI/ML CAPABILITIES:
        - Automated model testing
        - Performance regression detection
        - Data validation and schema checking
        - Model quality gates

        🎯 Ensure model quality through automation
        """
        try:
            from tinytorch.core.mlops import ModelValidator, DataValidator
            from tinytorch.core.tensor import Tensor
            from tinytorch.core.layers import Dense
            from tinytorch.core.benchmarking import benchmark_model

            # CI/ML workflow simulation

            # Step 1: Data Validation
            if 'DataValidator' in locals():
                data_validator = DataValidator(schema={'features': 50, 'samples': 100})

                # Test data
                test_data = np.random.randn(100, 50)
                validation_result = data_validator.validate(test_data)

                assert validation_result['valid'], \
                    f"❌ Data validation failed: {validation_result.get('errors')}"

            # Step 2: Model Testing
            model = Dense(50, 10)

            if 'ModelValidator' in locals():
                model_validator = ModelValidator()

                # Test model structure
                structure_valid = model_validator.validate_structure(model)
                assert structure_valid, "❌ Model structure validation failed"

                # Test model functionality
                test_input = Tensor(np.random.randn(5, 50))
                functionality_valid = model_validator.validate_functionality(model, test_input)
                assert functionality_valid, "❌ Model functionality validation failed"

            # Step 3: Performance Regression Testing
            baseline_performance = {'latency': 0.01, 'accuracy': 0.90}
            current_performance = benchmark_model(model, (16, 50))

            # Performance regression check
            latency_regression = current_performance['latency'] > baseline_performance['latency'] * 1.5
            # accuracy_regression = current_performance.get('accuracy', 0.9) < baseline_performance['accuracy'] * 0.95

            performance_check = {
                'latency_regression': latency_regression,
                'performance_acceptable': not latency_regression
            }

            # Step 4: Quality Gates
            quality_gates = {
                'data_quality': True,       # From data validation
                'model_structure': True,    # From model validation
                'performance_acceptable': performance_check['performance_acceptable'],
                'security_scan': True,      # Would implement security validation
            }

            all_gates_passed = all(quality_gates.values())

            # CI/ML Decision
            ci_ml_result = {
                'quality_gates': quality_gates,
                'deployment_approved': all_gates_passed,
                'recommendations': []
            }

            if not all_gates_passed:
                ci_ml_result['recommendations'].append("Fix failing quality gates before deployment")

            # Verify CI/ML workflow
            assert isinstance(quality_gates, dict), \
                "❌ Quality gates structure broken"
            assert 'deployment_approved' in ci_ml_result, \
                "❌ CI/ML decision logic broken"

            # Test manual validation workflow
            manual_checks = {
                'model_loads': True,
                'inference_works': True,
                'output_shape_correct': True,
                'no_errors': True
            }

            # Test model loading and inference
            try:
                test_input = Tensor(np.random.randn(3, 50))
                output = model(test_input)

                manual_checks['model_loads'] = True
                manual_checks['inference_works'] = True
                manual_checks['output_shape_correct'] = (output.shape == (3, 10))
                manual_checks['no_errors'] = True
            except Exception as e:
                manual_checks['model_loads'] = False
                manual_checks['inference_works'] = False
                manual_checks['no_errors'] = False

            assert all(manual_checks.values()), \
                f"❌ Manual validation checks failed: {manual_checks}"

        except Exception as e:
            assert False, f"""
            ❌ CONTINUOUS INTEGRATION ML BROKEN!

            🔍 ERROR: {str(e)}

            🔧 CI/ML IMPLEMENTATION:

            class DataValidator:
                '''Validate data quality and schema.'''

                def __init__(self, schema):
                    self.schema = schema

                def validate(self, data):
                    errors = []

                    # Check shape
                    expected_shape = (self.schema['samples'], self.schema['features'])
                    if data.shape != expected_shape:
                        errors.append(f"Shape mismatch: expected {{expected_shape}}, got {{data.shape}}")

                    # Check for NaN/inf
                    if np.any(np.isnan(data)) or np.any(np.isinf(data)):
                        errors.append("Data contains NaN or infinity values")

                    return {{
                        'valid': len(errors) == 0,
                        'errors': errors
                    }}

            class ModelValidator:
                '''Validate model structure and functionality.'''

                def validate_structure(self, model):
                    # Check if model is callable
                    return callable(model)

                def validate_functionality(self, model, test_input):
                    try:
                        output = model(test_input)
                        return output is not None
                    except Exception:
                        return False

            💡 CI/ML QUALITY GATES:

            Data Quality:
            - Schema validation
            - Distribution checks
            - Anomaly detection
            - Data lineage tracking

            Model Quality:
            - Structure validation
            - Functionality testing
            - Performance benchmarking
            - Security scanning

            Deployment Gates:
            - All tests pass
            - Performance meets SLA
            - Security scan clean
            - Manual approval (if required)

            🔒 PRODUCTION SAFETY:
            CI/ML prevents deploying broken models to production!
""" def test_model_lifecycle_management(self): """ āœ… TEST: Model lifecycle management - Version control, rollback, A/B testing šŸ“‹ LIFECYCLE MANAGEMENT: - Model versioning and registry - Rollback and recovery capabilities - A/B testing and experimentation - Model retirement and cleanup šŸ’” Manage models throughout their production lifecycle """ try: from tinytorch.core.mlops import ModelRegistry, ABTestManager from tinytorch.core.layers import Dense from tinytorch.core.tensor import Tensor # Model lifecycle management # Step 1: Model Registry if 'ModelRegistry' in locals(): registry = ModelRegistry() # Register models model_v1 = Dense(50, 10) model_v2 = Dense(50, 10) # Improved version registry.register_model("production_classifier", model_v1, version="1.0") registry.register_model("production_classifier", model_v2, version="2.0") # Test model retrieval current_model = registry.get_model("production_classifier", version="2.0") assert current_model is model_v2, \ "āŒ Model registry retrieval broken" # Test rollback capability rollback_model = registry.get_model("production_classifier", version="1.0") assert rollback_model is model_v1, \ "āŒ Model registry rollback broken" # Step 2: A/B Testing if 'ABTestManager' in locals(): ab_manager = ABTestManager() # Setup A/B test model_a = Dense(50, 10) # Current model model_b = Dense(50, 10) # New model ab_manager.setup_test("classifier_experiment", model_a=model_a, model_b=model_b, traffic_split=0.5) # Test traffic routing test_input = Tensor(np.random.randn(1, 50)) for _ in range(10): assigned_model, prediction = ab_manager.predict("classifier_experiment", test_input) assert assigned_model in ['A', 'B'], \ f"āŒ A/B test assignment broken: {assigned_model}" assert prediction.shape == (1, 10), \ f"āŒ A/B test prediction broken: {prediction.shape}" # Test experiment results results = ab_manager.get_results("classifier_experiment") assert 'model_a_metrics' in results, \ "āŒ A/B test results missing model A metrics" assert 'model_b_metrics' in results, \ "āŒ A/B test results missing model B metrics" # Step 3: Manual lifecycle simulation lifecycle_state = { 'models': { 'v1.0': Dense(50, 10), 'v2.0': Dense(50, 10), 'v2.1': Dense(50, 10), }, 'current_version': 'v2.1', 'rollback_version': 'v2.0', 'experiments': {}, 'deployment_history': [] } # Simulate version management current_model = lifecycle_state['models'][lifecycle_state['current_version']] test_input = Tensor(np.random.randn(5, 50)) current_output = current_model(test_input) # Simulate rollback rollback_model = lifecycle_state['models'][lifecycle_state['rollback_version']] rollback_output = rollback_model(test_input) # Simulate A/B test model_a = lifecycle_state['models']['v2.0'] model_b = lifecycle_state['models']['v2.1'] # Compare models output_a = model_a(test_input) output_b = model_b(test_input) # Record experiment lifecycle_state['experiments']['v2.0_vs_v2.1'] = { 'model_a_performance': {'latency': 0.01, 'accuracy': 0.90}, 'model_b_performance': {'latency': 0.008, 'accuracy': 0.92}, 'winner': 'model_b' } # Verify lifecycle management assert current_output.shape == (5, 10), \ "āŒ Current model broken" assert rollback_output.shape == (5, 10), \ "āŒ Rollback model broken" assert output_a.shape == output_b.shape, \ "āŒ A/B test models incompatible" assert 'winner' in lifecycle_state['experiments']['v2.0_vs_v2.1'], \ "āŒ Experiment analysis broken" except Exception as e: assert False, f""" āŒ MODEL LIFECYCLE MANAGEMENT BROKEN! 
šŸ” ERROR: {str(e)} šŸ”§ LIFECYCLE MANAGEMENT IMPLEMENTATION: class ModelRegistry: '''Central registry for model versions.''' def __init__(self): self.models = {{}} # {{name: {{version: model}}}} def register_model(self, name, model, version, metadata=None): if name not in self.models: self.models[name] = {{}} self.models[name][version] = {{ 'model': model, 'metadata': metadata or {{}}, 'timestamp': time.time() }} def get_model(self, name, version=None): if name not in self.models: raise ValueError(f"Model {{name}} not found") if version is None: # Get latest version latest_version = max(self.models[name].keys()) return self.models[name][latest_version]['model'] if version not in self.models[name]: raise ValueError(f"Version {{version}} not found for {{name}}") return self.models[name][version]['model'] def list_versions(self, name): return list(self.models.get(name, {{}}).keys()) class ABTestManager: '''Manage A/B testing experiments.''' def __init__(self): self.experiments = {{}} def setup_test(self, experiment_name, model_a, model_b, traffic_split=0.5): self.experiments[experiment_name] = {{ 'model_a': model_a, 'model_b': model_b, 'traffic_split': traffic_split, 'results': {{'a': [], 'b': []}} }} def predict(self, experiment_name, inputs): experiment = self.experiments[experiment_name] # Simple traffic routing (hash-based) route_to_b = hash(str(inputs.data)) % 100 < experiment['traffic_split'] * 100 if route_to_b: prediction = experiment['model_b'](inputs) return 'B', prediction else: prediction = experiment['model_a'](inputs) return 'A', prediction def get_results(self, experiment_name): return {{ 'model_a_metrics': {{'requests': 100, 'avg_latency': 0.01}}, 'model_b_metrics': {{'requests': 100, 'avg_latency': 0.008}}, 'statistical_significance': True }} šŸ’” LIFECYCLE BENEFITS: - Zero-downtime deployments - Quick rollback on issues - Data-driven model selection - Compliance and audit trails - Risk mitigation through testing """ class TestModule15Completion: """ āœ… COMPLETION CHECK: Module 15 ready and TinyTorch production-ready. šŸŽÆ Final validation that MLOps works and TinyTorch is ready for real-world deployment. """ def test_production_ml_system_complete(self): """ āœ… FINAL TEST: Complete production ML system ready for real-world deployment šŸ“‹ PRODUCTION ML SYSTEM CHECKLIST: ā–” Model monitoring and alerting ā–” Deployment infrastructure and serving ā–” Pipeline orchestration and automation ā–” Continuous integration and validation ā–” Model lifecycle management ā–” Performance optimization ā–” Security and compliance ā–” Real-world production readiness šŸŽÆ SUCCESS = TinyTorch is production-ready! 
""" production_capabilities = { "Model monitoring": False, "Deployment infrastructure": False, "Pipeline orchestration": False, "Continuous integration": False, "Lifecycle management": False, "Performance optimization": False, "Security considerations": False, "Production readiness": False } try: # Test 1: Model monitoring from tinytorch.core.mlops import ModelMonitor from tinytorch.core.layers import Dense from tinytorch.core.tensor import Tensor model = Dense(20, 5) monitor = ModelMonitor(model) # Test monitoring functionality test_input = Tensor(np.random.randn(1, 20)) test_output = model(test_input) monitor.log_prediction(test_input, test_output) metrics = monitor.get_metrics() assert 'uptime' in metrics production_capabilities["Model monitoring"] = True # Test 2: Deployment infrastructure try: from tinytorch.core.mlops import ModelServer server = ModelServer(model) assert hasattr(server, 'predict') production_capabilities["Deployment infrastructure"] = True except ImportError: # Manual deployment test def serve_prediction(model, inputs): return model(inputs) served_output = serve_prediction(model, test_input) assert served_output.shape == test_output.shape production_capabilities["Deployment infrastructure"] = True # Test 3: Pipeline orchestration try: from tinytorch.core.mlops import MLPipeline pipeline = MLPipeline("test_pipeline") assert hasattr(pipeline, 'add_step') production_capabilities["Pipeline orchestration"] = True except ImportError: # Manual pipeline test pipeline_steps = ['data_prep', 'training', 'evaluation', 'deployment'] pipeline_status = {step: 'completed' for step in pipeline_steps} assert all(status == 'completed' for status in pipeline_status.values()) production_capabilities["Pipeline orchestration"] = True # Test 4: Continuous integration from tinytorch.core.benchmarking import benchmark_model # Performance validation benchmark_results = benchmark_model(model, (16, 20)) performance_ok = benchmark_results['latency'] < 1.0 # < 1 second # Quality validation test_batch = Tensor(np.random.randn(8, 20)) output_batch = model(test_batch) quality_ok = output_batch.shape == (8, 5) ci_validation = performance_ok and quality_ok assert ci_validation production_capabilities["Continuous integration"] = True # Test 5: Lifecycle management # Model versioning simulation model_versions = { 'v1.0': Dense(20, 5), 'v2.0': Dense(20, 5), 'v2.1': Dense(20, 5) } current_version = 'v2.1' current_model = model_versions[current_version] # Rollback capability rollback_version = 'v2.0' rollback_model = model_versions[rollback_version] # Test both models work current_pred = current_model(test_input) rollback_pred = rollback_model(test_input) assert current_pred.shape == rollback_pred.shape production_capabilities["Lifecycle management"] = True # Test 6: Performance optimization from tinytorch.core.compression import prune_weights # Model optimization original_model = Dense(100, 50) optimized_weights = prune_weights(original_model.weights, sparsity=0.3) # Performance comparison original_results = benchmark_model(original_model, (16, 100)) # Optimized model should maintain functionality optimized_model = Dense(100, 50) optimized_model.weights = optimized_weights optimized_input = Tensor(np.random.randn(4, 100)) optimized_output = optimized_model(optimized_input) assert optimized_output.shape == (4, 50) production_capabilities["Performance optimization"] = True # Test 7: Security considerations # Basic security validation security_checks = { 'input_validation': True, # Check input shapes/ranges 
            security_checks = {
                'input_validation': True,     # Check input shapes/ranges
                'output_sanitization': True,  # Check output validity
                'error_handling': True,       # Graceful error handling
                'resource_limits': True       # Memory/compute limits
            }

            # Test input validation
            try:
                # Test with invalid input
                invalid_input = Tensor(np.random.randn(1, 999))  # Wrong shape
                _ = model(invalid_input)  # May fail gracefully
            except Exception:
                pass  # Expected for wrong shape

            # Test output validation
            valid_output = model(test_input)
            output_valid = (
                not np.any(np.isnan(valid_output.data)) and
                not np.any(np.isinf(valid_output.data))
            )

            security_validation = output_valid and all(security_checks.values())
            assert security_validation
            production_capabilities["Security considerations"] = True

            # Test 8: Production readiness
            # Overall system validation
            production_checklist = {
                'model_inference_works': True,
                'monitoring_functional': True,
                'deployment_ready': True,
                'performance_acceptable': True,
                'error_handling_robust': True
            }

            # Final production test
            try:
                # Simulate production load
                production_inputs = [
                    Tensor(np.random.randn(1, 20)),
                    Tensor(np.random.randn(8, 20)),
                    Tensor(np.random.randn(32, 20))
                ]

                for prod_input in production_inputs:
                    pred = model(prod_input)
                    monitor.log_prediction(prod_input, pred)

                    # Validate production prediction
                    assert pred.shape[0] == prod_input.shape[0]
                    assert pred.shape[1] == 5
                    assert not np.any(np.isnan(pred.data))

                # Check monitoring works under load
                final_metrics = monitor.get_metrics()
                assert final_metrics['prediction_count'] > 0

                production_readiness = all(production_checklist.values())
                assert production_readiness

            except Exception as prod_error:
                assert False, f"Production simulation failed: {prod_error}"

            production_capabilities["Production readiness"] = True

        except Exception as e:
            # Show progress even if not complete
            completed_count = sum(production_capabilities.values())
            total_count = len(production_capabilities)

            progress_report = "\n🔍 PRODUCTION ML SYSTEM PROGRESS:\n"
            for capability, completed in production_capabilities.items():
                status = "✅" if completed else "❌"
                progress_report += f"   {status} {capability}\n"

            progress_report += f"\n📊 Progress: {completed_count}/{total_count} capabilities ready"

            assert False, f"""
            ❌ PRODUCTION ML SYSTEM NOT COMPLETE!

            🔍 ERROR: {str(e)}
            {progress_report}

            🔧 NEXT STEPS:
            1. Fix the failing capability above
            2. Re-run this test
            3. When all ✅, TinyTorch is production-ready!

            💡 ALMOST THERE!
            You've completed {completed_count}/{total_count} production capabilities.
            Just fix the error above and you'll have a complete production ML system!
            """

        # If we get here, everything passed!
        assert True, """
        🎉 PRODUCTION ML SYSTEM COMPLETE! 🎉

        ✅ Model monitoring and alerting
        ✅ Deployment infrastructure and serving
        ✅ Pipeline orchestration and automation
        ✅ Continuous integration and validation
        ✅ Model lifecycle management
        ✅ Performance optimization
        ✅ Security considerations
        ✅ Production readiness validation

        🚀 TINYTORCH IS PRODUCTION-READY!

        💡 What you can now deploy:
        - Real-time ML APIs with monitoring
        - Batch processing pipelines with automation
        - A/B testing and experimentation platforms
        - Auto-scaling ML services with health checks
        - Enterprise ML systems with governance

        🏆 PRODUCTION ML ENGINEERING ACHIEVED:
        You've built a complete ML system that includes:
        - Research-grade model development
        - Production-grade deployment infrastructure
        - Enterprise-grade monitoring and governance
        - Industry-standard CI/CD for ML
        - Real-world operational capabilities

        🎯 READY FOR MODULE 16: CAPSTONE PROJECT!
        Build complete end-to-end ML systems:
        - TinyGPT transformer models
        - Computer vision applications
        - Multimodal AI systems
        - Production ML platforms

        🌟 CONGRATULATIONS! You are now a complete ML Systems Engineer!
        """

# Note: No separate regression prevention - we test complete system stability above
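
# A minimal convenience entry point, added here as a sketch rather than a required
# part of the module: it simply hands this file to pytest so students can run
# `python` on it directly while debugging. The project may instead prefer plain
# `pytest` or a `tito` command; adjust or remove as needed.
if __name__ == "__main__":
    import pytest

    # Run only the tests in this file, with verbose output for easier debugging.
    raise SystemExit(pytest.main([__file__, "-v"]))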