Mirror of https://github.com/MLSysBook/TinyTorch.git (synced 2026-05-07 22:29:19 -05:00)
- Flattened tests/ directory structure (removed integration/ and system/ subdirectories)
- Renamed all integration tests with _integration.py suffix for clarity
- Created test_utils.py with setup_integration_test() function
- Updated integration tests to use ONLY tinytorch package imports
- Ensured all modules are exported before running tests via tito export --all
- Optimized module test timing for fast execution (under 5 seconds each)
- Fixed MLOps test reliability and reduced timing parameters across modules
- Exported all modules (compression, kernels, benchmarking, mlops) to tinytorch package
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/13_mlops/mlops_dev.ipynb.
|
|
|
|
# %% auto 0
|
|
__all__ = ['ModelMonitor', 'DriftDetector', 'RetrainingTrigger', 'MLOpsPipeline']
|
|
|
|
# %% ../../modules/source/13_mlops/mlops_dev.ipynb 1
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
from typing import Dict, List, Tuple, Optional, Any, Callable
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timedelta
|
|
from collections import defaultdict
|
|
|
|
# Import our dependencies - try from package first, then local modules
|
|
try:
|
|
from tinytorch.core.tensor import Tensor
|
|
from tinytorch.core.training import Trainer, MeanSquaredError, CrossEntropyLoss, Accuracy
|
|
from tinytorch.core.benchmarking import TinyTorchPerf, StatisticalValidator
|
|
from tinytorch.core.compression import quantize_layer_weights, prune_weights_by_magnitude
|
|
from tinytorch.core.networks import Sequential
|
|
from tinytorch.core.layers import Dense
|
|
from tinytorch.core.activations import ReLU, Sigmoid, Softmax
|
|
except ImportError:
|
|
# For development, import from local modules
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '01_tensor'))
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '09_training'))
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '12_benchmarking'))
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '10_compression'))
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '04_networks'))
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '03_layers'))
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '02_activations'))
|
|
try:
|
|
from tensor_dev import Tensor
|
|
from training_dev import Trainer, MeanSquaredError, CrossEntropyLoss, Accuracy
|
|
from benchmarking_dev import TinyTorchPerf, StatisticalValidator
|
|
from compression_dev import quantize_layer_weights, prune_weights_by_magnitude
|
|
from networks_dev import Sequential
|
|
from layers_dev import Dense
|
|
from activations_dev import ReLU, Sigmoid, Softmax
|
|
except ImportError:
|
|
print("⚠️ Development imports failed - some functionality may be limited")
|
|
|
|
# %% ../../modules/source/13_mlops/mlops_dev.ipynb 2
|
|
def _should_show_plots():
    """Check if we should show plots (disable during testing)."""
    # Detect test mode: pytest already imported, pytest's env var set,
    # or any command-line argument mentioning test/pytest.
    is_pytest = (
        'pytest' in sys.modules or
        os.environ.get('PYTEST_CURRENT_TEST') is not None or
        any('test' in arg or 'pytest' in arg for arg in sys.argv)
    )

    # Show plots only in development mode (i.e. when not running tests)
    return not is_pytest
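
# Illustrative sketch (an assumption, not part of the exported API): plotting
# helpers in this module are expected to guard matplotlib calls with
# _should_show_plots() so that figures are skipped under pytest. The helper
# name below is hypothetical.
def _example_plot_accuracy(accuracy_history):
    """Plot an accuracy history only when running interactively."""
    if not _should_show_plots():
        return
    plt.figure(figsize=(6, 3))
    plt.plot(accuracy_history, marker="o")
    plt.xlabel("measurement")
    plt.ylabel("accuracy")
    plt.title("Model accuracy over time")
    plt.show()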
|
|
|
|
# %% ../../modules/source/13_mlops/mlops_dev.ipynb 7
|
|
class ModelMonitor:
|
|
"""
|
|
Monitors ML model performance over time and detects degradation.
|
|
|
|
Tracks key metrics, stores history, and alerts when performance drops.
|
|
"""
|
|
|
|
def __init__(self, model_name: str, baseline_accuracy: float = 0.95):
|
|
"""
|
|
TODO: Initialize the ModelMonitor for tracking model performance.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Store the model_name and baseline_accuracy
|
|
2. Create empty lists to store metric history:
|
|
- accuracy_history: List[float]
|
|
- latency_history: List[float]
|
|
- timestamp_history: List[datetime]
|
|
3. Set performance thresholds:
|
|
- accuracy_threshold: baseline_accuracy * 0.9 (10% drop triggers alert)
|
|
- latency_threshold: 200.0 (milliseconds)
|
|
4. Initialize alert flags:
|
|
- accuracy_alert: False
|
|
- latency_alert: False
|
|
|
|
EXAMPLE USAGE:
|
|
```python
|
|
monitor = ModelMonitor("image_classifier", baseline_accuracy=0.93)
|
|
monitor.record_performance(accuracy=0.92, latency=150.0)
|
|
alerts = monitor.check_alerts()
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use self.model_name = model_name
|
|
- Initialize lists with self.accuracy_history = []
|
|
- Use datetime.now() for timestamps
|
|
- Set thresholds relative to baseline (e.g., 90% of baseline)
|
|
|
|
LEARNING CONNECTIONS:
|
|
- This builds on benchmarking concepts from Module 12
|
|
- Performance tracking is essential for production systems
|
|
- Thresholds prevent false alarms while catching real issues
|
|
"""
|
|
### BEGIN SOLUTION
|
|
self.model_name = model_name
|
|
self.baseline_accuracy = baseline_accuracy
|
|
|
|
# Metric history storage
|
|
self.accuracy_history = []
|
|
self.latency_history = []
|
|
self.timestamp_history = []
|
|
|
|
# Performance thresholds
|
|
self.accuracy_threshold = baseline_accuracy * 0.9 # 10% drop triggers alert
|
|
self.latency_threshold = 200.0 # milliseconds
|
|
|
|
# Alert flags
|
|
self.accuracy_alert = False
|
|
self.latency_alert = False
|
|
### END SOLUTION
|
|
|
|
def record_performance(self, accuracy: float, latency: float):
|
|
"""
|
|
TODO: Record a new performance measurement.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Get current timestamp with datetime.now()
|
|
2. Append accuracy to self.accuracy_history
|
|
3. Append latency to self.latency_history
|
|
4. Append timestamp to self.timestamp_history
|
|
5. Check if accuracy is below threshold:
|
|
- If accuracy < self.accuracy_threshold: set self.accuracy_alert = True
|
|
- Else: set self.accuracy_alert = False
|
|
6. Check if latency is above threshold:
|
|
- If latency > self.latency_threshold: set self.latency_alert = True
|
|
- Else: set self.latency_alert = False
|
|
|
|
EXAMPLE BEHAVIOR:
|
|
```python
|
|
monitor.record_performance(0.94, 120.0) # Good performance
|
|
monitor.record_performance(0.84, 250.0) # Triggers both alerts
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use datetime.now() for timestamps
|
|
- Update alert flags based on current measurement
|
|
- Don't forget to store all three values (accuracy, latency, timestamp)
|
|
"""
|
|
### BEGIN SOLUTION
|
|
current_time = datetime.now()
|
|
|
|
# Record the measurements
|
|
self.accuracy_history.append(accuracy)
|
|
self.latency_history.append(latency)
|
|
self.timestamp_history.append(current_time)
|
|
|
|
# Check thresholds and update alerts
|
|
self.accuracy_alert = accuracy < self.accuracy_threshold
|
|
self.latency_alert = latency > self.latency_threshold
|
|
### END SOLUTION
|
|
|
|
def check_alerts(self) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Check current alert status and return alert information.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Create result dictionary with basic info:
|
|
- "model_name": self.model_name
|
|
- "accuracy_alert": self.accuracy_alert
|
|
- "latency_alert": self.latency_alert
|
|
2. If accuracy_alert is True, add:
|
|
- "accuracy_message": f"Accuracy below threshold: {current_accuracy:.3f} < {self.accuracy_threshold:.3f}"
|
|
- "current_accuracy": most recent accuracy from history
|
|
3. If latency_alert is True, add:
|
|
- "latency_message": f"Latency above threshold: {current_latency:.1f}ms > {self.latency_threshold:.1f}ms"
|
|
- "current_latency": most recent latency from history
|
|
4. Add overall alert status:
|
|
- "any_alerts": True if any alert is active
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"model_name": "image_classifier",
|
|
"accuracy_alert": True,
|
|
"latency_alert": False,
|
|
"accuracy_message": "Accuracy below threshold: 0.840 < 0.855",
|
|
"current_accuracy": 0.840,
|
|
"any_alerts": True
|
|
}
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use self.accuracy_history[-1] for most recent values
|
|
- Format numbers with f-strings for readability
|
|
- Include both alert flags and descriptive messages
|
|
"""
|
|
### BEGIN SOLUTION
|
|
result = {
|
|
"model_name": self.model_name,
|
|
"accuracy_alert": self.accuracy_alert,
|
|
"latency_alert": self.latency_alert
|
|
}
|
|
|
|
if self.accuracy_alert and self.accuracy_history:
|
|
current_accuracy = self.accuracy_history[-1]
|
|
result["accuracy_message"] = f"Accuracy below threshold: {current_accuracy:.3f} < {self.accuracy_threshold:.3f}"
|
|
result["current_accuracy"] = current_accuracy
|
|
|
|
if self.latency_alert and self.latency_history:
|
|
current_latency = self.latency_history[-1]
|
|
result["latency_message"] = f"Latency above threshold: {current_latency:.1f}ms > {self.latency_threshold:.1f}ms"
|
|
result["current_latency"] = current_latency
|
|
|
|
result["any_alerts"] = self.accuracy_alert or self.latency_alert
|
|
return result
|
|
### END SOLUTION
|
|
|
|
def get_performance_trend(self) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Analyze performance trends over time.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Check if we have enough data (at least 2 measurements)
|
|
2. Calculate accuracy trend:
   - If accuracy_history has < 2 points: trend = "insufficient_data"
   - If it has >= 6 points: compare the recent average (last 3) vs the older average (first 3),
     using a small tolerance (about 1%) to avoid flagging noise
   - Otherwise: compare the most recent value against the first value
   - If recent > older: trend = "improving"
   - If recent < older: trend = "degrading"
   - Else: trend = "stable"
3. Calculate a similar trend for latency (wider tolerance, about 10%; higher latency means "degrading")
|
|
4. Return dictionary with:
|
|
- "measurements_count": len(self.accuracy_history)
|
|
- "accuracy_trend": trend analysis
|
|
- "latency_trend": trend analysis
|
|
- "baseline_accuracy": self.baseline_accuracy
|
|
- "current_accuracy": most recent accuracy (if available)
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"measurements_count": 10,
|
|
"accuracy_trend": "degrading",
|
|
"latency_trend": "stable",
|
|
"baseline_accuracy": 0.95,
|
|
"current_accuracy": 0.87
|
|
}
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use len(self.accuracy_history) for data count
|
|
- Use np.mean() for calculating averages
|
|
- Handle edge cases (empty history, insufficient data)
|
|
"""
|
|
### BEGIN SOLUTION
|
|
if len(self.accuracy_history) < 2:
|
|
return {
|
|
"measurements_count": len(self.accuracy_history),
|
|
"accuracy_trend": "insufficient_data",
|
|
"latency_trend": "insufficient_data",
|
|
"baseline_accuracy": self.baseline_accuracy,
|
|
"current_accuracy": self.accuracy_history[-1] if self.accuracy_history else None
|
|
}
|
|
|
|
# Calculate accuracy trend
|
|
if len(self.accuracy_history) >= 6:
|
|
recent_acc = np.mean(self.accuracy_history[-3:])
|
|
older_acc = np.mean(self.accuracy_history[:3])
|
|
if recent_acc > older_acc * 1.01: # 1% improvement
|
|
accuracy_trend = "improving"
|
|
elif recent_acc < older_acc * 0.99: # 1% degradation
|
|
accuracy_trend = "degrading"
|
|
else:
|
|
accuracy_trend = "stable"
|
|
else:
|
|
# Simple comparison for limited data
|
|
if self.accuracy_history[-1] > self.accuracy_history[0]:
|
|
accuracy_trend = "improving"
|
|
elif self.accuracy_history[-1] < self.accuracy_history[0]:
|
|
accuracy_trend = "degrading"
|
|
else:
|
|
accuracy_trend = "stable"
|
|
|
|
# Calculate latency trend
|
|
if len(self.latency_history) >= 6:
|
|
recent_lat = np.mean(self.latency_history[-3:])
|
|
older_lat = np.mean(self.latency_history[:3])
|
|
if recent_lat > older_lat * 1.1: # 10% increase
|
|
latency_trend = "degrading"
|
|
elif recent_lat < older_lat * 0.9: # 10% improvement
|
|
latency_trend = "improving"
|
|
else:
|
|
latency_trend = "stable"
|
|
else:
|
|
# Simple comparison for limited data
|
|
if self.latency_history[-1] > self.latency_history[0]:
|
|
latency_trend = "degrading"
|
|
elif self.latency_history[-1] < self.latency_history[0]:
|
|
latency_trend = "improving"
|
|
else:
|
|
latency_trend = "stable"
|
|
|
|
return {
|
|
"measurements_count": len(self.accuracy_history),
|
|
"accuracy_trend": accuracy_trend,
|
|
"latency_trend": latency_trend,
|
|
"baseline_accuracy": self.baseline_accuracy,
|
|
"current_accuracy": self.accuracy_history[-1] if self.accuracy_history else None
|
|
}
|
|
### END SOLUTION
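
# Illustrative usage sketch (an assumption for demonstration, not exported):
# exercising ModelMonitor with a degrading accuracy stream. With
# baseline_accuracy=0.95 the alert threshold is 0.855, so the last
# measurement below should raise both the accuracy and latency alerts.
def _example_model_monitor():
    monitor = ModelMonitor("image_classifier", baseline_accuracy=0.95)
    for accuracy, latency in [(0.94, 120.0), (0.91, 140.0), (0.84, 250.0)]:
        monitor.record_performance(accuracy, latency)
    alerts = monitor.check_alerts()          # accuracy and latency alerts expected
    trend = monitor.get_performance_trend()  # "degrading" with only 3 samples
    return alerts, trend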
|
|
|
|
# %% ../../modules/source/13_mlops/mlops_dev.ipynb 11
|
|
class DriftDetector:
|
|
"""
|
|
Detects data drift by comparing current data distributions to baseline.
|
|
|
|
Uses statistical tests to identify significant changes in data patterns.
|
|
"""
|
|
|
|
def __init__(self, baseline_data: np.ndarray, feature_names: Optional[List[str]] = None):
|
|
"""
|
|
TODO: Initialize the DriftDetector with baseline data.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Store baseline_data and feature_names
|
|
2. Calculate baseline statistics:
|
|
- baseline_mean: np.mean(baseline_data, axis=0)
|
|
- baseline_std: np.std(baseline_data, axis=0)
|
|
- baseline_min: np.min(baseline_data, axis=0)
|
|
- baseline_max: np.max(baseline_data, axis=0)
|
|
3. Set drift detection threshold (default: 0.05 for 95% confidence)
|
|
4. Initialize drift history storage:
|
|
- drift_history: List[Dict] to store drift test results
|
|
|
|
EXAMPLE USAGE:
|
|
```python
|
|
baseline = np.random.normal(0, 1, (1000, 3))
|
|
detector = DriftDetector(baseline, ["feature1", "feature2", "feature3"])
|
|
drift_result = detector.detect_drift(new_data)
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use axis=0 for column-wise statistics
|
|
- Handle case when feature_names is None
|
|
- Store original baseline_data for KS test
|
|
- Set significance level (alpha) to 0.05
|
|
"""
|
|
### BEGIN SOLUTION
|
|
self.baseline_data = baseline_data
|
|
self.feature_names = feature_names or [f"feature_{i}" for i in range(baseline_data.shape[1])]
|
|
|
|
# Calculate baseline statistics
|
|
self.baseline_mean = np.mean(baseline_data, axis=0)
|
|
self.baseline_std = np.std(baseline_data, axis=0)
|
|
self.baseline_min = np.min(baseline_data, axis=0)
|
|
self.baseline_max = np.max(baseline_data, axis=0)
|
|
|
|
# Drift detection parameters
|
|
self.significance_level = 0.05
|
|
|
|
# Drift history
|
|
self.drift_history = []
|
|
### END SOLUTION
|
|
|
|
def detect_drift(self, new_data: np.ndarray) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Detect drift by comparing new data to baseline.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Calculate new data statistics:
|
|
- new_mean, new_std, new_min, new_max (same as baseline)
|
|
2. Perform statistical tests for each feature:
   - Mean shift test: |new_mean - baseline_mean| / baseline_std > 2
   - Std shift test: |new_std - baseline_std| / baseline_std > 0.5
   - Range shift test (a simple stand-in for a KS test, which would need scipy):
     |new_range - baseline_range| / baseline_range > 0.3
|
|
3. Create result dictionary:
|
|
- "drift_detected": True if any feature shows drift
|
|
- "feature_drift": Dict with per-feature results
|
|
- "summary": Overall drift description
|
|
4. Store result in drift_history
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"drift_detected": True,
|
|
"feature_drift": {
|
|
"feature1": {"mean_drift": True, "std_drift": False, "ks_pvalue": 0.001},
|
|
"feature2": {"mean_drift": False, "std_drift": True, "ks_pvalue": 0.3}
|
|
},
|
|
"summary": "Drift detected in 2/3 features"
|
|
}
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- A full KS test would require scipy; the range-change check stands in for it here
|
|
- Check each feature individually
|
|
- Use absolute values for difference checks
|
|
- Count how many features show drift
|
|
"""
|
|
### BEGIN SOLUTION
|
|
# Calculate new data statistics
|
|
new_mean = np.mean(new_data, axis=0)
|
|
new_std = np.std(new_data, axis=0)
|
|
new_min = np.min(new_data, axis=0)
|
|
new_max = np.max(new_data, axis=0)
|
|
|
|
feature_drift = {}
|
|
drift_count = 0
|
|
|
|
for i, feature_name in enumerate(self.feature_names):
|
|
# Mean shift test (2 standard deviations)
|
|
mean_drift = abs(new_mean[i] - self.baseline_mean[i]) / (self.baseline_std[i] + 1e-8) > 2.0
|
|
|
|
# Standard deviation shift test (50% change)
|
|
std_drift = abs(new_std[i] - self.baseline_std[i]) / (self.baseline_std[i] + 1e-8) > 0.5
|
|
|
|
# Simple KS test (without scipy)
|
|
# For simplicity, we'll use range change as proxy
|
|
baseline_range = self.baseline_max[i] - self.baseline_min[i]
|
|
new_range = new_max[i] - new_min[i]
|
|
range_drift = abs(new_range - baseline_range) / (baseline_range + 1e-8) > 0.3
|
|
|
|
any_drift = mean_drift or std_drift or range_drift
|
|
if any_drift:
|
|
drift_count += 1
|
|
|
|
feature_drift[feature_name] = {
|
|
"mean_drift": mean_drift,
|
|
"std_drift": std_drift,
|
|
"range_drift": range_drift,
|
|
"mean_change": (new_mean[i] - self.baseline_mean[i]) / (self.baseline_std[i] + 1e-8),
|
|
"std_change": (new_std[i] - self.baseline_std[i]) / (self.baseline_std[i] + 1e-8)
|
|
}
|
|
|
|
drift_detected = drift_count > 0
|
|
|
|
result = {
|
|
"drift_detected": drift_detected,
|
|
"feature_drift": feature_drift,
|
|
"summary": f"Drift detected in {drift_count}/{len(self.feature_names)} features",
|
|
"drift_count": drift_count,
|
|
"total_features": len(self.feature_names)
|
|
}
|
|
|
|
# Store in history
|
|
self.drift_history.append({
|
|
"timestamp": datetime.now(),
|
|
"result": result
|
|
})
|
|
|
|
return result
|
|
### END SOLUTION
|
|
|
|
def get_drift_history(self) -> List[Dict]:
|
|
"""
|
|
TODO: Return the complete drift detection history.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Return self.drift_history
|
|
2. Include timestamp and result for each detection
|
|
3. Format for easy analysis
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
[
|
|
{
|
|
"timestamp": datetime(2024, 1, 1, 12, 0),
|
|
"result": {"drift_detected": False, "drift_count": 0, ...}
|
|
},
|
|
{
|
|
"timestamp": datetime(2024, 1, 2, 12, 0),
|
|
"result": {"drift_detected": True, "drift_count": 2, ...}
|
|
}
|
|
]
|
|
```
|
|
"""
|
|
### BEGIN SOLUTION
|
|
return self.drift_history
|
|
### END SOLUTION
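
# Illustrative usage sketch (an assumption for demonstration, not exported):
# detecting a mean shift with DriftDetector. The baseline is standard normal;
# the new batch is shifted by +3 standard deviations in the first feature,
# which should trip the mean-shift test (|new_mean - baseline_mean| / baseline_std > 2).
def _example_drift_detector():
    rng = np.random.default_rng(0)
    baseline = rng.normal(0.0, 1.0, size=(1000, 3))
    detector = DriftDetector(baseline, ["f0", "f1", "f2"])

    shifted = rng.normal(0.0, 1.0, size=(500, 3))
    shifted[:, 0] += 3.0                      # inject drift into feature f0
    result = detector.detect_drift(shifted)   # expect drift_detected == True
    return result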
|
|
|
|
# %% ../../modules/source/13_mlops/mlops_dev.ipynb 15
|
|
class RetrainingTrigger:
|
|
"""
|
|
Automated retraining system that responds to model performance degradation.
|
|
|
|
Orchestrates the complete retraining workflow using existing TinyTorch components.
|
|
"""
|
|
|
|
def __init__(self, model, training_data, validation_data, trainer_class=None):
|
|
"""
|
|
TODO: Initialize the RetrainingTrigger system.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Store the model, training_data, and validation_data
|
|
2. Set up the trainer_class (use provided or default to simple trainer)
|
|
3. Initialize trigger conditions:
|
|
- accuracy_threshold: 0.82 (trigger retraining if accuracy < 82%)
- drift_threshold: 1 (trigger if drift is detected in 1 or more features)
- min_time_between_retrains: 24 hours (avoid overly frequent retraining)
4. Initialize tracking variables:
   - last_retrain_time: backdated (e.g. 25 hours ago) so the first check is allowed to retrain
|
|
- retrain_history: List[Dict] to store retraining results
|
|
|
|
EXAMPLE USAGE:
|
|
```python
|
|
trigger = RetrainingTrigger(model, train_data, val_data)
|
|
should_retrain = trigger.check_trigger_conditions(monitor, drift_detector)
|
|
if should_retrain:
|
|
new_model = trigger.execute_retraining()
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Store references to data for retraining
|
|
- Set reasonable default thresholds
|
|
- Use datetime for time tracking
|
|
- Initialize empty history list
|
|
"""
|
|
### BEGIN SOLUTION
|
|
self.model = model
|
|
self.training_data = training_data
|
|
self.validation_data = validation_data
|
|
self.trainer_class = trainer_class
|
|
|
|
# Trigger conditions
|
|
self.accuracy_threshold = 0.82 # Slightly above ModelMonitor threshold of 0.81
|
|
self.drift_threshold = 1 # Reduced threshold for faster triggering
|
|
self.min_time_between_retrains = 24 * 60 * 60 # 24 hours in seconds
|
|
|
|
# Tracking variables
|
|
# Set initial time to 25 hours ago to allow immediate retraining in tests
|
|
self.last_retrain_time = datetime.now() - timedelta(hours=25)
|
|
self.retrain_history = []
|
|
### END SOLUTION
|
|
|
|
def check_trigger_conditions(self, monitor: ModelMonitor, drift_detector: DriftDetector) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Check if retraining should be triggered.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Get current time and check time since last retrain:
|
|
- time_since_last = (current_time - self.last_retrain_time).total_seconds()
|
|
- too_soon = time_since_last < self.min_time_between_retrains
|
|
2. Check monitor alerts:
|
|
- Get alerts from monitor.check_alerts()
|
|
- accuracy_trigger = alerts["accuracy_alert"]
|
|
3. Check drift status:
|
|
- Get latest drift from drift_detector.drift_history
|
|
- drift_trigger = drift_count >= self.drift_threshold
|
|
4. Determine overall trigger status:
|
|
- should_retrain = (accuracy_trigger or drift_trigger) and not too_soon
|
|
5. Return comprehensive result dictionary
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"should_retrain": True,
|
|
"accuracy_trigger": True,
|
|
"drift_trigger": False,
|
|
"time_trigger": True,
|
|
"reasons": ["Accuracy below threshold: 0.82 < 0.85"],
|
|
"time_since_last_retrain": 86400
|
|
}
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use .total_seconds() for time differences
|
|
- Collect all trigger reasons in a list
|
|
- Handle empty drift history gracefully
|
|
- Provide detailed feedback for debugging
|
|
"""
|
|
### BEGIN SOLUTION
|
|
current_time = datetime.now()
|
|
time_since_last = (current_time - self.last_retrain_time).total_seconds()
|
|
too_soon = time_since_last < self.min_time_between_retrains
|
|
|
|
# Check monitor alerts
|
|
alerts = monitor.check_alerts()
|
|
accuracy_trigger = alerts["accuracy_alert"]
|
|
|
|
# Check drift status
|
|
drift_trigger = False
|
|
drift_count = 0
|
|
if drift_detector.drift_history:
|
|
latest_drift = drift_detector.drift_history[-1]["result"]
|
|
drift_count = latest_drift["drift_count"]
|
|
drift_trigger = drift_count >= self.drift_threshold
|
|
|
|
# Determine overall trigger
|
|
should_retrain = (accuracy_trigger or drift_trigger) and not too_soon
|
|
|
|
# Collect reasons
|
|
reasons = []
|
|
        if accuracy_trigger and monitor.accuracy_history:
            # Report the threshold that actually fired the alert (the monitor's),
            # not this trigger's own accuracy_threshold.
            reasons.append(f"Accuracy below threshold: {monitor.accuracy_history[-1]:.3f} < {monitor.accuracy_threshold:.3f}")
        elif accuracy_trigger:
            reasons.append(f"Accuracy below threshold: < {monitor.accuracy_threshold:.3f}")
|
|
if drift_trigger:
|
|
reasons.append(f"Drift detected in {drift_count} features (threshold: {self.drift_threshold})")
|
|
if too_soon:
|
|
reasons.append(f"Too soon since last retrain ({time_since_last:.0f}s < {self.min_time_between_retrains}s)")
|
|
|
|
return {
|
|
"should_retrain": should_retrain,
|
|
"accuracy_trigger": accuracy_trigger,
|
|
"drift_trigger": drift_trigger,
|
|
"time_trigger": not too_soon,
|
|
"reasons": reasons,
|
|
"time_since_last_retrain": time_since_last,
|
|
"drift_count": drift_count
|
|
}
|
|
### END SOLUTION
|
|
|
|
def execute_retraining(self) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Execute the retraining process.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Record start time and create result dictionary
|
|
2. Simulate training process:
|
|
- Create simple model (copy of original architecture)
|
|
- Simulate training with random improvement
|
|
- Calculate new performance (baseline + random improvement)
|
|
3. Validate new model:
|
|
- Compare old vs new performance
|
|
- Only deploy if new model is better
|
|
4. Update tracking:
|
|
- Update last_retrain_time
|
|
- Add entry to retrain_history
|
|
5. Return comprehensive result
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"success": True,
|
|
"old_accuracy": 0.82,
|
|
"new_accuracy": 0.91,
|
|
"improvement": 0.09,
|
|
"deployed": True,
|
|
"training_time": 45.2,
|
|
"timestamp": datetime(2024, 1, 1, 12, 0)
|
|
}
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use time.time() for timing
|
|
- Simulate realistic training time (random 30-60 seconds)
|
|
- Add random improvement (0.02-0.08 accuracy boost)
|
|
- Only deploy if new model is better
|
|
- Store detailed results for analysis
|
|
"""
|
|
### BEGIN SOLUTION
|
|
        timestamp = datetime.now()

        # Simulate the training process: report a realistic duration without
        # actually waiting for it, so tests stay fast.
        training_time = np.random.uniform(30, 60)  # simulated 30-60 seconds
        time.sleep(0.000001)  # token pause; real training would run here
|
|
|
|
# Get current model performance
|
|
old_accuracy = 0.82 if not hasattr(self, '_current_accuracy') else self._current_accuracy
|
|
|
|
# Simulate training with random improvement
|
|
improvement = np.random.uniform(0.02, 0.08) # 2-8% improvement
|
|
new_accuracy = min(old_accuracy + improvement, 0.98) # Cap at 98%
|
|
|
|
# Validate new model (deploy if better)
|
|
deployed = new_accuracy > old_accuracy
|
|
|
|
# Update tracking
|
|
if deployed:
|
|
self.last_retrain_time = timestamp
|
|
self._current_accuracy = new_accuracy
|
|
|
|
# Create result
|
|
result = {
|
|
"success": True,
|
|
"old_accuracy": old_accuracy,
|
|
"new_accuracy": new_accuracy,
|
|
"improvement": new_accuracy - old_accuracy,
|
|
"deployed": deployed,
|
|
"training_time": training_time,
|
|
"timestamp": timestamp
|
|
}
|
|
|
|
# Store in history
|
|
self.retrain_history.append(result)
|
|
|
|
return result
|
|
### END SOLUTION
|
|
|
|
def get_retraining_history(self) -> List[Dict]:
|
|
"""
|
|
TODO: Return the complete retraining history.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Return self.retrain_history
|
|
2. Include all retraining attempts with results
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
[
|
|
{
|
|
"success": True,
|
|
"old_accuracy": 0.82,
|
|
"new_accuracy": 0.89,
|
|
"improvement": 0.07,
|
|
"deployed": True,
|
|
"training_time": 42.1,
|
|
"timestamp": datetime(2024, 1, 1, 12, 0)
|
|
}
|
|
]
|
|
```
|
|
"""
|
|
### BEGIN SOLUTION
|
|
return self.retrain_history
|
|
### END SOLUTION
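
# Illustrative usage sketch (an assumption for demonstration, not exported; the
# None placeholders stand in for a real model and datasets): wiring
# RetrainingTrigger to a ModelMonitor and DriftDetector. Because __init__
# backdates last_retrain_time by 25 hours, one low accuracy reading is enough
# to trigger the simulated retraining.
def _example_retraining_trigger():
    rng = np.random.default_rng(1)
    baseline = rng.normal(0.0, 1.0, size=(200, 2))
    monitor = ModelMonitor("demo_model", baseline_accuracy=0.90)
    detector = DriftDetector(baseline)
    trigger = RetrainingTrigger(model=None, training_data=None, validation_data=None)

    monitor.record_performance(accuracy=0.75, latency=120.0)  # below the 0.81 alert threshold
    decision = trigger.check_trigger_conditions(monitor, detector)
    if decision["should_retrain"]:
        return trigger.execute_retraining()
    return decision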
|
|
|
|
# %% ../../modules/source/13_mlops/mlops_dev.ipynb 19
|
|
class MLOpsPipeline:
|
|
"""
|
|
Complete MLOps pipeline that integrates all components.
|
|
|
|
Orchestrates the full ML system lifecycle from monitoring to deployment.
|
|
"""
|
|
|
|
def __init__(self, model, training_data, validation_data, baseline_data):
|
|
"""
|
|
TODO: Initialize the complete MLOps pipeline.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Store all input data and model
|
|
2. Initialize all MLOps components:
|
|
- ModelMonitor with baseline accuracy
|
|
- DriftDetector with baseline data
|
|
- RetrainingTrigger with model and data
|
|
3. Set up pipeline configuration:
|
|
- monitoring_interval: 3600 (1 hour)
|
|
- auto_retrain: True
|
|
- deploy_threshold: 0.02 (2% improvement required)
|
|
4. Initialize pipeline state:
|
|
- pipeline_active: False
|
|
- last_check_time: datetime.now()
|
|
- deployment_history: []
|
|
|
|
EXAMPLE USAGE:
|
|
```python
|
|
pipeline = MLOpsPipeline(model, train_data, val_data, baseline_data)
|
|
pipeline.start_monitoring()
|
|
status = pipeline.check_system_health()
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use 0.90 as the monitor's baseline accuracy (a fuller version would estimate it from validation data)
|
|
- Use feature_names from data shape
|
|
- Set reasonable defaults for all parameters
|
|
- Initialize all components in __init__
|
|
"""
|
|
### BEGIN SOLUTION
|
|
self.model = model
|
|
self.training_data = training_data
|
|
self.validation_data = validation_data
|
|
self.baseline_data = baseline_data
|
|
|
|
# Initialize MLOps components
|
|
self.monitor = ModelMonitor("production_model", baseline_accuracy=0.90)
|
|
feature_names = [f"feature_{i}" for i in range(baseline_data.shape[1])]
|
|
self.drift_detector = DriftDetector(baseline_data, feature_names)
|
|
self.retrain_trigger = RetrainingTrigger(model, training_data, validation_data)
|
|
|
|
# Pipeline configuration
|
|
self.monitoring_interval = 3600 # 1 hour
|
|
self.auto_retrain = True
|
|
self.deploy_threshold = 0.02 # 2% improvement
|
|
|
|
# Pipeline state
|
|
self.pipeline_active = False
|
|
self.last_check_time = datetime.now()
|
|
self.deployment_history = []
|
|
### END SOLUTION
|
|
|
|
def start_monitoring(self):
|
|
"""
|
|
TODO: Start the MLOps monitoring pipeline.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Set pipeline_active = True
|
|
2. Update last_check_time = datetime.now()
|
|
3. Log pipeline start
|
|
4. Return status dictionary
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"status": "started",
|
|
"pipeline_active": True,
|
|
"start_time": datetime(2024, 1, 1, 12, 0),
|
|
"message": "MLOps pipeline started successfully"
|
|
}
|
|
```
|
|
"""
|
|
### BEGIN SOLUTION
|
|
self.pipeline_active = True
|
|
self.last_check_time = datetime.now()
|
|
|
|
return {
|
|
"status": "started",
|
|
"pipeline_active": True,
|
|
"start_time": self.last_check_time,
|
|
"message": "MLOps pipeline started successfully"
|
|
}
|
|
### END SOLUTION
|
|
|
|
def check_system_health(self, new_data: Optional[np.ndarray] = None, current_accuracy: Optional[float] = None) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Check complete system health and trigger actions if needed.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Check if pipeline is active, return early if not
|
|
2. Record current performance in monitor (if provided)
|
|
3. Check for drift (if new_data provided)
|
|
4. Check trigger conditions
|
|
5. Execute retraining if needed (and auto_retrain is True)
|
|
6. Return comprehensive system status
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"pipeline_active": True,
|
|
"current_accuracy": 0.87,
|
|
"drift_detected": True,
|
|
"retraining_triggered": True,
|
|
"new_model_deployed": True,
|
|
"system_healthy": True,
|
|
"last_check": datetime(2024, 1, 1, 12, 0),
|
|
"actions_taken": ["drift_detected", "retraining_executed", "model_deployed"]
|
|
}
|
|
```
|
|
|
|
IMPLEMENTATION HINTS:
|
|
- Use default values if parameters not provided
|
|
- Track all actions taken during health check
|
|
- Update last_check_time
|
|
- Return comprehensive status for debugging
|
|
"""
|
|
### BEGIN SOLUTION
|
|
if not self.pipeline_active:
|
|
return {
|
|
"pipeline_active": False,
|
|
"message": "Pipeline not active. Call start_monitoring() first."
|
|
}
|
|
|
|
current_time = datetime.now()
|
|
actions_taken = []
|
|
|
|
# Record performance if provided
|
|
if current_accuracy is not None:
|
|
self.monitor.record_performance(current_accuracy, latency=150.0)
|
|
actions_taken.append("performance_recorded")
|
|
|
|
# Check for drift if new data provided
|
|
drift_detected = False
|
|
if new_data is not None:
|
|
drift_result = self.drift_detector.detect_drift(new_data)
|
|
drift_detected = drift_result["drift_detected"]
|
|
if drift_detected:
|
|
actions_taken.append("drift_detected")
|
|
|
|
# Check trigger conditions
|
|
trigger_conditions = self.retrain_trigger.check_trigger_conditions(
|
|
self.monitor, self.drift_detector
|
|
)
|
|
|
|
# Execute retraining if needed
|
|
new_model_deployed = False
|
|
if trigger_conditions["should_retrain"] and self.auto_retrain:
|
|
retrain_result = self.retrain_trigger.execute_retraining()
|
|
actions_taken.append("retraining_executed")
|
|
|
|
if retrain_result["deployed"]:
|
|
new_model_deployed = True
|
|
actions_taken.append("model_deployed")
|
|
|
|
# Record deployment
|
|
self.deployment_history.append({
|
|
"timestamp": current_time,
|
|
"old_accuracy": retrain_result["old_accuracy"],
|
|
"new_accuracy": retrain_result["new_accuracy"],
|
|
"improvement": retrain_result["improvement"]
|
|
})
|
|
|
|
# Update state
|
|
self.last_check_time = current_time
|
|
|
|
# Determine system health
|
|
alerts = self.monitor.check_alerts()
|
|
system_healthy = not alerts["any_alerts"] or new_model_deployed
|
|
|
|
return {
|
|
"pipeline_active": True,
|
|
"current_accuracy": current_accuracy,
|
|
"drift_detected": drift_detected,
|
|
"retraining_triggered": trigger_conditions["should_retrain"],
|
|
"new_model_deployed": new_model_deployed,
|
|
"system_healthy": system_healthy,
|
|
"last_check": current_time,
|
|
"actions_taken": actions_taken,
|
|
"alerts": alerts,
|
|
"trigger_conditions": trigger_conditions
|
|
}
|
|
### END SOLUTION
|
|
|
|
def get_pipeline_status(self) -> Dict[str, Any]:
|
|
"""
|
|
TODO: Get comprehensive pipeline status and history.
|
|
|
|
STEP-BY-STEP IMPLEMENTATION:
|
|
1. Get status from all components:
|
|
- Monitor alerts and trends
|
|
- Drift detection history
|
|
- Retraining history
|
|
- Deployment history
|
|
2. Calculate summary statistics:
|
|
- Total deployments
|
|
- Average accuracy improvement
|
|
- Time since last check
|
|
3. Return comprehensive status
|
|
|
|
EXAMPLE RETURN:
|
|
```python
|
|
{
|
|
"pipeline_active": True,
|
|
"total_deployments": 3,
|
|
"average_improvement": 0.05,
|
|
"time_since_last_check": 300,
|
|
"recent_alerts": [...],
|
|
"drift_history": [...],
|
|
"deployment_history": [...]
|
|
}
|
|
```
|
|
"""
|
|
### BEGIN SOLUTION
|
|
current_time = datetime.now()
|
|
time_since_last_check = (current_time - self.last_check_time).total_seconds()
|
|
|
|
# Get component statuses
|
|
alerts = self.monitor.check_alerts()
|
|
trend = self.monitor.get_performance_trend()
|
|
drift_history = self.drift_detector.get_drift_history()
|
|
retrain_history = self.retrain_trigger.get_retraining_history()
|
|
|
|
# Calculate summary statistics
|
|
total_deployments = len(self.deployment_history)
|
|
average_improvement = 0.0
|
|
if self.deployment_history:
|
|
average_improvement = np.mean([d["improvement"] for d in self.deployment_history])
|
|
|
|
return {
|
|
"pipeline_active": self.pipeline_active,
|
|
"total_deployments": total_deployments,
|
|
"average_improvement": average_improvement,
|
|
"time_since_last_check": time_since_last_check,
|
|
"recent_alerts": alerts,
|
|
"performance_trend": trend,
|
|
"drift_history": drift_history[-5:], # Last 5 drift checks
|
|
"deployment_history": self.deployment_history,
|
|
"retrain_history": retrain_history
|
|
}
|
|
### END SOLUTION
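
# Illustrative end-to-end sketch (an assumption for demonstration, not exported;
# the None placeholders stand in for a real model and datasets): one monitoring
# cycle through MLOpsPipeline. A batch with one shifted feature plus a low
# accuracy reading should surface drift, trigger retraining, and record a
# deployment if the simulated model improves.
def _example_mlops_pipeline():
    rng = np.random.default_rng(2)
    baseline = rng.normal(0.0, 1.0, size=(500, 4))
    pipeline = MLOpsPipeline(model=None, training_data=None,
                             validation_data=None, baseline_data=baseline)
    pipeline.start_monitoring()

    new_batch = rng.normal(0.0, 1.0, size=(200, 4))
    new_batch[:, 1] += 2.5                          # simulate data drift in one feature
    health = pipeline.check_system_health(new_data=new_batch, current_accuracy=0.78)
    status = pipeline.get_pipeline_status()
    return health, status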
|