mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-04-29 03:51:33 -05:00
- Implement complete capability showcase system (11 demonstrations) - Add auto-run showcases after successful module completion - Create interactive launcher for easy showcase navigation - Integrate with tito module complete workflow - Add user preference system for logo themes - Showcase student achievements without requiring additional work - Demonstrate real ML capabilities from tensors to TinyGPT - Use Rich terminal UI for beautiful visualizations
372 lines · 14 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
🚀 CAPABILITY SHOWCASE: Production Systems
|
|
After Module 15 (MLOps)
|
|
|
|
"Look what you built!" - Your MLOps tools handle production!
|
|
"""
|
|
|
|
import sys
|
|
import time
|
|
import random
|
|
from rich.console import Console
|
|
from rich.panel import Panel
|
|
from rich.table import Table
|
|
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
|
|
from rich.align import Align
|
|
from rich.live import Live
|
|
|
|
# Import from YOUR TinyTorch implementation
|
|
try:
|
|
from tinytorch.core.mlops import ModelDeployment, Monitor, AutoScaler
|
|
except ImportError:
|
|
print("❌ TinyTorch MLOps not found. Make sure you've completed Module 15 (MLOps)!")
|
|
sys.exit(1)
|
|
|
|
# Shared Rich console used by every showcase section below.
console = Console()
|
def simulate_model_deployment():
    """Simulate deploying a model to production.

    Walks through a scripted sequence of deployment steps under a Rich
    spinner (each step "works" for a fixed number of seconds), then
    prints a summary of the simulated deployment configuration.

    Returns:
        bool: Always ``True`` — the simulation itself cannot fail.
    """
    console.print(Panel.fit("🚀 MODEL DEPLOYMENT SIMULATION", style="bold green"))

    console.print("📦 Deploying YOUR TinyTorch model to production environment...")
    console.print()

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        # (description, seconds to spend "working" on the step)
        steps = [
            ("Loading model artifacts...", 2),
            ("Validating model integrity...", 1),
            ("Setting up inference server...", 2),
            ("Configuring load balancer...", 1),
            ("Running health checks...", 2),
            ("Enabling traffic routing...", 1),
        ]

        for step_desc, duration in steps:
            task = progress.add_task(step_desc, total=None)
            time.sleep(duration)
            # removesuffix only strips a literal trailing "..." — safer
            # than the previous step_desc[:-3], which blindly chopped the
            # last three characters of any description.
            progress.update(task, description=f"✅ {step_desc.removesuffix('...')} complete!")
            time.sleep(0.3)

    console.print("🎯 [bold]Deployment Configuration:[/bold]")
    console.print("   🌐 Load Balancer: 3 inference nodes")
    console.print("   📊 Auto-scaling: 1-10 instances")
    console.print("   💾 Model cache: 95% hit rate")
    console.print("   🔒 Security: TLS encryption, API authentication")
    console.print("   📈 Monitoring: Real-time metrics collection")

    return True
|
|
|
|
def demonstrate_live_monitoring():
    """Show live monitoring dashboard simulation.

    Renders a Rich ``Live`` table that refreshes twenty times at
    half-second intervals (~10 seconds total), each refresh showing a
    fresh set of randomly generated production metrics.
    """
    console.print(Panel.fit("📊 LIVE MONITORING DASHBOARD", style="bold blue"))

    console.print("🔍 YOUR monitoring system tracking production model...")
    console.print()

    with Live(refresh_per_second=2) as dashboard:
        for _ in range(20):  # ~10 seconds of updates
            # Draw plausible-looking metrics for this refresh.
            now = time.strftime("%H:%M:%S")
            rps = random.randint(850, 1200)
            latency_ms = random.uniform(45, 85)
            err_pct = random.uniform(0.1, 0.5)
            cpu_pct = random.uniform(35, 75)
            mem_pct = random.uniform(60, 85)
            acc_pct = random.uniform(94.2, 95.8)

            # Rebuild the dashboard table from scratch each tick.
            board = Table(title=f"Production Metrics - {now}")
            for heading, colour in (
                ("Metric", "cyan"),
                ("Current", "yellow"),
                ("Target", "green"),
                ("Status", "magenta"),
            ):
                board.add_column(heading, style=colour)

            # Each row: metric name, current value, target, traffic-light status.
            board.add_row("Requests/sec", f"{rps:,}", "1000+", "🟢" if rps > 1000 else "🟡")
            board.add_row("Avg Latency", f"{latency_ms:.1f}ms", "<100ms", "🟢" if latency_ms < 100 else "🟡")
            board.add_row("Error Rate", f"{err_pct:.2f}%", "<1%", "🟢" if err_pct < 1 else "🔴")
            board.add_row("CPU Usage", f"{cpu_pct:.1f}%", "<80%", "🟢" if cpu_pct < 80 else "🟡")
            board.add_row("Memory", f"{mem_pct:.1f}%", "<90%", "🟢" if mem_pct < 90 else "🟡")
            board.add_row("Model Accuracy", f"{acc_pct:.1f}%", ">94%", "🟢" if acc_pct > 94 else "🔴")

            dashboard.update(board)
            time.sleep(0.5)

    console.print("\n💡 [bold]Monitoring Insights:[/bold]")
    console.print("   📈 System handling ~1000 requests/sec successfully")
    console.print("   ⚡ Latency consistently under 100ms target")
    console.print("   🎯 Model accuracy stable at 95%+")
    console.print("   🔧 Resource utilization within healthy ranges")
|
|
|
|
def simulate_auto_scaling():
    """Demonstrate auto-scaling in response to traffic.

    Replays a scripted 30-minute traffic spike (one sample every five
    minutes) and renders a table showing how the simulated auto-scaler
    would size the instance pool at each sample.
    """
    console.print(Panel.fit("🔄 AUTO-SCALING SIMULATION", style="bold yellow"))

    console.print("📈 Simulating traffic spike and auto-scaling response...")
    console.print()

    # Samples at t = 0, 5, ..., 30 minutes.  The range is only zipped,
    # so there is no need to materialize it into a list (PERF101).
    time_points = range(0, 31, 5)
    traffic_pattern = [100, 150, 300, 800, 1500, 1200, 400]  # requests/sec

    table = Table(title="Auto-Scaling Response to Traffic")
    table.add_column("Time", style="cyan")
    table.add_column("Traffic (RPS)", style="yellow")
    table.add_column("Instances", style="green")
    table.add_column("Avg Latency", style="magenta")
    table.add_column("Action", style="blue")

    for i, (time_point, traffic) in enumerate(zip(time_points, traffic_pattern)):
        # Map the traffic level onto an instance count, a plausible
        # latency band, and the scaling action that produced it.
        if traffic < 200:
            instances = 1
            latency = random.uniform(40, 60)
            action = "Baseline"
        elif traffic < 500:
            instances = 2
            latency = random.uniform(50, 70)
            action = "Scale up +1"
        elif traffic < 1000:
            instances = 4
            latency = random.uniform(60, 80)
            action = "Scale up +2"
        else:
            instances = 7
            latency = random.uniform(70, 90)
            action = "Scale up +3"

        # A drop of more than 30% versus the previous sample is shown
        # as a scale-down event instead.
        if i > 0 and traffic < traffic_pattern[i - 1] * 0.7:
            action = "Scale down"

        table.add_row(
            f"{time_point}min",
            f"{traffic:,}",
            str(instances),
            f"{latency:.1f}ms",
            action,
        )

    console.print(table)

    console.print("\n🎯 [bold]Auto-Scaling Logic:[/bold]")
    console.print("   📊 Monitor: Request rate, latency, CPU usage")
    console.print("   🔼 Scale up: When latency > 100ms or CPU > 80%")
    console.print("   🔽 Scale down: When resources underutilized for 5+ minutes")
    console.print("   ⚡ Speed: New instances ready in 30-60 seconds")
|
|
|
|
def demonstrate_model_versioning():
    """Show model versioning and deployment strategies.

    Prints a table of the (simulated) model versions currently serving
    traffic, then summarises canary, A/B, and blue-green deployment
    strategies.
    """
    console.print(Panel.fit("🗂️ MODEL VERSIONING & DEPLOYMENT", style="bold magenta"))

    console.print("📋 Managing multiple model versions in production...")
    console.print()

    # Table of every version known to the serving fleet.
    versions_table = Table(title="Production Model Versions")
    for header, colour in (
        ("Version", "cyan"),
        ("Accuracy", "yellow"),
        ("Latency", "green"),
        ("Traffic %", "magenta"),
        ("Status", "blue"),
    ):
        versions_table.add_column(header, style=colour)

    versions = [
        ("v1.2.3", "94.2%", "65ms", "80%", "🟢 Stable"),
        ("v1.3.0", "95.1%", "72ms", "15%", "🟡 A/B Testing"),
        ("v1.3.1", "95.3%", "68ms", "5%", "🔵 Canary"),
        ("v1.1.9", "93.8%", "58ms", "0%", "🔴 Deprecated"),
    ]
    for row in versions:
        versions_table.add_row(*row)

    console.print(versions_table)

    console.print("\n🚀 [bold]Deployment Strategies:[/bold]")
    console.print("   🐦 [bold]Canary Deployment:[/bold] 5% traffic to new version")
    console.print("      • Monitor for regressions")
    console.print("      • Gradual rollout if successful")
    console.print("      • Instant rollback if issues")
    console.print()
    console.print("   🧪 [bold]A/B Testing:[/bold] Compare model performance")
    console.print("      • Statistical significance testing")
    console.print("      • Business metric optimization")
    console.print("      • User experience validation")
    console.print()
    console.print("   🔄 [bold]Blue-Green Deployment:[/bold] Zero-downtime updates")
    console.print("      • Parallel environment preparation")
    console.print("      • Traffic switch validation")
    console.print("      • Immediate rollback capability")
|
|
|
|
def show_alerting_system():
    """Demonstrate the alerting system.

    Replays a short alert lifecycle (detect -> auto-remediate ->
    resolve) under a spinner, then prints the alert rule configuration.
    """
    console.print(Panel.fit("🚨 INTELLIGENT ALERTING SYSTEM", style="bold red"))

    console.print("🔔 YOUR alerting system monitoring production health...")
    console.print()

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        console=console,
    ) as progress:
        task = progress.add_task("Monitoring system health...", total=None)
        time.sleep(2)

        # Walk the task through the scripted alert lifecycle.
        for message, pause in (
            ("🟡 Warning: Latency spike detected", 1),
            ("🟢 Alert resolved: Auto-scaling activated", 1),
            ("📊 All systems nominal", 0.5),
        ):
            progress.update(task, description=message)
            time.sleep(pause)

    # Static view of the configured alert rules.
    alert_table = Table(title="Alert Configuration")
    for header, colour in (
        ("Alert Type", "cyan"),
        ("Threshold", "yellow"),
        ("Action", "green"),
        ("Escalation", "magenta"),
    ):
        alert_table.add_column(header, style=colour)

    alert_rules = [
        ("High Latency", ">150ms for 2min", "Auto-scale", "Page oncall if >5min"),
        ("Error Rate", ">2% for 1min", "Circuit breaker", "Immediate escalation"),
        ("Accuracy Drop", "<93% for 5min", "Traffic redirect", "Model team alert"),
        ("Resource Usage", ">90% for 3min", "Scale up", "Infrastructure team"),
        ("Model Drift", "Drift score >0.8", "Flag for review", "ML team notification"),
    ]
    for rule in alert_rules:
        alert_table.add_row(*rule)

    console.print(alert_table)

    console.print("\n🎯 [bold]Smart Alerting Features:[/bold]")
    console.print("   🧠 Machine learning-based anomaly detection")
    console.print("   📊 Context-aware thresholds (time of day, seasonality)")
    console.print("   🔇 Alert fatigue reduction with intelligent grouping")
    console.print("   📱 Multi-channel notifications (Slack, PagerDuty, SMS)")
|
|
|
|
def show_production_best_practices():
    """Show production ML best practices.

    Prints four categories of best practices, each as a bold heading
    followed by its bullet items.
    """
    console.print(Panel.fit("🏆 PRODUCTION ML BEST PRACTICES", style="bold cyan"))

    console.print("💡 Essential practices for production ML systems:")
    console.print()

    # (category heading, bullet items) pairs, printed in order.
    practices = (
        (
            "🔒 Reliability & Security",
            (
                "Multi-region deployment for disaster recovery",
                "Input validation and sanitization",
                "Model access controls and authentication",
                "Regular security audits and updates",
            ),
        ),
        (
            "📊 Monitoring & Observability",
            (
                "End-to-end request tracing",
                "Business metric correlation",
                "Data drift detection",
                "Model explanation and interpretability",
            ),
        ),
        (
            "🚀 Performance & Efficiency",
            (
                "Model compression and optimization",
                "Caching strategies for repeated queries",
                "Batch processing for efficiency",
                "Hardware-specific optimization",
            ),
        ),
        (
            "🔄 Continuous Improvement",
            (
                "Automated retraining pipelines",
                "Feature store for consistency",
                "Experiment tracking and reproducibility",
                "Feedback loop integration",
            ),
        ),
    )

    for category, items in practices:
        console.print(f"[bold]{category}[/bold]")
        for item in items:
            console.print(f"   • {item}")
        console.print()
|
|
|
|
def main():
    """Main showcase function.

    Clears the screen, prints the showcase header, runs each section in
    order separated by a horizontal rule, and ends with a celebration
    panel.  Any exception is reported with a hint rather than a
    traceback, since this script is a student-facing demo.
    """
    console.clear()

    # Header
    header = Panel.fit(
        "[bold cyan]🚀 CAPABILITY SHOWCASE: PRODUCTION SYSTEMS[/bold cyan]\n"
        "[yellow]After Module 15 (MLOps)[/yellow]\n\n"
        "[green]\"Look what you built!\" - Your MLOps tools handle production![/green]",
        border_style="bright_blue"
    )
    console.print(Align.center(header))
    console.print()

    try:
        # Run each section, printing a separator rule after it.
        sections = (
            simulate_model_deployment,
            demonstrate_live_monitoring,
            simulate_auto_scaling,
            demonstrate_model_versioning,
            show_alerting_system,
        )
        for section in sections:
            section()
            console.print("\n" + "=" * 60)

        show_production_best_practices()

        # Celebration
        console.print("\n" + "=" * 60)
        console.print(Panel.fit(
            "[bold green]🎉 PRODUCTION SYSTEMS MASTERY! 🎉[/bold green]\n\n"
            "[cyan]You've mastered enterprise-grade ML operations![/cyan]\n\n"
            "[white]Your MLOps expertise enables:[/white]\n"
            "[white]• Reliable 24/7 model serving[/white]\n"
            "[white]• Automatic scaling and recovery[/white]\n"
            "[white]• Continuous monitoring and alerting[/white]\n"
            "[white]• Safe deployment and rollback[/white]\n\n"
            "[yellow]You now understand what it takes to run[/yellow]\n"
            "[yellow]ML systems at enterprise scale![/yellow]\n\n"
            "[bold bright_green]Ready to deploy AI that millions can depend on! 🌟[/bold bright_green]",
            border_style="green"
        ))

    except Exception as e:
        console.print(f"❌ Error running showcase: {e}")
        console.print("💡 Make sure you've completed Module 15 and your MLOps tools work!")


if __name__ == "__main__":
    main()