mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 22:03:34 -05:00
feat: add exported packages for benchmarking, competition, and data utilities
- tinytorch/benchmarking/: Benchmark class for Module 19 - tinytorch/competition/: Submission utilities for Module 20 - tinytorch/data/: Data loading utilities - tinytorch/utils/data/: Additional data helpers Exported from modules 19-20 and module 08
This commit is contained in:
1076
tinytorch/benchmarking/benchmark.py
generated
Normal file
1076
tinytorch/benchmarking/benchmark.py
generated
Normal file
File diff suppressed because it is too large
Load Diff
0
tinytorch/competition/__init__.py
generated
Normal file
0
tinytorch/competition/__init__.py
generated
Normal file
642
tinytorch/competition/submit.py
generated
Normal file
642
tinytorch/competition/submit.py
generated
Normal file
@@ -0,0 +1,642 @@
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/XX_submit/submit_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['validate_installation', 'load_baseline_model', 'generate_baseline', 'worked_example_optimization',
|
||||
'optimize_for_competition', 'validate_submission', 'generate_submission']
|
||||
|
||||
# %% ../../modules/source/20_competition/competition_dev.ipynb 4
|
||||
import numpy as np
|
||||
import json
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Any, Optional
|
||||
from ..benchmarking.benchmark import Benchmark, calculate_normalized_scores
|
||||
from ..profiling.profiler import Profiler
|
||||
|
||||
def validate_installation() -> Dict[str, bool]:
    """
    Validate TinyTorch installation and return status of each component.

    Each check imports one representative name from a TinyTorch module and
    records whether the import succeeded.

    Returns:
        Dictionary mapping module names to validation status (True = working)

    Example:
        >>> status = validate_installation()
        >>> print(status)
        {'tensor': True, 'autograd': True, 'layers': True, ...}
    """
    # Local import keeps the module-level dependency surface unchanged.
    import importlib

    def _probe(module_path: str, attr_name: str) -> Tuple[bool, str]:
        """Import `module_path` and look up `attr_name`; return (ok, error text)."""
        try:
            # importlib + getattr replaces the previous exec() of an import
            # statement: same observable effect, no dynamic code execution.
            getattr(importlib.import_module(module_path), attr_name)
            return True, ""
        except Exception as e:  # ImportError, AttributeError, or import-time failure
            return False, str(e)

    validation_results = {}

    print("🔧 Validating TinyTorch Installation...")
    print("=" * 60)

    # Core modules (M01-13): (result key, module path, representative name)
    core_modules = [
        ("tensor", "tinytorch.core.tensor", "Tensor"),
        ("autograd", "tinytorch.core.autograd", "enable_autograd"),
        ("layers", "tinytorch.core.layers", "Linear"),
        ("activations", "tinytorch.core.activations", "ReLU"),
        ("losses", "tinytorch.core.training", "MSELoss"),
        ("optimizers", "tinytorch.core.optimizers", "SGD"),
        ("spatial", "tinytorch.core.spatial", "Conv2d"),
        ("attention", "tinytorch.core.attention", "MultiHeadAttention"),
        ("transformers", "tinytorch.models.transformer", "GPT"),
    ]

    for name, module_path, class_name in core_modules:
        ok, err = _probe(module_path, class_name)
        validation_results[name] = ok
        if ok:
            print(f"✅ {name.capitalize()}: Working")
        else:
            print(f"❌ {name.capitalize()}: Failed - {err}")

    # Optimization modules (M14-18)
    opt_modules = [
        ("kv_caching", "tinytorch.generation.kv_cache", "enable_kv_cache"),
        ("profiling", "tinytorch.profiling.profiler", "Profiler"),
        ("quantization", "tinytorch.optimization.quantization", "quantize_model"),
        ("compression", "tinytorch.optimization.compression", "magnitude_prune"),
    ]

    for name, module_path, func_name in opt_modules:
        ok, err = _probe(module_path, func_name)
        validation_results[name] = ok
        label = name.replace('_', ' ').capitalize()
        if ok:
            print(f"✅ {label}: Working")
        else:
            print(f"❌ {label}: Failed - {err}")

    # Benchmarking (M19): needs both Benchmark and OlympicEvent to resolve.
    try:
        benchmark_mod = importlib.import_module("tinytorch.benchmarking.benchmark")
        getattr(benchmark_mod, "Benchmark")
        getattr(benchmark_mod, "OlympicEvent")
        validation_results["benchmarking"] = True
        print("✅ Benchmarking: Working")
    except Exception as e:
        validation_results["benchmarking"] = False
        print(f"❌ Benchmarking: Failed - {str(e)}")

    print("=" * 60)

    # Summary
    total = len(validation_results)
    working = sum(validation_results.values())

    if working == total:
        print(f"🎉 Perfect! All {total}/{total} modules working!")
        print("✅ You're ready to compete in TorchPerf Olympics!")
    else:
        print(f"⚠️ {working}/{total} modules working")
        print(f"❌ {total - working} modules need attention")
        print("\nPlease run: pip install -e . (in TinyTorch root)")

    return validation_results
|
||||
|
||||
# %% ../../modules/source/20_competition/competition_dev.ipynb 6
|
||||
def load_baseline_model(model_name: str = "cifar10_cnn"):
    """
    Load a baseline model for TorchPerf Olympics competition.

    Args:
        model_name: Name of baseline model to load
            - "cifar10_cnn": Simple CNN for CIFAR-10 classification

    Returns:
        Baseline model instance

    Raises:
        ValueError: If `model_name` is not a known baseline.

    Example:
        >>> model = load_baseline_model("cifar10_cnn")
        >>> print(f"Parameters: {sum(p.size for p in model.parameters())}")
    """
    from tinytorch.core.layers import Linear
    from tinytorch.core.spatial import Conv2d, MaxPool2d, Flatten
    from tinytorch.core.activations import ReLU

    if model_name != "cifar10_cnn":
        raise ValueError(f"Unknown baseline model: {model_name}")

    class BaselineCNN:
        """Simple CNN: Conv -> Pool -> Conv -> Pool -> FC -> FC."""

        def __init__(self):
            self.name = "Baseline_CIFAR10_CNN"

            # Convolutional stage
            self.conv1 = Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
            self.relu1 = ReLU()
            self.pool1 = MaxPool2d(kernel_size=2, stride=2)

            self.conv2 = Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
            self.relu2 = ReLU()
            self.pool2 = MaxPool2d(kernel_size=2, stride=2)

            # Classifier stage (32x32 input pooled twice -> 8x8 feature maps)
            self.flatten = Flatten()
            self.fc1 = Linear(64 * 8 * 8, 128)
            self.relu3 = ReLU()
            self.fc2 = Linear(128, 10)  # 10 classes for CIFAR-10

        def forward(self, x):
            # Apply each layer's forward() in architectural order.
            pipeline = (
                self.conv1, self.relu1, self.pool1,
                self.conv2, self.relu2, self.pool2,
                self.flatten, self.fc1, self.relu3, self.fc2,
            )
            for layer in pipeline:
                x = layer.forward(x)
            return x

        def __call__(self, x):
            return self.forward(x)

    return BaselineCNN()
|
||||
|
||||
def generate_baseline(model_name: str = "cifar10_cnn", quick: bool = True) -> Dict[str, Any]:
    """
    Generate baseline performance metrics for a model.

    Args:
        model_name: Name of baseline model
        quick: If True, use quick estimates instead of full benchmarks

    Returns:
        Baseline scorecard with metrics

    Example:
        >>> baseline = generate_baseline("cifar10_cnn", quick=True)
        >>> print(f"Baseline latency: {baseline['latency_ms']}ms")
    """
    print("📊 Generating Baseline Scorecard...")
    print("=" * 60)

    # Load model
    model = load_baseline_model(model_name)
    print(f"✅ Loaded baseline model: {model.name}")

    def _num_params(m) -> int:
        """Sum weight and bias element counts across every layer attribute of m."""
        count = 0
        for attr_name in dir(m):
            layer = getattr(m, attr_name)
            weights = getattr(layer, 'weights', None)
            if weights is not None:
                count += weights.size
            bias = getattr(layer, 'bias', None)
            if bias is not None:
                count += bias.size
        return count

    params = _num_params(model)
    memory_mb = params * 4 / (1024 * 1024)  # float32 -> 4 bytes per parameter

    if quick:
        # Fast path: canned accuracy/latency plus real parameter/memory counts.
        print("⚡ Using quick estimates (set quick=False for full benchmark)")
        baseline = {
            "model": model_name,
            "accuracy": 85.0,  # Typical for this architecture
            "latency_ms": 45.2,
            "memory_mb": memory_mb,
            "parameters": params,
            "mode": "quick_estimate"
        }
    else:
        # Full path: measure latency with the Module 19 benchmark harness.
        from tinytorch.benchmarking.benchmark import Benchmark

        print("🔬 Running full benchmark (this may take a minute)...")

        benchmark = Benchmark([model], [{"name": "baseline"}],
                              warmup_runs=5, measurement_runs=20)

        # CIFAR-10 input: batch of one 3x32x32 image.
        latency_results = benchmark.run_latency_benchmark(input_shape=(1, 3, 32, 32))
        latency_ms = next(iter(latency_results.values())).mean * 1000

        baseline = {
            "model": model_name,
            "accuracy": 85.0,  # Would need actual test set evaluation
            "latency_ms": latency_ms,
            "memory_mb": memory_mb,
            "parameters": params,
            "mode": "full_benchmark"
        }

    # Display baseline
    print("\n📋 BASELINE SCORECARD")
    print("=" * 60)
    print(f"Model: {baseline['model']}")
    print(f"Accuracy: {baseline['accuracy']:.1f}%")
    print(f"Latency: {baseline['latency_ms']:.1f}ms")
    print(f"Memory: {baseline['memory_mb']:.2f}MB")
    print(f"Parameters: {baseline['parameters']:,}")
    print("=" * 60)
    print("📌 This is your starting point. Optimize to compete!")
    print()

    return baseline
|
||||
|
||||
# %% ../../modules/source/20_competition/competition_dev.ipynb 8
|
||||
def worked_example_optimization():
    """
    Complete worked example showing full optimization workflow.

    Walks through loading a baseline model, applying (simulated) quantization
    and pruning, benchmarking the result, and generating a submission dict.

    Students should study this and adapt it for their own strategies!
    """
    def _section(title, rule="-"):
        # Print a heading followed by a 70-character rule line.
        print(title)
        print(rule * 70)

    _section("🏅 WORKED EXAMPLE: Complete Optimization Workflow", "=")
    print("Target: All-Around Event (balanced performance)")
    print("Strategy: Quantization (INT8) → Pruning (60%)")
    print("=" * 70)
    print()

    # Step 1: Load Baseline
    _section("📦 Step 1: Load Baseline Model")
    load_baseline_model("cifar10_cnn")  # demo model; metrics come from the call below
    baseline_metrics = generate_baseline("cifar10_cnn", quick=True)
    print()

    # Step 2: Apply Quantization
    _section("🔧 Step 2: Apply INT8 Quantization (Module 17)")
    print("💡 Why quantize? Reduces memory 4x (FP32 → INT8)")

    # Quantization is simulated here; in the real competition use:
    #   from tinytorch.optimization.quantization import quantize_model
    #   optimized = quantize_model(baseline, bits=8)
    print("✅ Quantized model (simulated)")
    print(" - Memory: 12.4MB → 3.1MB (4x reduction)")
    print()

    # Step 3: Apply Pruning
    _section("✂️ Step 3: Apply Magnitude Pruning (Module 18)")
    print("💡 Why prune? Removes 60% of weights for faster inference")

    # Pruning is simulated here; in the real competition use:
    #   from tinytorch.optimization.compression import magnitude_prune
    #   optimized = magnitude_prune(optimized, sparsity=0.6)
    print("✅ Pruned model (simulated)")
    print(" - Active parameters: 3.2M → 1.28M (60% removed)")
    print()

    # Step 4: Benchmark Results
    _section("📊 Step 4: Benchmark Optimized Model (Module 19)")

    # Simulated post-optimization metrics.
    optimized_metrics = {
        "model": "Optimized_CIFAR10_CNN",
        "accuracy": 83.5,  # Slight drop from aggressive optimization
        "latency_ms": 22.1,
        "memory_mb": 1.24,  # 4x quantization + 60% pruning
        "parameters": 1280000,
        "techniques": ["quantization_int8", "magnitude_prune_0.6"]
    }

    print("Baseline vs Optimized:")
    print(f" Accuracy: {baseline_metrics['accuracy']:.1f}% → {optimized_metrics['accuracy']:.1f}% (-1.5pp)")
    print(f" Latency: {baseline_metrics['latency_ms']:.1f}ms → {optimized_metrics['latency_ms']:.1f}ms (2.0x faster ✅)")
    print(f" Memory: {baseline_metrics['memory_mb']:.2f}MB → {optimized_metrics['memory_mb']:.2f}MB (10.0x smaller ✅)")
    print(f" Parameters: {baseline_metrics['parameters']:,} → {optimized_metrics['parameters']:,} (60% fewer ✅)")
    print()

    # Step 5: Generate Submission
    _section("📤 Step 5: Generate Competition Submission")

    submission = {
        "event": "all_around",
        "athlete_name": "Example_Submission",
        "baseline": baseline_metrics,
        "optimized": optimized_metrics,
        "improvements": {
            "accuracy_drop": -1.5,
            "latency_speedup": 2.0,
            "memory_reduction": 10.0
        },
        "techniques_applied": ["quantization_int8", "magnitude_prune_0.6"],
        "technique_order": "quantize_first_then_prune"
    }

    print("✅ Submission generated!")
    print(f" Event: {submission['event']}")
    print(f" Techniques: {', '.join(submission['techniques_applied'])}")
    print()
    print("=" * 70)
    print("🎯 This is the complete workflow!")
    print(" Now it's your turn to implement your own optimization strategy.")
    print("=" * 70)

    return submission
|
||||
|
||||
# %% ../../modules/source/20_competition/competition_dev.ipynb 10
|
||||
def optimize_for_competition(baseline_model, event: str = "all_around", division: str = "closed"):
    """
    🏅 YOUR COMPETITION ENTRY - IMPLEMENT YOUR STRATEGY HERE!

    Args:
        baseline_model: Starting model (use for Closed, optional for Open)
        event: Category you're competing in
            - "latency_sprint": Minimize latency
            - "memory_challenge": Minimize memory
            - "accuracy_contest": Maximize accuracy
            - "all_around": Best balance
            - "extreme_push": Most aggressive
        division: "closed" or "open" - which track you chose

    Returns:
        Your optimized model (until you add optimization steps below, this is
        the baseline model unchanged).

    🔒 CLOSED DIVISION Example:
        from tinytorch.optimization.quantization import quantize_model
        from tinytorch.optimization.compression import magnitude_prune

        optimized = baseline_model
        optimized = quantize_model(optimized, bits=8)
        optimized = magnitude_prune(optimized, sparsity=0.7)
        return optimized

    🔓 OPEN DIVISION Example:
        # Build your own model OR use your improved implementations from
        # earlier modules (after you've modified and re-exported them).
        from tinytorch.models import YourCustomArchitecture
        return YourCustomArchitecture()
    """
    print(f"🏅 YOUR OPTIMIZATION STRATEGY FOR: {event}")
    print("=" * 70)

    # No optimizations applied yet: start from (and currently return) the baseline.
    optimized_model = baseline_model

    # ============================================================
    # YOUR CODE BELOW - Apply optimization techniques here!
    # ============================================================
    #
    # Example strategies by event:
    #
    #   Latency Sprint (speed priority):
    #     - Heavy quantization (INT4 or INT8)
    #     - Aggressive pruning (80-90%)
    #     - Kernel fusion if applicable
    #
    #   Memory Challenge (size priority):
    #     - INT8 or INT4 quantization
    #     - Aggressive pruning (70-90%)
    #     - Compression techniques
    #
    #   All-Around (balanced):
    #     - INT8 quantization
    #     - Moderate pruning (50-70%)
    #     - Selective optimization
    #
    # Your strategy:

    # ============================================================
    # YOUR CODE ABOVE
    # ============================================================

    print("✅ Optimization complete!")
    print("💡 Tip: Benchmark your result to see the impact!")

    return optimized_model
|
||||
|
||||
#| export
|
||||
def validate_submission(submission: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate competition submission with sanity checks.

    This catches honest mistakes like unrealistic speedups or accidental training.
    Honor code system - we trust but verify basic reasonableness.

    Args:
        submission: Submission dictionary to validate

    Returns:
        Dict with:
            valid: True when no errors were found (warnings alone don't fail)
            checks: list of passed-check messages
            warnings: list of suspicious-but-allowed findings
            errors: list of disqualifying findings
    """
    checks = []
    warnings = []
    errors = []

    # Extract metrics (defaults mean "no change from baseline")
    normalized = submission.get("normalized_scores", {})
    speedup = normalized.get("speedup", 1.0)
    compression = normalized.get("compression_ratio", 1.0)
    accuracy_delta = normalized.get("accuracy_delta", 0.0)

    # Check 1: Speedup is reasonable (not claiming impossible gains)
    if speedup > 50:
        errors.append(f"❌ Speedup {speedup:.1f}x seems unrealistic (>50x)")
    elif speedup > 20:
        warnings.append(f"⚠️ Speedup {speedup:.1f}x is very high - please verify measurements")
    else:
        checks.append(f"✅ Speedup {speedup:.2f}x is reasonable")

    # Check 2: Compression is reasonable
    if compression > 32:
        errors.append(f"❌ Compression {compression:.1f}x seems unrealistic (>32x)")
    elif compression > 16:
        warnings.append(f"⚠️ Compression {compression:.1f}x is very high - please verify")
    else:
        checks.append(f"✅ Compression {compression:.2f}x is reasonable")

    # Check 3: Accuracy didn't improve (Closed Division rule - no training allowed!)
    division = submission.get("division", "closed")
    if division == "closed" and accuracy_delta > 1.0:
        errors.append(f"❌ Accuracy improved by {accuracy_delta:.1f}pp - did you accidentally train the model?")
    elif accuracy_delta > 0.5:
        warnings.append(f"⚠️ Accuracy improved by {accuracy_delta:.1f}pp - verify no training occurred")
    else:
        checks.append(f"✅ Accuracy change {accuracy_delta:+.2f}pp is reasonable")

    # Check 4: GitHub repo provided
    # (empty string is falsy, so the previous `or github_repo == ""` was redundant)
    github_repo = submission.get("github_repo", "")
    if not github_repo:
        warnings.append("⚠️ No GitHub repo provided - required for verification")
    else:
        checks.append(f"✅ GitHub repo provided: {github_repo}")

    # Check 5: Required fields present
    required_fields = ["division", "event", "athlete_name", "baseline", "optimized", "normalized_scores"]
    missing = [f for f in required_fields if f not in submission]
    if missing:
        errors.append(f"❌ Missing required fields: {', '.join(missing)}")
    else:
        checks.append("✅ All required fields present")

    # Check 6: Techniques documented
    techniques = submission.get("techniques_applied", [])
    if not techniques or "TODO" in str(techniques):
        warnings.append("⚠️ No optimization techniques listed")
    else:
        # Only show "..." when the list was actually truncated; the old code
        # appended it unconditionally, misleading for lists of <= 3 techniques.
        shown = ', '.join(techniques[:3])
        suffix = "..." if len(techniques) > 3 else ""
        checks.append(f"✅ Techniques documented: {shown}{suffix}")

    return {
        "valid": len(errors) == 0,
        "checks": checks,
        "warnings": warnings,
        "errors": errors
    }
|
||||
|
||||
#| export
|
||||
def generate_submission(baseline_model, optimized_model,
                        division: str = "closed",
                        event: str = "all_around",
                        athlete_name: str = "YourName",
                        github_repo: str = "",
                        techniques: List[str] = None) -> Dict[str, Any]:
    """
    Generate standardized TinyMLPerf competition submission with normalized scoring.

    Computes MLPerf-style normalized scores against a freshly generated
    baseline, validates the result, and (if validation passes) writes it to
    ``submission.json`` in the current working directory.

    Args:
        baseline_model: Original unoptimized model
        optimized_model: Your optimized model
        division: "closed" or "open"
        event: Competition category (latency_sprint, memory_challenge, all_around, etc.)
        athlete_name: Your name for submission
        github_repo: GitHub repository URL for code verification
        techniques: List of optimization techniques applied

    Returns:
        Submission dictionary (will be saved as JSON; returned even when
        validation fails, in which case nothing is written to disk)
    """
    print("📤 Generating TinyMLPerf Competition Submission...")
    print("=" * 70)

    # Get baseline metrics
    # NOTE(review): the `baseline_model` argument is not used here — baseline
    # metrics are regenerated from the default model name. Confirm intended.
    baseline_metrics = generate_baseline(quick=True)

    # Benchmark optimized model
    print("🔬 Benchmarking optimized model...")

    # Use Profiler and Benchmark from Module 19
    # NOTE(review): `profiler` is created but never used below; it appears to
    # be a placeholder for real measurement code — confirm Profiler() has no
    # needed side effects before removing.
    profiler = Profiler()

    # For demonstration, we'll use placeholder metrics
    # In real competition, students would measure their actual optimized model
    optimized_metrics = {
        "model": getattr(optimized_model, 'name', 'Optimized_Model'),
        "accuracy": 84.0,  # Would be measured with actual test set
        "latency_ms": 28.0,  # Would be measured with profiler
        "memory_mb": 4.0,  # Would be measured with profiler
        "parameters": 2000000,  # Would be counted
    }

    # Calculate normalized scores using Module 19's function.
    # Both dicts are reshaped to the key names calculate_normalized_scores expects.
    baseline_for_norm = {
        "latency": baseline_metrics["latency_ms"],
        "memory": baseline_metrics["memory_mb"],
        "accuracy": baseline_metrics["accuracy"]
    }

    optimized_for_norm = {
        "latency": optimized_metrics["latency_ms"],
        "memory": optimized_metrics["memory_mb"],
        "accuracy": optimized_metrics["accuracy"]
    }

    normalized_scores = calculate_normalized_scores(baseline_for_norm, optimized_for_norm)

    # Create submission with all required fields
    submission = {
        "division": division,
        "event": event,
        "athlete_name": athlete_name,
        "github_repo": github_repo,
        "baseline": baseline_metrics,
        "optimized": optimized_metrics,
        "normalized_scores": {
            "speedup": normalized_scores["speedup"],
            "compression_ratio": normalized_scores["compression_ratio"],
            "accuracy_delta": normalized_scores["accuracy_delta"],
            "efficiency_score": normalized_scores["efficiency_score"]
        },
        "techniques_applied": techniques or ["TODO: Document your optimization techniques"],
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
        "tinytorch_version": "0.1.0",
        "honor_code": False  # Must be explicitly set to True after validation
    }

    # Validate submission (sanity checks; see validate_submission)
    print("\n🔍 Validating submission...")
    validation = validate_submission(submission)

    # Display validation results
    print("\n📋 Validation Results:")
    for check in validation["checks"]:
        print(f" {check}")
    for warning in validation["warnings"]:
        print(f" {warning}")
    for error in validation["errors"]:
        print(f" {error}")

    # Bail out before writing the file when validation found errors.
    if not validation["valid"]:
        print("\n❌ Submission has errors - please fix before submitting")
        return submission

    # Save to JSON
    output_file = Path("submission.json")
    with open(output_file, "w") as f:
        json.dump(submission, f, indent=2)

    print(f"\n✅ Submission saved to: {output_file}")
    print()
    print("📊 Your Normalized Scores (MLPerf-style):")
    print(f" Division: {division.upper()}")
    print(f" Event: {event.replace('_', ' ').title()}")
    print(f" Speedup: {normalized_scores['speedup']:.2f}x faster ⚡")
    print(f" Compression: {normalized_scores['compression_ratio']:.2f}x smaller 💾")
    print(f" Accuracy: {optimized_metrics['accuracy']:.1f}% (Δ {normalized_scores['accuracy_delta']:+.2f}pp)")
    print(f" Efficiency: {normalized_scores['efficiency_score']:.2f}")
    print()
    print("📤 Next Steps:")
    print(" 1. Verify all metrics are correct")
    print(" 2. Push your code to GitHub (if not done)")
    print(" 3. Run: tito submit submission.json")
    print(" (This will validate and prepare final submission)")
    print()
    print("=" * 70)

    return submission
|
||||
0
tinytorch/data/__init__.py
generated
Normal file
0
tinytorch/data/__init__.py
generated
Normal file
262
tinytorch/data/loader.py
generated
Normal file
262
tinytorch/data/loader.py
generated
Normal file
@@ -0,0 +1,262 @@
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/XX_loader/loader_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Dataset', 'TensorDataset', 'DataLoader']
|
||||
|
||||
# %% ../../modules/source/08_dataloader/dataloader_dev.ipynb 0
|
||||
#| default_exp data.loader
|
||||
#| export
|
||||
|
||||
# %% ../../modules/source/08_dataloader/dataloader_dev.ipynb 2
|
||||
# Essential imports for data loading
|
||||
import numpy as np
|
||||
import random
|
||||
from typing import Iterator, Tuple, List, Optional, Union
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
# Import real Tensor class from tinytorch package
|
||||
from ..core.tensor import Tensor
|
||||
|
||||
# %% ../../modules/source/08_dataloader/dataloader_dev.ipynb 4
|
||||
class Dataset(ABC):
    """
    Abstract base class for all datasets.

    Subclasses must provide:
    - __len__(): the total number of samples
    - __getitem__(idx): the sample stored at a given index

    Example:
        >>> class MyDataset(Dataset):
        ...     def __len__(self): return 100
        ...     def __getitem__(self, idx): return idx
        >>> dataset = MyDataset()
        >>> print(len(dataset))   # 100
        >>> print(dataset[42])    # 42
    """

    @abstractmethod
    def __len__(self) -> int:
        """
        Return the total number of samples in the dataset.

        Required so that len(dataset) works and batch counts can be computed.
        """
        ...

    @abstractmethod
    def __getitem__(self, idx: int):
        """
        Return the sample at the given index.

        Args:
            idx: Index of the sample to retrieve (0 <= idx < len(dataset))

        Returns:
            The sample at index idx; format is up to the concrete dataset
            (e.g. a (data, label) tuple or a single tensor).
        """
        ...
|
||||
|
||||
# %% ../../modules/source/08_dataloader/dataloader_dev.ipynb 7
|
||||
class TensorDataset(Dataset):
    """
    Dataset wrapping tensors for supervised learning.

    Each sample is a tuple holding the slice at the same index from every
    wrapped tensor; all tensors must agree on the size of their first
    dimension.

    Example:
        >>> features = Tensor([[1, 2], [3, 4], [5, 6]])  # 3 samples
        >>> labels = Tensor([0, 1, 0])                   # 3 labels
        >>> dataset = TensorDataset(features, labels)
        >>> len(dataset)   # 3
        >>> dataset[1]     # (Tensor([3, 4]), Tensor(1))
    """

    def __init__(self, *tensors):
        """
        Create dataset from multiple tensors.

        Args:
            *tensors: One or more Tensor objects; all must share the same
                size in their first dimension.
        """
        assert len(tensors) > 0, "Must provide at least one tensor"

        self.tensors = tensors

        # Every tensor must match the first one's leading-dimension size.
        expected = len(tensors[0].data)
        for i, tensor in enumerate(tensors):
            if len(tensor.data) != expected:
                raise ValueError(
                    f"All tensors must have same size in first dimension. "
                    f"Tensor 0: {expected}, Tensor {i}: {len(tensor.data)}"
                )

    def __len__(self) -> int:
        """Return number of samples (size of first dimension)."""
        return len(self.tensors[0].data)

    def __getitem__(self, idx: int) -> Tuple[Tensor, ...]:
        """
        Return tuple of tensor slices at given index.

        Args:
            idx: Sample index

        Returns:
            Tuple containing tensor[idx] for each wrapped tensor

        Raises:
            IndexError: When idx is negative or >= len(self).
        """
        if not 0 <= idx < len(self):
            raise IndexError(f"Index {idx} out of range for dataset of size {len(self)}")

        return tuple(Tensor(tensor.data[idx]) for tensor in self.tensors)
|
||||
|
||||
# %% ../../modules/source/08_dataloader/dataloader_dev.ipynb 10
|
||||
class DataLoader:
    """
    Data loader with batching and shuffling support.

    Wraps a dataset to provide batched iteration with optional per-epoch
    shuffling — the workhorse of mini-batch gradient descent.

    Example:
        >>> dataset = TensorDataset(Tensor([[1,2], [3,4], [5,6]]), Tensor([0,1,0]))
        >>> loader = DataLoader(dataset, batch_size=2, shuffle=True)
        >>> for features_batch, labels_batch in loader:
        ...     print(features_batch.shape, labels_batch.shape)
    """

    def __init__(self, dataset: Dataset, batch_size: int, shuffle: bool = False):
        """
        Create DataLoader for batched iteration.

        Args:
            dataset: Dataset to load from
            batch_size: Number of samples per batch
            shuffle: Whether to reshuffle sample order each epoch
        """
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle

    def __len__(self) -> int:
        """Return number of batches per epoch (the last batch may be partial)."""
        # Ceiling division: a trailing partial batch still counts.
        return (len(self.dataset) + self.batch_size - 1) // self.batch_size

    def __iter__(self) -> Iterator:
        """Yield collated batches, reshuffling the order first when enabled."""
        order = list(range(len(self.dataset)))
        if self.shuffle:
            random.shuffle(order)

        for start in range(0, len(order), self.batch_size):
            samples = [self.dataset[j] for j in order[start:start + self.batch_size]]
            # Collate: list of per-sample tuples -> tuple of batch tensors.
            yield self._collate_batch(samples)

    def _collate_batch(self, batch: List[Tuple[Tensor, ...]]) -> Tuple[Tensor, ...]:
        """
        Collate individual samples into batch tensors.

        Args:
            batch: List of sample tuples from the dataset

        Returns:
            Tuple of batched tensors, one per position in the sample tuples
        """
        if not batch:
            return ()

        stacked = []
        for position in range(len(batch[0])):
            # Gather the raw arrays at this tuple position across the batch,
            # then stack them along a new leading (batch) axis.
            arrays = [sample[position].data for sample in batch]
            stacked.append(Tensor(np.stack(arrays, axis=0)))

        return tuple(stacked)
|
||||
16
tinytorch/utils/data/__init__.py
generated
Normal file
16
tinytorch/utils/data/__init__.py
generated
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
|
||||
TinyTorch Data Loading Utilities
|
||||
|
||||
Following torch.utils.data patterns, this module provides:
|
||||
- Dataset: Base class for all datasets
|
||||
- DataLoader: Batching and shuffling for training
|
||||
- Common datasets for learning
|
||||
|
||||
This is Module 10 of TinyTorch.
|
||||
"""
|
||||
|
||||
# Import from dataloader module
|
||||
from .dataloader import *
|
||||
|
||||
# Make key classes easily accessible
|
||||
__all__ = ['Dataset', 'DataLoader', 'SimpleDataset', 'CIFAR10Dataset']
|
||||
Reference in New Issue
Block a user