# TinyTorch/tinytorch/profiling/profiler.py

# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║                        🚨 CRITICAL WARNING 🚨                                ║
# ║                     AUTOGENERATED! DO NOT EDIT!                              ║
# ║                                                                               ║
# ║  This file is AUTOMATICALLY GENERATED from source modules.                   ║
# ║  ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported!            ║
# ║                                                                               ║
# ║  ✅ TO EDIT: modules/source/15_profiling/profiling_dev.py           ║
# ║  ✅ TO EXPORT: Run 'tito module complete <module_name>'                      ║
# ║                                                                               ║
# ║  🛡️ STUDENT PROTECTION: This file contains optimized implementations.        ║
# ║     Editing it directly may break module functionality and training.         ║
# ║                                                                               ║
# ║  🎓 LEARNING TIP: Work in modules/source/ - that's where real development    ║
# ║     happens! The tinytorch/ directory is just the compiled output.           ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# %% auto 0
__all__ = ['Profiler', 'ProfilerComplete']

# %% ../../modules/source/15_profiling/profiling_dev.ipynb 1
import time
import numpy as np
import tracemalloc
from typing import Dict, List, Any, Optional, Tuple
from collections import defaultdict
import gc

# Import our TinyTorch components for profiling
from ..core.tensor import Tensor
from ..core.layers import Linear
from ..core.spatial import Conv2d

# %% ../../modules/source/15_profiling/profiling_dev.ipynb 5
class Profiler:
    """
    ML model profiler intended for performance analysis.

    The module describes it as measuring parameters, FLOPs, memory usage and
    latency to guide optimization and deployment planning. The class as
    defined here only establishes the empty measurement state.
    """

    def __init__(self):
        """Create a profiler whose bookkeeping containers start out empty."""
        ### BEGIN SOLUTION
        # Named measurement results; nothing recorded yet.
        self.measurements: Dict[str, Any] = {}
        # Per-operation counters; unseen keys read as 0.
        self.operation_counts: Dict[str, int] = defaultdict(int)
        # No memory tracker is attached at construction time.
        self.memory_tracker: Optional[Any] = None
        ### END SOLUTION

# %% ../../modules/source/15_profiling/profiling_dev.ipynb 37
class ProfilerComplete:
    """
    Complete profiler with all measurement capabilities for milestone use.

    This is the exported version students build through the module exercises.
    It offers parameter counting, a simplified FLOP estimate, a
    tracemalloc-based memory measurement and a median wall-clock latency
    measurement.
    """

    def __init__(self):
        """Initialize profiler with measurement state."""
        self.measurements = {}
        self.operation_counts = defaultdict(int)
        self.memory_tracker = None

    @staticmethod
    def _resolve_forward(model):
        """Return the callable that runs one forward pass, or None if there is none.

        Prefers ``model.forward`` and falls back to calling ``model`` itself.
        """
        forward = getattr(model, 'forward', None)
        if forward is not None:
            return forward
        if hasattr(model, '__call__'):
            return model
        return None

    def count_parameters(self, model) -> int:
        """Count the parameters of a model.

        Args:
            model: Object exposing either ``parameters()`` (an iterable of
                items with ``.data.size``) or a ``weight`` attribute and an
                optional ``bias`` attribute with ``.data.size``.

        Returns:
            Sum of the element counts; 0 when the model exposes neither.
        """
        if hasattr(model, 'parameters'):
            return sum(param.data.size for param in model.parameters())

        total_params = 0
        if hasattr(model, 'weight'):
            total_params += model.weight.data.size
            bias = getattr(model, 'bias', None)
            if bias is not None:
                total_params += bias.data.size
        return total_params

    def count_flops(self, model, input_shape: Tuple[int, ...]) -> int:
        """Estimate FLOPs for one forward pass.

        The estimate is derived from the model's class name and shapes only;
        no forward pass is executed and no input tensor is allocated.

        Args:
            model: Layer to analyse. Only classes named ``Linear`` and
                ``Conv2d`` are recognised.
            input_shape: Shape of the input; its last dimension is used as
                the number of input features for ``Linear``.

        Returns:
            ``in_features * out_features * 2`` for ``Linear`` (leading
            dimensions of ``input_shape`` are not factored in), a fixed
            placeholder of 1000000 for ``Conv2d``, and 0 for anything else.
        """
        model_name = model.__class__.__name__

        if model_name == 'Linear':
            in_features = input_shape[-1]
            # assumes weight is laid out as (in_features, out_features) -- TODO confirm
            out_features = model.weight.shape[1] if hasattr(model, 'weight') else 1
            # One multiply and one add per weight element.
            return in_features * out_features * 2

        if model_name == 'Conv2d':
            return 1000000  # Simplified for now

        return 0

    def measure_memory(self, model, input_shape: Tuple[int, ...]) -> Dict[str, float]:
        """Measure memory usage during a forward pass.

        A random input of ``input_shape`` is built and pushed through the
        model while ``tracemalloc`` is tracing. Tracing is started (and later
        stopped) by this method only when it was not already active, so an
        outer tracing session is left intact. In that case the reported peak
        may include allocations made before this call.

        Args:
            model: Model to run; see ``_resolve_forward`` for how it is called.
            input_shape: Shape of the random input tensor.

        Returns:
            Dict with ``parameter_memory_mb``, ``activation_memory_mb``,
            ``peak_memory_mb`` and ``memory_efficiency``
            (parameter memory divided by peak memory, 0 when the peak is 0).
        """
        param_count = self.count_parameters(model)
        # assumes 4 bytes per parameter (float32-sized) -- TODO confirm
        parameter_memory_bytes = param_count * 4
        parameter_memory_mb = parameter_memory_bytes / (1024 * 1024)

        started_here = not tracemalloc.is_tracing()
        if started_here:
            tracemalloc.start()
        try:
            baseline_memory, _ = tracemalloc.get_traced_memory()

            # The input is created inside the traced region so it counts
            # towards the measured peak.
            dummy_input = Tensor(np.random.randn(*input_shape))

            run = self._resolve_forward(model)
            if run is not None:
                try:
                    run(dummy_input)
                except Exception:
                    # Best effort: a failing forward pass still yields the
                    # memory observed up to the point of failure.
                    pass

            # get_traced_memory() returns (current, peak); the peak is the
            # second element.
            _, peak_memory = tracemalloc.get_traced_memory()
        finally:
            if started_here:
                tracemalloc.stop()

        peak_memory_mb = max(0, peak_memory - baseline_memory) / (1024 * 1024)
        activation_memory_mb = max(0, peak_memory_mb - parameter_memory_mb)

        return {
            'parameter_memory_mb': parameter_memory_mb,
            'activation_memory_mb': activation_memory_mb,
            'peak_memory_mb': peak_memory_mb,
            'memory_efficiency': parameter_memory_mb / peak_memory_mb if peak_memory_mb > 0 else 0
        }

    def measure_latency(self, model, input_tensor, warmup: int = 10, iterations: int = 100) -> float:
        """Measure model inference latency as a median over repeated runs.

        Args:
            model: Model to run; see ``_resolve_forward`` for how it is called.
            input_tensor: Input passed unchanged to every forward call.
            warmup: Number of untimed calls made before measuring.
            iterations: Number of timed calls.

        Returns:
            Median wall-clock time of the timed calls, in milliseconds.
            Ordinary exceptions raised by the model are swallowed, so the time
            of a failing call is still recorded.
        """
        # Resolve the callable once so attribute lookups are not timed.
        run = self._resolve_forward(model)

        # Warmup
        for _ in range(warmup):
            if run is not None:
                try:
                    run(input_tensor)
                except Exception:
                    pass

        # Measurement
        times = []
        for _ in range(iterations):
            start = time.perf_counter()
            if run is not None:
                try:
                    run(input_tensor)
                except Exception:
                    pass
            end = time.perf_counter()
            times.append(end - start)

        return float(np.median(times) * 1000)