mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-03 07:02:53 -05:00
Add TinyTorch Profiler Utility
- Add tinytorch.utils.profiler following PyTorch's utils pattern - Includes SimpleProfiler class for educational performance measurement - Provides timing, memory usage, and system metrics - Follows PyTorch's torch.utils.* organizational pattern - Module 11: Kernels uses profiler for performance demonstrations Features: - Wall time and CPU time measurement - Memory usage tracking (peak, delta, percentages) - Array information (shape, size, dtype) - CPU and system metrics - Clean educational interface for ML performance learning Import pattern: from tinytorch.utils.profiler import SimpleProfiler
This commit is contained in:
226
modules/source/utils/profiler.py
Normal file
226
modules/source/utils/profiler.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
TinyTorch Utils: Simple Educational Profiler
|
||||
|
||||
A lightweight profiling utility for measuring performance of ML operations.
|
||||
Focused on measuring individual functions - students do their own comparisons.
|
||||
"""
|
||||
|
||||
import time
|
||||
import sys
|
||||
import gc
|
||||
import numpy as np
|
||||
from typing import Callable, Dict, Any, Optional
|
||||
|
||||
try:
|
||||
import psutil
|
||||
HAS_PSUTIL = True
|
||||
except ImportError:
|
||||
HAS_PSUTIL = False
|
||||
|
||||
try:
|
||||
import tracemalloc
|
||||
HAS_TRACEMALLOC = True
|
||||
except ImportError:
|
||||
HAS_TRACEMALLOC = False
|
||||
|
||||
class SimpleProfiler:
|
||||
"""
|
||||
Simple profiler for measuring individual function performance.
|
||||
|
||||
Measures timing, memory usage, and other key metrics for a single function.
|
||||
Students collect multiple measurements and compare results themselves.
|
||||
"""
|
||||
|
||||
def __init__(self, track_memory: bool = True, track_cpu: bool = True):
|
||||
self.track_memory = track_memory and HAS_TRACEMALLOC
|
||||
self.track_cpu = track_cpu and HAS_PSUTIL
|
||||
|
||||
if self.track_memory:
|
||||
tracemalloc.start()
|
||||
|
||||
def _get_memory_info(self) -> Dict[str, Any]:
|
||||
"""Get current memory information."""
|
||||
if not self.track_memory:
|
||||
return {}
|
||||
|
||||
try:
|
||||
current, peak = tracemalloc.get_traced_memory()
|
||||
return {
|
||||
'current_memory_mb': current / 1024 / 1024,
|
||||
'peak_memory_mb': peak / 1024 / 1024
|
||||
}
|
||||
except:
|
||||
return {}
|
||||
|
||||
def _get_cpu_info(self) -> Dict[str, Any]:
|
||||
"""Get current CPU information."""
|
||||
if not self.track_cpu:
|
||||
return {}
|
||||
|
||||
try:
|
||||
process = psutil.Process()
|
||||
return {
|
||||
'cpu_percent': process.cpu_percent(),
|
||||
'memory_percent': process.memory_percent(),
|
||||
'num_threads': process.num_threads()
|
||||
}
|
||||
except:
|
||||
return {}
|
||||
|
||||
def _get_array_info(self, result: Any) -> Dict[str, Any]:
|
||||
"""Get information about numpy arrays."""
|
||||
if not isinstance(result, np.ndarray):
|
||||
return {}
|
||||
|
||||
return {
|
||||
'result_shape': result.shape,
|
||||
'result_dtype': str(result.dtype),
|
||||
'result_size_mb': result.nbytes / 1024 / 1024,
|
||||
'result_elements': result.size
|
||||
}
|
||||
|
||||
def profile(self, func: Callable, *args, name: Optional[str] = None, warmup: bool = True, **kwargs) -> Dict[str, Any]:
|
||||
"""
|
||||
Profile a single function execution with comprehensive metrics.
|
||||
|
||||
Args:
|
||||
func: Function to profile
|
||||
*args: Arguments to pass to function
|
||||
name: Optional name for the function (defaults to func.__name__)
|
||||
warmup: Whether to do a warmup run (recommended for fair timing)
|
||||
**kwargs: Keyword arguments to pass to function
|
||||
|
||||
Returns:
|
||||
Dictionary with comprehensive performance metrics
|
||||
|
||||
Example:
|
||||
profiler = SimpleProfiler()
|
||||
result = profiler.profile(my_function, arg1, arg2, name="My Function")
|
||||
print(f"Time: {result['wall_time']:.4f}s")
|
||||
print(f"Memory: {result['memory_delta_mb']:.2f}MB")
|
||||
"""
|
||||
func_name = name or func.__name__
|
||||
|
||||
# Reset memory tracking
|
||||
if self.track_memory:
|
||||
tracemalloc.clear_traces()
|
||||
|
||||
# Warm up (important for fair comparison)
|
||||
if warmup:
|
||||
try:
|
||||
warmup_result = func(*args, **kwargs)
|
||||
del warmup_result
|
||||
except:
|
||||
pass
|
||||
|
||||
# Force garbage collection for clean measurement
|
||||
gc.collect()
|
||||
|
||||
# Get baseline measurements
|
||||
memory_before = self._get_memory_info()
|
||||
cpu_before = self._get_cpu_info()
|
||||
|
||||
# Time the actual execution
|
||||
start_time = time.time()
|
||||
start_cpu_time = time.process_time()
|
||||
|
||||
result = func(*args, **kwargs)
|
||||
|
||||
end_time = time.time()
|
||||
end_cpu_time = time.process_time()
|
||||
|
||||
# Get post-execution measurements
|
||||
memory_after = self._get_memory_info()
|
||||
cpu_after = self._get_cpu_info()
|
||||
|
||||
# Calculate metrics
|
||||
wall_time = end_time - start_time
|
||||
cpu_time = end_cpu_time - start_cpu_time
|
||||
|
||||
profile_result = {
|
||||
'name': func_name,
|
||||
'wall_time': wall_time,
|
||||
'cpu_time': cpu_time,
|
||||
'cpu_efficiency': (cpu_time / wall_time) if wall_time > 0 else 0,
|
||||
'result': result
|
||||
}
|
||||
|
||||
# Add memory metrics
|
||||
if self.track_memory and memory_before and memory_after:
|
||||
profile_result.update({
|
||||
'memory_before_mb': memory_before.get('current_memory_mb', 0),
|
||||
'memory_after_mb': memory_after.get('current_memory_mb', 0),
|
||||
'peak_memory_mb': memory_after.get('peak_memory_mb', 0),
|
||||
'memory_delta_mb': memory_after.get('current_memory_mb', 0) - memory_before.get('current_memory_mb', 0)
|
||||
})
|
||||
|
||||
# Add CPU metrics
|
||||
if self.track_cpu and cpu_after:
|
||||
profile_result.update({
|
||||
'cpu_percent': cpu_after.get('cpu_percent', 0),
|
||||
'memory_percent': cpu_after.get('memory_percent', 0),
|
||||
'num_threads': cpu_after.get('num_threads', 1)
|
||||
})
|
||||
|
||||
# Add array information
|
||||
profile_result.update(self._get_array_info(result))
|
||||
|
||||
return profile_result
|
||||
|
||||
def print_result(self, profile_result: Dict[str, Any], show_details: bool = False) -> None:
|
||||
"""
|
||||
Print profiling results in a readable format.
|
||||
|
||||
Args:
|
||||
profile_result: Result from profile() method
|
||||
show_details: Whether to show detailed metrics
|
||||
"""
|
||||
name = profile_result['name']
|
||||
wall_time = profile_result['wall_time']
|
||||
|
||||
print(f"📊 {name}: {wall_time:.4f}s")
|
||||
|
||||
if show_details:
|
||||
if 'memory_delta_mb' in profile_result:
|
||||
print(f" 💾 Memory: {profile_result['memory_delta_mb']:.2f}MB delta, {profile_result['peak_memory_mb']:.2f}MB peak")
|
||||
if 'result_size_mb' in profile_result:
|
||||
print(f" 🔢 Output: {profile_result['result_shape']} ({profile_result['result_size_mb']:.2f}MB)")
|
||||
if 'cpu_efficiency' in profile_result:
|
||||
print(f" ⚡ CPU: {profile_result['cpu_efficiency']:.2f} efficiency")
|
||||
|
||||
def get_capabilities(self) -> Dict[str, bool]:
|
||||
"""Get information about profiler capabilities."""
|
||||
return {
|
||||
'memory_tracking': self.track_memory,
|
||||
'cpu_tracking': self.track_cpu,
|
||||
'has_psutil': HAS_PSUTIL,
|
||||
'has_tracemalloc': HAS_TRACEMALLOC
|
||||
}
|
||||
|
||||
# Convenience function for quick profiling
|
||||
def profile_function(func: Callable, *args, name: Optional[str] = None,
|
||||
show_details: bool = False, **kwargs) -> Dict[str, Any]:
|
||||
"""
|
||||
Quick profiling of a single function.
|
||||
|
||||
Args:
|
||||
func: Function to profile
|
||||
*args: Arguments to pass to function
|
||||
name: Optional name for the function
|
||||
show_details: Whether to print detailed metrics
|
||||
**kwargs: Keyword arguments to pass to function
|
||||
|
||||
Returns:
|
||||
Dictionary with profiling results
|
||||
|
||||
Example:
|
||||
result = profile_function(my_matmul, A, B, name="Custom MatMul", show_details=True)
|
||||
print(f"Execution time: {result['wall_time']:.4f}s")
|
||||
"""
|
||||
profiler = SimpleProfiler(track_memory=True, track_cpu=True)
|
||||
result = profiler.profile(func, *args, name=name, **kwargs)
|
||||
|
||||
if show_details:
|
||||
profiler.print_result(result, show_details=True)
|
||||
|
||||
return result
|
||||
Reference in New Issue
Block a user