mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-05-05 21:07:32 -05:00
- Fixed FLOPs calculation to handle models with a `.layers` attribute (not just Sequential).
- Fixed quantization compression ratio to calculate the theoretical INT8 size (1 byte per element).
- Fixed the pruning accuracy delta sign to correctly show the +/- direction.
- Added missing export directives for Tensor and numpy imports in the acceleration module.
Results now correctly show: FLOPs: 4,736 (was incorrectly showing 64); quantization: 4.0x compression (was incorrectly showing 1.0x); pruning delta: correct +/- sign based on the actual accuracy change.
615 lines
24 KiB
Python
Generated
615 lines
24 KiB
Python
Generated
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||
# ║ ║
|
||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||
# ║ ║
|
||
# ║ ✅ TO EDIT: src/XX_profiler/XX_profiler.py ║
|
||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||
# ║ ║
|
||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||
# ║ Editing it directly may break module functionality and training. ║
|
||
# ║ ║
|
||
# ║ 🎓 LEARNING TIP: Work in src/ (developers) or modules/ (learners) ║
|
||
# ║ The tinytorch/ directory is generated code - edit source files instead! ║
|
||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||
# %% auto 0
|
||
__all__ = ['BYTES_PER_FLOAT32', 'KB_TO_BYTES', 'MB_TO_BYTES', 'Profiler', 'quick_profile', 'analyze_weight_distribution']
|
||
|
||
# %% ../../modules/14_profiling/14_profiling.ipynb 1
|
||
import sys
|
||
import os
|
||
import time
|
||
import numpy as np
|
||
import tracemalloc
|
||
from typing import Dict, List, Any, Optional, Tuple
|
||
from collections import defaultdict
|
||
import gc
|
||
|
||
# Import from TinyTorch package (previous modules must be completed and exported)
|
||
from ..core.tensor import Tensor
|
||
from ..core.layers import Linear
|
||
from ..core.spatial import Conv2d
|
||
|
||
# Constants for memory and performance measurement
|
||
BYTES_PER_FLOAT32 = 4 # Standard float32 size in bytes
|
||
KB_TO_BYTES = 1024 # Kilobytes to bytes conversion
|
||
MB_TO_BYTES = 1024 * 1024 # Megabytes to bytes conversion
|
||
|
||
# %% ../../modules/14_profiling/14_profiling.ipynb 6
|
||
class Profiler:
|
||
"""
|
||
Professional-grade ML model profiler for performance analysis.
|
||
|
||
Measures parameters, FLOPs, memory usage, and latency with statistical rigor.
|
||
Used for optimization guidance and deployment planning.
|
||
"""
|
||
|
||
def __init__(self):
|
||
"""
|
||
Initialize profiler with measurement state.
|
||
|
||
TODO: Set up profiler tracking structures
|
||
|
||
APPROACH:
|
||
1. Create empty measurements dictionary
|
||
2. Initialize operation counters
|
||
3. Set up memory tracking state
|
||
|
||
EXAMPLE:
|
||
>>> profiler = Profiler()
|
||
>>> profiler.measurements
|
||
{}
|
||
|
||
HINTS:
|
||
- Use defaultdict(int) for operation counters
|
||
- measurements dict will store timing results
|
||
"""
|
||
### BEGIN SOLUTION
|
||
self.measurements = {}
|
||
self.operation_counts = defaultdict(int)
|
||
self.memory_tracker = None
|
||
### END SOLUTION
|
||
|
||
def count_parameters(self, model) -> int:
|
||
"""
|
||
Count total trainable parameters in a model.
|
||
|
||
TODO: Implement parameter counting for any model with parameters() method
|
||
|
||
APPROACH:
|
||
1. Get all parameters from model.parameters() if available
|
||
2. For single layers, count weight and bias directly
|
||
3. Sum total element count across all parameter tensors
|
||
|
||
EXAMPLE:
|
||
>>> linear = Linear(128, 64) # 128*64 + 64 = 8256 parameters
|
||
>>> profiler = Profiler()
|
||
>>> count = profiler.count_parameters(linear)
|
||
>>> print(count)
|
||
8256
|
||
|
||
HINTS:
|
||
- Use parameter.data.size for tensor element count
|
||
- Handle models with and without parameters() method
|
||
- Don't forget bias terms when present
|
||
"""
|
||
### BEGIN SOLUTION
|
||
total_params = 0
|
||
|
||
# Handle SimpleModel pattern (has .layers attribute)
|
||
if hasattr(model, 'layers'):
|
||
# SimpleModel: iterate through layers
|
||
for layer in model.layers:
|
||
for param in layer.parameters():
|
||
total_params += param.data.size
|
||
elif hasattr(model, 'parameters'):
|
||
# Model with direct parameters() method
|
||
for param in model.parameters():
|
||
total_params += param.data.size
|
||
elif hasattr(model, 'weight'):
|
||
# Single layer (Linear, Conv2d) - all have .weight
|
||
total_params += model.weight.data.size
|
||
# Check for bias (may be None)
|
||
if hasattr(model, 'bias') and model.bias is not None:
|
||
total_params += model.bias.data.size
|
||
else:
|
||
# No parameters (activations, etc.)
|
||
total_params = 0
|
||
|
||
return total_params
|
||
### END SOLUTION
|
||
|
||
def count_flops(self, model, input_shape: Tuple[int, ...]) -> int:
|
||
"""
|
||
Count FLOPs (Floating Point Operations) for one forward pass.
|
||
|
||
TODO: Implement FLOP counting for different layer types
|
||
|
||
APPROACH:
|
||
1. Create dummy input with given shape
|
||
2. Calculate FLOPs based on layer type and dimensions
|
||
3. Handle different model architectures (Linear, Conv2d, Sequential)
|
||
|
||
LAYER-SPECIFIC FLOP FORMULAS:
|
||
- Linear: input_features × output_features × 2 (matmul + bias)
|
||
- Conv2d: output_h × output_w × kernel_h × kernel_w × in_channels × out_channels × 2
|
||
- Activation: Usually 1 FLOP per element (ReLU, Sigmoid)
|
||
|
||
EXAMPLE:
|
||
>>> linear = Linear(128, 64)
|
||
>>> profiler = Profiler()
|
||
>>> flops = profiler.count_flops(linear, (1, 128))
|
||
>>> print(flops) # 128 * 64 * 2 = 16384
|
||
16384
|
||
|
||
HINTS:
|
||
- Batch dimension doesn't affect per-sample FLOPs
|
||
- Focus on major operations (matmul, conv) first
|
||
- For Sequential models, sum FLOPs of all layers
|
||
"""
|
||
### BEGIN SOLUTION
|
||
# Create dummy input (unused but kept for interface consistency)
|
||
_dummy_input = Tensor(np.random.randn(*input_shape))
|
||
total_flops = 0
|
||
|
||
# Handle different model types
|
||
if hasattr(model, '__class__'):
|
||
model_name = model.__class__.__name__
|
||
|
||
if model_name == 'Linear':
|
||
# Linear layer: input_features × output_features × 2
|
||
in_features = input_shape[-1]
|
||
out_features = model.weight.shape[1] if hasattr(model, 'weight') else 1
|
||
total_flops = in_features * out_features * 2
|
||
|
||
elif model_name == 'Conv2d':
|
||
# Conv2d layer: complex calculation based on output size
|
||
# Simplified: assume we know the output dimensions
|
||
if hasattr(model, 'kernel_size') and hasattr(model, 'in_channels'):
|
||
_batch_size = input_shape[0] if len(input_shape) > 3 else 1
|
||
in_channels = model.in_channels
|
||
out_channels = model.out_channels
|
||
kernel_h = kernel_w = model.kernel_size
|
||
|
||
# Estimate output size (simplified)
|
||
input_h, input_w = input_shape[-2], input_shape[-1]
|
||
output_h = input_h // (model.stride if hasattr(model, 'stride') else 1)
|
||
output_w = input_w // (model.stride if hasattr(model, 'stride') else 1)
|
||
|
||
total_flops = (output_h * output_w * kernel_h * kernel_w *
|
||
in_channels * out_channels * 2)
|
||
|
||
elif model_name == 'Sequential' or hasattr(model, 'layers'):
|
||
# Sequential model or model with layers: sum FLOPs of all layers
|
||
current_shape = input_shape
|
||
for layer in model.layers:
|
||
layer_flops = self.count_flops(layer, current_shape)
|
||
total_flops += layer_flops
|
||
# Update shape for next layer (simplified)
|
||
if hasattr(layer, 'weight'):
|
||
current_shape = current_shape[:-1] + (layer.weight.shape[1],)
|
||
|
||
else:
|
||
# Activation or other: assume 1 FLOP per element
|
||
total_flops = np.prod(input_shape)
|
||
|
||
return total_flops
|
||
### END SOLUTION
|
||
|
||
def measure_memory(self, model, input_shape: Tuple[int, ...]) -> Dict[str, float]:
|
||
"""
|
||
Measure memory usage during forward pass.
|
||
|
||
TODO: Implement memory tracking for model execution
|
||
|
||
APPROACH:
|
||
1. Use tracemalloc to track memory allocation
|
||
2. Measure baseline memory before model execution
|
||
3. Run forward pass and track peak usage
|
||
4. Calculate different memory components
|
||
|
||
RETURN DICTIONARY:
|
||
- 'parameter_memory_mb': Memory for model parameters
|
||
- 'activation_memory_mb': Memory for activations
|
||
- 'peak_memory_mb': Maximum memory usage
|
||
- 'memory_efficiency': Ratio of useful to total memory
|
||
|
||
EXAMPLE:
|
||
>>> linear = Linear(1024, 512)
|
||
>>> profiler = Profiler()
|
||
>>> memory = profiler.measure_memory(linear, (32, 1024))
|
||
>>> print(f"Parameters: {memory['parameter_memory_mb']:.1f} MB")
|
||
Parameters: 2.1 MB
|
||
|
||
HINTS:
|
||
- Use tracemalloc.start() and tracemalloc.get_traced_memory()
|
||
- Account for float32 = 4 bytes per parameter
|
||
- Activation memory scales with batch size
|
||
"""
|
||
### BEGIN SOLUTION
|
||
# Start memory tracking
|
||
tracemalloc.start()
|
||
|
||
# Measure baseline memory (unused but kept for completeness)
|
||
_baseline_memory = tracemalloc.get_traced_memory()[0]
|
||
|
||
# Calculate parameter memory
|
||
param_count = self.count_parameters(model)
|
||
parameter_memory_bytes = param_count * BYTES_PER_FLOAT32
|
||
parameter_memory_mb = parameter_memory_bytes / MB_TO_BYTES
|
||
|
||
# Create input and measure activation memory
|
||
dummy_input = Tensor(np.random.randn(*input_shape))
|
||
input_memory_bytes = dummy_input.data.nbytes
|
||
|
||
# Estimate activation memory (simplified)
|
||
activation_memory_bytes = input_memory_bytes * 2 # Rough estimate
|
||
activation_memory_mb = activation_memory_bytes / MB_TO_BYTES
|
||
|
||
# Run forward pass to measure peak memory usage
|
||
_ = model.forward(dummy_input)
|
||
|
||
# Get peak memory
|
||
_current_memory, peak_memory = tracemalloc.get_traced_memory()
|
||
peak_memory_mb = (peak_memory - _baseline_memory) / MB_TO_BYTES
|
||
|
||
tracemalloc.stop()
|
||
|
||
# Calculate efficiency
|
||
useful_memory = parameter_memory_mb + activation_memory_mb
|
||
memory_efficiency = useful_memory / max(peak_memory_mb, 0.001) # Avoid division by zero
|
||
|
||
return {
|
||
'parameter_memory_mb': parameter_memory_mb,
|
||
'activation_memory_mb': activation_memory_mb,
|
||
'peak_memory_mb': max(peak_memory_mb, useful_memory),
|
||
'memory_efficiency': min(memory_efficiency, 1.0)
|
||
}
|
||
### END SOLUTION
|
||
|
||
def measure_latency(self, model, input_tensor, warmup: int = 10, iterations: int = 100) -> float:
|
||
"""
|
||
Measure model inference latency with statistical rigor.
|
||
|
||
TODO: Implement accurate latency measurement
|
||
|
||
APPROACH:
|
||
1. Run warmup iterations to stabilize performance
|
||
2. Measure multiple iterations for statistical accuracy
|
||
3. Calculate median latency to handle outliers
|
||
4. Return latency in milliseconds
|
||
|
||
PARAMETERS:
|
||
- warmup: Number of warmup runs (default 10)
|
||
- iterations: Number of measurement runs (default 100)
|
||
|
||
EXAMPLE:
|
||
>>> linear = Linear(128, 64)
|
||
>>> input_tensor = Tensor(np.random.randn(1, 128))
|
||
>>> profiler = Profiler()
|
||
>>> latency = profiler.measure_latency(linear, input_tensor)
|
||
>>> print(f"Latency: {latency:.2f} ms")
|
||
Latency: 0.15 ms
|
||
|
||
HINTS:
|
||
- Use time.perf_counter() for high precision
|
||
- Use median instead of mean for robustness against outliers
|
||
- Handle different model interfaces (forward, __call__)
|
||
"""
|
||
### BEGIN SOLUTION
|
||
# Warmup runs to stabilize performance
|
||
for _ in range(warmup):
|
||
_ = model.forward(input_tensor)
|
||
|
||
# Measurement runs
|
||
times = []
|
||
for _ in range(iterations):
|
||
start_time = time.perf_counter()
|
||
_ = model.forward(input_tensor)
|
||
end_time = time.perf_counter()
|
||
times.append((end_time - start_time) * 1000) # Convert to milliseconds
|
||
|
||
# Calculate statistics - use median for robustness
|
||
times = np.array(times)
|
||
median_latency = np.median(times)
|
||
|
||
return float(median_latency)
|
||
### END SOLUTION
|
||
|
||
def profile_layer(self, layer, input_shape: Tuple[int, ...]) -> Dict[str, Any]:
|
||
"""
|
||
Profile a single layer comprehensively.
|
||
|
||
TODO: Implement layer-wise profiling
|
||
|
||
APPROACH:
|
||
1. Count parameters for this layer
|
||
2. Count FLOPs for this layer
|
||
3. Measure memory usage
|
||
4. Measure latency
|
||
5. Return comprehensive layer profile
|
||
|
||
EXAMPLE:
|
||
>>> linear = Linear(256, 128)
|
||
>>> profiler = Profiler()
|
||
>>> profile = profiler.profile_layer(linear, (32, 256))
|
||
>>> print(f"Layer uses {profile['parameters']} parameters")
|
||
Layer uses 32896 parameters
|
||
|
||
HINTS:
|
||
- Use existing profiler methods (count_parameters, count_flops, etc.)
|
||
- Create dummy input for latency measurement
|
||
- Include layer type information in profile
|
||
"""
|
||
### BEGIN SOLUTION
|
||
# Create dummy input for latency measurement
|
||
dummy_input = Tensor(np.random.randn(*input_shape))
|
||
|
||
# Gather all measurements
|
||
params = self.count_parameters(layer)
|
||
flops = self.count_flops(layer, input_shape)
|
||
memory = self.measure_memory(layer, input_shape)
|
||
latency = self.measure_latency(layer, dummy_input, warmup=3, iterations=10)
|
||
|
||
# Compute derived metrics
|
||
gflops_per_second = (flops / 1e9) / max(latency / 1000, 1e-6)
|
||
|
||
return {
|
||
'layer_type': layer.__class__.__name__,
|
||
'parameters': params,
|
||
'flops': flops,
|
||
'latency_ms': latency,
|
||
'gflops_per_second': gflops_per_second,
|
||
**memory
|
||
}
|
||
### END SOLUTION
|
||
|
||
def profile_forward_pass(self, model, input_tensor) -> Dict[str, Any]:
|
||
"""
|
||
Comprehensive profiling of a model's forward pass.
|
||
|
||
TODO: Implement complete forward pass analysis
|
||
|
||
APPROACH:
|
||
1. Use Profiler class to gather all measurements
|
||
2. Create comprehensive performance profile
|
||
3. Add derived metrics and insights
|
||
4. Return structured analysis results
|
||
|
||
RETURN METRICS:
|
||
- All basic profiler measurements
|
||
- FLOPs per second (computational efficiency)
|
||
- Memory bandwidth utilization
|
||
- Performance bottleneck identification
|
||
|
||
EXAMPLE:
|
||
>>> model = Linear(256, 128)
|
||
>>> input_data = Tensor(np.random.randn(32, 256))
|
||
>>> profiler = Profiler()
|
||
>>> profile = profiler.profile_forward_pass(model, input_data)
|
||
>>> print(f"Throughput: {profile['gflops_per_second']:.2f} GFLOP/s")
|
||
Throughput: 2.45 GFLOP/s
|
||
|
||
HINTS:
|
||
- GFLOP/s = (FLOPs / 1e9) / (latency_ms / 1000)
|
||
- Memory bandwidth = memory_mb / (latency_ms / 1000)
|
||
- Consider realistic hardware limits for efficiency calculations
|
||
"""
|
||
### BEGIN SOLUTION
|
||
# Basic measurements
|
||
param_count = self.count_parameters(model)
|
||
flops = self.count_flops(model, input_tensor.shape)
|
||
memory_stats = self.measure_memory(model, input_tensor.shape)
|
||
latency_ms = self.measure_latency(model, input_tensor, warmup=5, iterations=20)
|
||
|
||
# Derived metrics
|
||
latency_seconds = latency_ms / 1000.0
|
||
gflops_per_second = (flops / 1e9) / max(latency_seconds, 1e-6)
|
||
|
||
# Memory bandwidth (MB/s)
|
||
memory_bandwidth = memory_stats['peak_memory_mb'] / max(latency_seconds, 1e-6)
|
||
|
||
# Efficiency metrics
|
||
theoretical_peak_gflops = 100.0 # Assume 100 GFLOP/s theoretical peak for CPU
|
||
computational_efficiency = min(gflops_per_second / theoretical_peak_gflops, 1.0)
|
||
|
||
# Bottleneck analysis
|
||
is_memory_bound = memory_bandwidth > gflops_per_second * 100 # Rough heuristic
|
||
is_compute_bound = not is_memory_bound
|
||
|
||
return {
|
||
# Basic measurements
|
||
'parameters': param_count,
|
||
'flops': flops,
|
||
'latency_ms': latency_ms,
|
||
**memory_stats,
|
||
|
||
# Derived metrics
|
||
'gflops_per_second': gflops_per_second,
|
||
'memory_bandwidth_mbs': memory_bandwidth,
|
||
'computational_efficiency': computational_efficiency,
|
||
|
||
# Bottleneck analysis
|
||
'is_memory_bound': is_memory_bound,
|
||
'is_compute_bound': is_compute_bound,
|
||
'bottleneck': 'memory' if is_memory_bound else 'compute'
|
||
}
|
||
### END SOLUTION
|
||
|
||
def profile_backward_pass(self, model, input_tensor, _loss_fn=None) -> Dict[str, Any]:
|
||
"""
|
||
Profile both forward and backward passes for training analysis.
|
||
|
||
TODO: Implement training-focused profiling
|
||
|
||
APPROACH:
|
||
1. Profile forward pass first
|
||
2. Estimate backward pass costs (typically 2× forward)
|
||
3. Calculate total training iteration metrics
|
||
4. Analyze memory requirements for gradients and optimizers
|
||
|
||
BACKWARD PASS ESTIMATES:
|
||
- FLOPs: ~2× forward pass (gradient computation)
|
||
- Memory: +1× parameters (gradient storage)
|
||
- Latency: ~2× forward pass (more complex operations)
|
||
|
||
EXAMPLE:
|
||
>>> model = Linear(128, 64)
|
||
>>> input_data = Tensor(np.random.randn(16, 128))
|
||
>>> profiler = Profiler()
|
||
>>> profile = profiler.profile_backward_pass(model, input_data)
|
||
>>> print(f"Training iteration: {profile['total_latency_ms']:.2f} ms")
|
||
Training iteration: 0.45 ms
|
||
|
||
HINTS:
|
||
- Total memory = parameters + activations + gradients
|
||
- Optimizer memory depends on algorithm (SGD: 0×, Adam: 2×)
|
||
- Consider gradient accumulation effects
|
||
"""
|
||
### BEGIN SOLUTION
|
||
# Get forward pass profile
|
||
forward_profile = self.profile_forward_pass(model, input_tensor)
|
||
|
||
# Estimate backward pass (typically 2× forward)
|
||
backward_flops = forward_profile['flops'] * 2
|
||
backward_latency_ms = forward_profile['latency_ms'] * 2
|
||
|
||
# Gradient memory (equal to parameter memory)
|
||
gradient_memory_mb = forward_profile['parameter_memory_mb']
|
||
|
||
# Total training iteration
|
||
total_flops = forward_profile['flops'] + backward_flops
|
||
total_latency_ms = forward_profile['latency_ms'] + backward_latency_ms
|
||
total_memory_mb = (forward_profile['parameter_memory_mb'] +
|
||
forward_profile['activation_memory_mb'] +
|
||
gradient_memory_mb)
|
||
|
||
# Training efficiency
|
||
total_gflops_per_second = (total_flops / 1e9) / (total_latency_ms / 1000.0)
|
||
|
||
# Optimizer memory estimates
|
||
optimizer_memory_estimates = {
|
||
'sgd': 0, # No extra memory
|
||
'adam': gradient_memory_mb * 2, # Momentum + velocity
|
||
'adamw': gradient_memory_mb * 2, # Same as Adam
|
||
}
|
||
|
||
return {
|
||
# Forward pass
|
||
'forward_flops': forward_profile['flops'],
|
||
'forward_latency_ms': forward_profile['latency_ms'],
|
||
'forward_memory_mb': forward_profile['peak_memory_mb'],
|
||
|
||
# Backward pass estimates
|
||
'backward_flops': backward_flops,
|
||
'backward_latency_ms': backward_latency_ms,
|
||
'gradient_memory_mb': gradient_memory_mb,
|
||
|
||
# Total training iteration
|
||
'total_flops': total_flops,
|
||
'total_latency_ms': total_latency_ms,
|
||
'total_memory_mb': total_memory_mb,
|
||
'total_gflops_per_second': total_gflops_per_second,
|
||
|
||
# Optimizer memory requirements
|
||
'optimizer_memory_estimates': optimizer_memory_estimates,
|
||
|
||
# Training insights
|
||
'memory_efficiency': forward_profile['memory_efficiency'],
|
||
'bottleneck': forward_profile['bottleneck']
|
||
}
|
||
### END SOLUTION
|
||
|
||
# %% ../../modules/14_profiling/14_profiling.ipynb 8
|
||
def quick_profile(model, input_tensor, profiler=None):
|
||
"""
|
||
Quick profiling function for immediate insights.
|
||
|
||
Provides a simplified interface for profiling that displays key metrics
|
||
in a student-friendly format.
|
||
|
||
Args:
|
||
model: Model to profile
|
||
input_tensor: Input data for profiling
|
||
profiler: Optional Profiler instance (creates new one if None)
|
||
|
||
Returns:
|
||
dict: Profile results with key metrics
|
||
|
||
Example:
|
||
>>> model = Linear(128, 64)
|
||
>>> input_data = Tensor(np.random.randn(16, 128))
|
||
>>> results = quick_profile(model, input_data)
|
||
>>> # Displays formatted output automatically
|
||
"""
|
||
if profiler is None:
|
||
profiler = Profiler()
|
||
|
||
profile = profiler.profile_forward_pass(model, input_tensor)
|
||
|
||
# Display formatted results
|
||
print("🔬 Quick Profile Results:")
|
||
print(f" Parameters: {profile['parameters']:,}")
|
||
print(f" FLOPs: {profile['flops']:,}")
|
||
print(f" Latency: {profile['latency_ms']:.2f} ms")
|
||
print(f" Memory: {profile['peak_memory_mb']:.2f} MB")
|
||
print(f" Bottleneck: {profile['bottleneck']}")
|
||
print(f" Efficiency: {profile['computational_efficiency']*100:.1f}%")
|
||
|
||
return profile
|
||
|
||
# %% ../../modules/14_profiling/14_profiling.ipynb 9
|
||
def analyze_weight_distribution(model, percentiles=[10, 25, 50, 75, 90]):
|
||
"""
|
||
Analyze weight distribution for compression insights.
|
||
|
||
Helps understand which weights are small and might be prunable.
|
||
Used by Module 17 (Compression) to motivate pruning.
|
||
|
||
Args:
|
||
model: Model to analyze
|
||
percentiles: List of percentiles to compute
|
||
|
||
Returns:
|
||
dict: Weight distribution statistics
|
||
|
||
Example:
|
||
>>> model = Linear(512, 512)
|
||
>>> stats = analyze_weight_distribution(model)
|
||
>>> print(f"Weights < 0.01: {stats['below_threshold_001']:.1f}%")
|
||
"""
|
||
# Collect all weights
|
||
weights = []
|
||
if hasattr(model, 'parameters'):
|
||
for param in model.parameters():
|
||
weights.extend(param.data.flatten().tolist())
|
||
elif hasattr(model, 'weight'):
|
||
weights.extend(model.weight.data.flatten().tolist())
|
||
else:
|
||
return {'error': 'No weights found'}
|
||
|
||
weights = np.array(weights)
|
||
abs_weights = np.abs(weights)
|
||
|
||
# Calculate statistics
|
||
stats = {
|
||
'total_weights': len(weights),
|
||
'mean': float(np.mean(abs_weights)),
|
||
'std': float(np.std(abs_weights)),
|
||
'min': float(np.min(abs_weights)),
|
||
'max': float(np.max(abs_weights)),
|
||
}
|
||
|
||
# Percentile analysis
|
||
for p in percentiles:
|
||
stats[f'percentile_{p}'] = float(np.percentile(abs_weights, p))
|
||
|
||
# Threshold analysis (useful for pruning)
|
||
for threshold in [0.001, 0.01, 0.1]:
|
||
below = np.sum(abs_weights < threshold) / len(weights) * 100
|
||
stats[f'below_threshold_{str(threshold).replace(".", "")}'] = below
|
||
|
||
return stats
|