mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 22:25:29 -05:00
docs: add comprehensive docstrings to optimization modules 16-19
- Add Args/Returns/Example/Hints to key functions
- Improve documentation for compare_model_sizes (16)
- Enhance function documentation in compression (17)
- Add docstring details for acceleration (18)
- Improve benchmarking function docs (19)
This commit is contained in:
@@ -1231,6 +1231,25 @@ def compare_model_sizes(original_model, quantized_model) -> Dict[str, float]:
|
||||
2. Calculate bytes used (FP32 vs INT8)
|
||||
3. Include quantization overhead
|
||||
4. Return comparison metrics
|
||||
|
||||
Args:
|
||||
original_model: Model before quantization
|
||||
quantized_model: Model after quantization
|
||||
|
||||
Returns:
|
||||
Dictionary with 'original_mb', 'quantized_mb', 'reduction_ratio', 'memory_saved_mb'
|
||||
|
||||
EXAMPLE:
|
||||
>>> model = Sequential(Linear(100, 50), Linear(50, 10))
|
||||
>>> quantize_model(model)
|
||||
>>> stats = compare_model_sizes(model, model) # Same model after in-place quantization
|
||||
>>> print(f"Model is {stats['reduction_ratio']:.1f}x smaller")
|
||||
Model is 4.0x smaller
|
||||
|
||||
HINTS:
|
||||
- FP32 uses 4 bytes per parameter, INT8 uses 1 byte
|
||||
- Include scale/zero_point overhead (2 values per quantized layer)
|
||||
- Expected ratio: ~4x for INT8 quantization
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# Count original model parameters
|
||||
|
||||
@@ -331,6 +331,12 @@ def measure_sparsity(model) -> float:
|
||||
3. Count total parameters
|
||||
4. Return percentage: zeros / total * 100
|
||||
|
||||
Args:
|
||||
model: Model with .parameters() method
|
||||
|
||||
Returns:
|
||||
Sparsity percentage (0.0-100.0)
|
||||
|
||||
EXAMPLE:
|
||||
>>> model = Sequential(Linear(10, 5), Linear(5, 2))
|
||||
>>> sparsity = measure_sparsity(model)
|
||||
|
||||
@@ -267,6 +267,13 @@ def vectorized_matmul(a: Tensor, b: Tensor) -> Tensor:
|
||||
2. Use NumPy's optimized dot product (calls BLAS GEMM)
|
||||
3. Return result wrapped in Tensor
|
||||
|
||||
Args:
|
||||
a: First tensor for multiplication (M×K or batch×M×K)
|
||||
b: Second tensor for multiplication (K×N or batch×K×N)
|
||||
|
||||
Returns:
|
||||
Result tensor of shape (M×N or batch×M×N)
|
||||
|
||||
EXAMPLE:
|
||||
Matrix multiplication visualization:
|
||||
>>> a = Tensor([[1, 2], [3, 4]]) # 2×2
|
||||
@@ -443,6 +450,12 @@ def fused_gelu(x: Tensor) -> Tensor:
|
||||
2. Avoid creating temporary arrays
|
||||
3. Let NumPy's broadcasting handle vectorization
|
||||
|
||||
Args:
|
||||
x: Input tensor to apply GELU activation
|
||||
|
||||
Returns:
|
||||
GELU-activated tensor (same shape as input)
|
||||
|
||||
EXAMPLE:
|
||||
>>> x = Tensor([-2, -1, 0, 1, 2])
|
||||
>>> result = fused_gelu(x)
|
||||
@@ -538,11 +551,27 @@ def unfused_gelu(x: Tensor) -> Tensor:
|
||||
2. Create temporary Tensor objects for each step
|
||||
3. This simulates real memory allocation overhead
|
||||
|
||||
Args:
|
||||
x: Input tensor
|
||||
|
||||
Returns:
|
||||
GELU-activated tensor (same shape as input)
|
||||
|
||||
EXAMPLE:
|
||||
>>> x = Tensor([0.5, 1.0, -0.5])
|
||||
>>> result = unfused_gelu(x)
|
||||
>>> print(result.shape)  # Same shape as input
|
||||
(3,)
|
||||
|
||||
PERFORMANCE IMPACT:
|
||||
- Creates 7 temporary arrays
|
||||
- Each array allocation/deallocation has overhead
|
||||
- More memory bandwidth usage
|
||||
- Potential cache misses between operations
|
||||
|
||||
HINTS:
|
||||
- Create each step as: temp = Tensor(operation)
|
||||
- This forces memory allocation for educational comparison
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# Unfused version - creates many intermediate arrays
|
||||
|
||||
@@ -402,6 +402,9 @@ def precise_timer():
|
||||
3. Return elapsed time when context exits
|
||||
4. Provide warmup capability for JIT compilation
|
||||
|
||||
Yields:
|
||||
Timer object with .elapsed attribute (set after context exits)
|
||||
|
||||
EXAMPLE:
|
||||
>>> with precise_timer() as timer:
|
||||
... time.sleep(0.1) # Some operation
|
||||
@@ -1721,6 +1724,14 @@ def compare_optimization_techniques(base_model: Any, optimized_models: List[Any]
|
||||
3. Generate insights about which optimizations work best
|
||||
4. Create recommendation matrix for different use cases
|
||||
|
||||
Args:
|
||||
base_model: Baseline model (unoptimized)
|
||||
optimized_models: List of models with different optimizations applied
|
||||
datasets: List of datasets for evaluation
|
||||
|
||||
Returns:
|
||||
Dictionary with 'base_metrics', 'optimized_results', 'improvements', 'recommendations'
|
||||
|
||||
EXAMPLE:
|
||||
>>> models = [base_model, quantized_model, pruned_model, distilled_model]
|
||||
>>> results = compare_optimization_techniques(base_model, models[1:], datasets)
|
||||
|
||||
Reference in New Issue
Block a user