docs: add comprehensive docstrings to optimization modules 16-19

- Add Args/Returns/Example/Hints to key functions
- Improve documentation for compare_model_sizes (16)
- Enhance function documentation in compression (17)
- Add docstring details for acceleration (18)
- Improve benchmarking function docs (19)
Vijay Janapa Reddi
2025-11-09 14:38:44 -05:00
parent a6e57ff379
commit 1c299cddb0
4 changed files with 65 additions and 0 deletions


@@ -1231,6 +1231,25 @@ def compare_model_sizes(original_model, quantized_model) -> Dict[str, float]:
2. Calculate bytes used (FP32 vs INT8)
3. Include quantization overhead
4. Return comparison metrics
Args:
original_model: Model before quantization
quantized_model: Model after quantization
Returns:
Dictionary with 'original_mb', 'quantized_mb', 'reduction_ratio', 'memory_saved_mb'
EXAMPLE:
>>> model = Sequential(Linear(100, 50), Linear(50, 10))
>>> quantize_model(model)
>>> stats = compare_model_sizes(model, model) # Same model after in-place quantization
>>> print(f"Reduced to {stats['reduction_ratio']:.1f}x smaller")
Reduced to 4.0x smaller
HINTS:
- FP32 uses 4 bytes per parameter, INT8 uses 1 byte
- Include scale/zero_point overhead (2 values per quantized layer)
- Expected ratio: ~4x for INT8 quantization
"""
### BEGIN SOLUTION
# Count original model parameters
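The byte accounting the docstring describes can be sketched as below. This is not the module's implementation; it assumes the original model's parameters are available as FP32 NumPy arrays and that each quantized layer carries one FP32 scale and one FP32 zero point, as the hints state.

```python
import numpy as np

def compare_model_sizes_sketch(original_params, quantized_layers):
    """Sketch of the size comparison.

    original_params: list of FP32 parameter arrays
    quantized_layers: list of (int8_array, scale, zero_point) tuples
    """
    original_bytes = sum(p.size * 4 for p in original_params)          # FP32: 4 bytes/param
    quantized_bytes = sum(q.size for q, _, _ in quantized_layers)      # INT8: 1 byte/param
    quantized_bytes += len(quantized_layers) * 2 * 4                   # scale + zero_point, FP32 each

    original_mb = original_bytes / (1024 ** 2)
    quantized_mb = quantized_bytes / (1024 ** 2)
    return {
        "original_mb": original_mb,
        "quantized_mb": quantized_mb,
        "reduction_ratio": original_bytes / quantized_bytes,
        "memory_saved_mb": original_mb - quantized_mb,
    }
```

With the two-layer model from the example (100×50 and 50×10 weights), the per-layer overhead keeps the ratio just under the ideal 4x.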


@@ -331,6 +331,12 @@ def measure_sparsity(model) -> float:
3. Count total parameters
4. Return percentage: zeros / total * 100
Args:
model: Model with .parameters() method
Returns:
Sparsity percentage (0.0-100.0)
EXAMPLE:
>>> model = Sequential(Linear(10, 5), Linear(5, 2))
>>> sparsity = measure_sparsity(model)
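The four steps above reduce to a short loop; a minimal sketch, assuming the parameters are available as NumPy arrays rather than the module's Tensor type:

```python
import numpy as np

def measure_sparsity_sketch(params):
    """Return the percentage of exactly-zero weights across all parameters."""
    zeros = sum(int(np.count_nonzero(p == 0)) for p in params)  # count zero entries
    total = sum(p.size for p in params)                          # count all entries
    return 100.0 * zeros / total if total else 0.0               # zeros / total * 100
```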


@@ -267,6 +267,13 @@ def vectorized_matmul(a: Tensor, b: Tensor) -> Tensor:
2. Use NumPy's optimized dot product (calls BLAS GEMM)
3. Return result wrapped in Tensor
Args:
a: First tensor for multiplication (M×K or batch×M×K)
b: Second tensor for multiplication (K×N or batch×K×N)
Returns:
Result tensor of shape (M×N or batch×M×N)
EXAMPLE:
Matrix multiplication visualization:
>>> a = Tensor([[1, 2], [3, 4]]) # 2×2
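The BLAS-backed path the docstring points at is essentially a single `np.matmul` call, which handles both the 2-D (M×K · K×N) and batched (batch×M×K · batch×K×N) cases; a sketch on raw arrays, without the Tensor wrapper:

```python
import numpy as np

def vectorized_matmul_sketch(a, b):
    # One optimized call: np.matmul dispatches 2-D inputs to BLAS GEMM
    # and broadcasts the leading batch dimension for 3-D inputs.
    return np.matmul(a, b)
```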
@@ -443,6 +450,12 @@ def fused_gelu(x: Tensor) -> Tensor:
2. Avoid creating temporary arrays
3. Let NumPy's broadcasting handle vectorization
Args:
x: Input tensor to apply GELU activation
Returns:
GELU-activated tensor (same shape as input)
EXAMPLE:
>>> x = Tensor([-2, -1, 0, 1, 2])
>>> result = fused_gelu(x)
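A sketch of the fused style on raw arrays, assuming the common tanh approximation of GELU: the whole activation is one NumPy expression, so no named Python-level temporaries are created.

```python
import numpy as np

def fused_gelu_sketch(x):
    # tanh approximation of GELU, evaluated as a single expression
    return 0.5 * x * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))
```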
@@ -538,11 +551,27 @@ def unfused_gelu(x: Tensor) -> Tensor:
2. Create temporary Tensor objects for each step
3. This simulates real memory allocation overhead
Args:
x: Input tensor
Returns:
GELU-activated tensor (same shape as input)
EXAMPLE:
>>> x = Tensor([0.5, 1.0, -0.5])
>>> result = unfused_gelu(x)
>>> print(result.shape)
(3,) # Same as input
PERFORMANCE IMPACT:
- Creates 7 temporary arrays
- Each array allocation/deallocation has overhead
- More memory bandwidth usage
- Potential cache misses between operations
HINTS:
- Create each step as: temp = Tensor(operation)
- This forces memory allocation for educational comparison
"""
### BEGIN SOLUTION
# Unfused version - creates many intermediate arrays
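The seven-temporary structure the docstring describes looks roughly like this sketch, with plain `np.ndarray` standing in for the module's Tensor so each named step materializes a separate array:

```python
import numpy as np

def unfused_gelu_sketch(x):
    t1 = x ** 3                         # temp 1
    t2 = 0.044715 * t1                  # temp 2
    t3 = x + t2                         # temp 3
    t4 = np.sqrt(2.0 / np.pi) * t3      # temp 4
    t5 = np.tanh(t4)                    # temp 5
    t6 = 1.0 + t5                       # temp 6
    t7 = 0.5 * x * t6                   # temp 7
    return t7
```

The result matches the fused expression exactly; only the allocation pattern differs.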


@@ -402,6 +402,9 @@ def precise_timer():
3. Return elapsed time when context exits
4. Provide warmup capability for JIT compilation
Yields:
Timer object with .elapsed attribute (set after context exits)
EXAMPLE:
>>> with precise_timer() as timer:
... time.sleep(0.1) # Some operation
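A minimal sketch of the context-manager shape the Yields section describes, using `time.perf_counter` and setting `.elapsed` only after the block exits (warmup handling omitted):

```python
import time
from contextlib import contextmanager
from types import SimpleNamespace

@contextmanager
def precise_timer_sketch():
    timer = SimpleNamespace(elapsed=None)   # .elapsed is filled in on exit
    start = time.perf_counter()
    try:
        yield timer
    finally:
        timer.elapsed = time.perf_counter() - start
```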
@@ -1721,6 +1724,14 @@ def compare_optimization_techniques(base_model: Any, optimized_models: List[Any]
3. Generate insights about which optimizations work best
4. Create recommendation matrix for different use cases
Args:
base_model: Baseline model (unoptimized)
optimized_models: List of models with different optimizations applied
datasets: List of datasets for evaluation
Returns:
Dictionary with 'base_metrics', 'optimized_results', 'improvements', 'recommendations'
EXAMPLE:
>>> models = [base_model, quantized_model, pruned_model, distilled_model]
>>> results = compare_optimization_techniques(base_model, models[1:], datasets)
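The comparison loop can be sketched as below. This is an illustration, not the module's code: the `evaluate` callable and the metric names (`accuracy`, `latency_ms`) are hypothetical stand-ins for whatever benchmark the chapter supplies, and the recommendation rule is a deliberately simple placeholder.

```python
def compare_optimizations_sketch(base_model, optimized_models, datasets, evaluate):
    """optimized_models: dict mapping technique name -> model."""
    # 1. Baseline metrics on every dataset
    base_metrics = [evaluate(base_model, d) for d in datasets]

    # 2. Run each optimized model on the same datasets
    optimized_results = {
        name: [evaluate(m, d) for d in datasets]
        for name, m in optimized_models.items()
    }

    # 3. Mean latency improvement over the baseline per technique
    improvements = {
        name: sum(b["latency_ms"] - m["latency_ms"] for b, m in zip(base_metrics, ms)) / len(ms)
        for name, ms in optimized_results.items()
    }

    # 4. Toy recommendation rule keyed on latency gains
    recommendations = {
        name: "use when latency-bound" if gain > 0 else "no clear win"
        for name, gain in improvements.items()
    }
    return {
        "base_metrics": base_metrics,
        "optimized_results": optimized_results,
        "improvements": improvements,
        "recommendations": recommendations,
    }
```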