mirror of
https://github.com/harvard-edge/cs249r_book.git
synced 2026-05-22 14:03:46 -05:00
fix(module16): correct sparsity percentage bugs in compression module
- Fix incorrect percentile claim in pruning ASCII diagram (rewrote with 20 values and correct 50th percentile threshold)
- Fix 7000% sparsity display in demo_compression_with_profiler: measure_sparsity() returns a percentage (0-100), but the code treated it as a fraction (0-1) and multiplied by 100 a second time

Closes harvard-edge/cs249r_book#1168
This commit is contained in:
@@ -465,38 +465,39 @@ Think of magnitude pruning like editing a document - you remove words that don't
|
||||
```
|
||||
Magnitude Pruning Process:
|
||||
|
||||
Step 1: Collect All Weights
|
||||
Step 1: Collect All Weights (20 total across 2 layers)
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Layer 1: [2.1, 0.1, -1.8, 0.05, 3.2, -0.02] │
|
||||
│ Layer 2: [1.5, -0.03, 2.8, 0.08, -2.1, 0.01] │
|
||||
│ Layer 3: [0.7, 2.4, -0.06, 1.9, 0.04, -1.3] │
|
||||
│ Layer 1: [2.1, 0.08, -1.8, 0.04, 3.2, │
|
||||
│ -0.02, 1.5, -0.03, 2.8, 0.06] │
|
||||
│ Layer 2: [0.7, 2.4, -0.05, 1.9, 0.01, │
|
||||
│ -1.3, 0.03, 2.1, -0.07, 0.09] │
|
||||
└──────────────────────────────────────────────────┘
|
||||
↓
|
||||
Step 2: Calculate Magnitudes
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Magnitudes: [2.1, 0.1, 1.8, 0.05, 3.2, 0.02, │
|
||||
│ 1.5, 0.03, 2.8, 0.08, 2.1, 0.01, │
|
||||
│ 0.7, 2.4, 0.06, 1.9, 0.04, 1.3] │
|
||||
│ Sorted: [0.01, 0.02, 0.03, 0.03, 0.04, 0.05, │
|
||||
│ 0.06, 0.07, 0.08, 0.09, 0.7, 1.3, │
|
||||
│ 1.5, 1.8, 1.9, 2.1, 2.1, 2.4, 2.8, 3.2] │
|
||||
└──────────────────────────────────────────────────┘
|
||||
↓
|
||||
Step 3: Find Threshold (e.g., 70th percentile)
|
||||
Step 3: Find Threshold (e.g., 50th percentile)
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Sorted: [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, │
|
||||
│ 0.08, 0.1, 0.7, 1.3, 1.5, 1.8, │ Threshold: 0.1
|
||||
│ 1.9, 2.1, 2.1, 2.4, 2.8, 3.2] │ (70% of weights removed)
|
||||
│ 20 values → 50th pctile between 10th and 11th │ Threshold ≈ 0.4
|
||||
│ Values ≤ 0.4: ten small weights get zeroed │ (50% of weights removed)
|
||||
└──────────────────────────────────────────────────┘
|
||||
↓
|
||||
Step 4: Apply Pruning Mask
|
||||
┌──────────────────────────────────────────────────┐
|
||||
│ Layer 1: [2.1, 0.0, -1.8, 0.0, 3.2, 0.0] │
|
||||
│ Layer 2: [1.5, 0.0, 2.8, 0.0, -2.1, 0.0] │ 70% weights → 0
|
||||
│ Layer 3: [0.7, 2.4, 0.0, 1.9, 0.0, -1.3] │ 30% preserved
|
||||
│ Layer 1: [2.1, 0.0, -1.8, 0.0, 3.2, │
|
||||
│ 0.0, 1.5, 0.0, 2.8, 0.0] │ 50% weights → 0
|
||||
│ Layer 2: [0.7, 2.4, 0.0, 1.9, 0.0, │ 50% preserved
|
||||
│ -1.3, 0.0, 2.1, 0.0, 0.0] │
|
||||
└──────────────────────────────────────────────────┘
|
||||
|
||||
Memory Impact:
|
||||
- Dense storage: 18 values
|
||||
- Sparse storage: 6 values + 6 indices = 12 values (33% savings)
|
||||
- Theoretical limit: 70% savings with perfect sparse format
|
||||
- Dense storage: 20 values × 4 bytes = 80 bytes
|
||||
- Sparse storage: 10 values + 10 indices = 80 bytes (no savings!)
|
||||
- At 90% sparsity: 2 values + 2 indices = 16 bytes (80% savings)
|
||||
```
|
||||
|
||||
### Why Global Thresholding Works
|
||||
@@ -1447,9 +1448,9 @@ def demo_compression_with_profiler():
|
||||
memory_before = profiler.measure_memory(model, input_shape)
|
||||
|
||||
print(f" Parameters: {param_count_before:,}")
|
||||
print(f" Sparsity: {sparsity_before*100:.1f}% (zeros)")
|
||||
print(f" Sparsity: {sparsity_before:.1f}% (zeros)")
|
||||
print(f" Memory: {memory_before['parameter_memory_mb']:.2f} MB")
|
||||
print(f" Active parameters: {int(param_count_before * (1 - sparsity_before)):,}")
|
||||
print(f" Active parameters: {int(param_count_before * (1 - sparsity_before / 100)):,}")
|
||||
|
||||
# Apply magnitude pruning
|
||||
target_sparsity = 0.7 # Remove 70% of parameters
|
||||
@@ -1466,19 +1467,19 @@ def demo_compression_with_profiler():
|
||||
memory_after = profiler.measure_memory(pruned_model, input_shape)
|
||||
|
||||
print(f" Parameters: {param_count_after:,} (same, but many are zero)")
|
||||
print(f" Sparsity: {sparsity_after*100:.1f}% (zeros)")
|
||||
print(f" Sparsity: {sparsity_after:.1f}% (zeros)")
|
||||
print(f" Memory: {memory_after['parameter_memory_mb']:.2f} MB (same storage)")
|
||||
print(f" Active parameters: {int(param_count_after * (1 - sparsity_after)):,}")
|
||||
print(f" Active parameters: {int(param_count_after * (1 - sparsity_after / 100)):,}")
|
||||
|
||||
print("\n📈 COMPRESSION RESULTS")
|
||||
print("=" * 70)
|
||||
sparsity_gain = (sparsity_after - sparsity_before) * 100
|
||||
active_before = int(param_count_before * (1 - sparsity_before))
|
||||
active_after = int(param_count_after * (1 - sparsity_after))
|
||||
sparsity_gain = sparsity_after - sparsity_before
|
||||
active_before = int(param_count_before * (1 - sparsity_before / 100))
|
||||
active_after = int(param_count_after * (1 - sparsity_after / 100))
|
||||
reduction_ratio = active_before / active_after if active_after > 0 else 1
|
||||
params_removed = active_before - active_after
|
||||
|
||||
print(f" Sparsity increased: {sparsity_before*100:.1f}% → {sparsity_after*100:.1f}%")
|
||||
print(f" Sparsity increased: {sparsity_before:.1f}% → {sparsity_after:.1f}%")
|
||||
print(f" Active params reduced: {active_before:,} → {active_after:,}")
|
||||
print(f" Parameters removed: {params_removed:,} ({sparsity_gain:.1f}% of total)")
|
||||
print(f" Compression ratio: {reduction_ratio:.1f}x fewer active parameters")
|
||||
|
||||
Reference in New Issue
Block a user