Files
TinyTorch/tinytorch/core/compression.py
Vijay Janapa Reddi f9309e8b9d 🔧 Complete module restructuring and integration fixes
📦 Module File Organization:
- Renamed networks_dev.py → dense_dev.py in 05_dense module
- Renamed cnn_dev.py → spatial_dev.py in 06_spatial module
- Added new 07_attention module with attention_dev.py
- Updated module.yaml files to reference correct filenames
- Updated #| default_exp directives for proper package exports

🔄 Core Package Updates:
- Added tinytorch.core.dense (Sequential, MLP architectures)
- Added tinytorch.core.spatial (Conv2D, pooling operations)
- Added tinytorch.core.attention (self-attention mechanisms)
- Updated all core modules with latest implementations
- Fixed tensor assignment issues in compression module

🧪 Test Integration Fixes:
- Updated integration tests to use correct module imports
- Fixed tensor activation tests for new module structure
- Ensured compatibility with renamed components
- Maintained 100% individual module test success rate

Result: Complete 14-module TinyTorch framework with proper organization,
working integrations, and comprehensive test coverage ready for production use.
2025-07-18 02:10:49 -04:00

875 lines
33 KiB
Python

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/12_compression/compression_dev.ipynb.
# %% auto 0
__all__ = ['setup_import_paths', 'CompressionMetrics', 'prune_weights_by_magnitude', 'calculate_sparsity',
'quantize_layer_weights', 'DistillationLoss', 'compute_neuron_importance', 'prune_layer_neurons',
'compare_compression_techniques']
# %% ../../modules/source/12_compression/compression_dev.ipynb 1
import numpy as np
import sys
import os
import math
from typing import List, Dict, Any, Optional, Union, Tuple
from collections import defaultdict
# Helper function to set up import paths
def setup_import_paths():
    """Add the development module directories to ``sys.path``.

    The directories are resolved relative to the grandparent directory of
    this file. A directory that is already on ``sys.path`` is skipped, so the
    function is safe to call more than once without growing the path with
    duplicate entries.
    """
    # Two directory levels above this file is used as the root that holds
    # the numbered module directories.
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    module_dirs = [
        '01_tensor', '02_activations', '03_layers', '04_networks',
        '05_cnn', '06_dataloader', '07_autograd', '08_optimizers', '09_training'
    ]
    for module_dir in module_dirs:
        module_path = os.path.join(base_dir, module_dir)
        # Appending unconditionally would add the same entries again on
        # every call (e.g. on module reload).
        if module_path not in sys.path:
            sys.path.append(module_path)


# Set up paths
setup_import_paths()
# Import all the building blocks we need
try:
    # First choice: the installed tinytorch package.
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.layers import Dense
    from tinytorch.core.networks import Sequential
    from tinytorch.core.training import CrossEntropyLoss, Trainer
except ImportError:
    try:
        # Second choice: the local development modules.
        from tensor_dev import Tensor
        from layers_dev import Dense
        from networks_dev import Sequential
        from training_dev import CrossEntropyLoss, Trainer
    except ImportError:
        # Last resort: minimal stand-ins exposing only the attributes the
        # compression utilities in this file read and write.

        class Tensor:
            """Mock tensor wrapping a numpy array in ``data`` with a ``shape``."""

            def __init__(self, data):
                array = np.array(data)
                self.data = array
                self.shape = array.shape

            def __str__(self):
                return f"Tensor({self.data})"

        class Dense:
            """Mock dense layer with small random weights and a zero bias."""

            def __init__(self, input_size, output_size):
                self.input_size = input_size
                self.output_size = output_size
                initial_weights = np.random.randn(input_size, output_size) * 0.1
                self.weights = Tensor(initial_weights)
                self.bias = Tensor(np.zeros(output_size))

            def __str__(self):
                return f"Dense({self.input_size}, {self.output_size})"

        class Sequential:
            """Mock container that only stores its list of layers."""

            def __init__(self, layers=None):
                self.layers = layers or []

        class CrossEntropyLoss:
            """Mock loss object; carries no state."""

            def __init__(self):
                pass

        class Trainer:
            """Mock trainer that only records its collaborators."""

            def __init__(self, model, optimizer, loss_function):
                self.model = model
                self.optimizer = optimizer
                self.loss_function = loss_function
# %% ../../modules/source/12_compression/compression_dev.ipynb 6
class CompressionMetrics:
    """
    Utilities for measuring model size and parameter counts.

    The analyzer is stateless: every method works only on the model that is
    passed in, reading each layer's ``weights`` and ``bias`` attributes.
    """

    def __init__(self):
        """Initialize compression metrics analyzer."""
        pass

    @staticmethod
    def _num_elements(tensor) -> int:
        """Return the element count of ``tensor`` as a plain Python int.

        Uses ``tensor.shape`` when present and ``tensor.data.shape`` otherwise.
        """
        shape = tensor.shape if hasattr(tensor, 'shape') else tensor.data.shape
        # np.prod returns a numpy scalar (a float for an empty shape);
        # convert so the counts are real ints as the return annotation says.
        return int(np.prod(shape))

    def count_parameters(self, model: Sequential) -> Dict[str, int]:
        """
        Count parameters in a neural network model.

        Args:
            model: Object with a ``layers`` sequence. Layers without a
                ``weights``/``bias`` attribute (or with it set to None)
                contribute nothing for that parameter kind.

        Returns:
            Dictionary of plain ints with one entry per layer parameter plus
            the totals.

        EXAMPLE OUTPUT:
            {
                'layer_0_weights': 100352,
                'layer_0_bias': 128,
                'layer_1_weights': 8192,
                'layer_1_bias': 64,
                'total_parameters': 108736,
                'total_weights': 108544,
                'total_bias': 192
            }

        Note:
            Every element is counted, including weights that are zero, so
            unstructured pruning does not change these numbers.
        """
        ### BEGIN SOLUTION
        param_counts = {}
        totals = {'weights': 0, 'bias': 0}

        for i, layer in enumerate(model.layers):
            # Weights first, then bias, so the key order stays stable.
            for kind in ('weights', 'bias'):
                tensor = getattr(layer, kind, None)
                if tensor is None:
                    continue
                count = self._num_elements(tensor)
                param_counts[f'layer_{i}_{kind}'] = count
                totals[kind] += count

        # Add summary statistics
        param_counts['total_parameters'] = totals['weights'] + totals['bias']
        param_counts['total_weights'] = totals['weights']
        param_counts['total_bias'] = totals['bias']
        return param_counts
        ### END SOLUTION

    def calculate_model_size(self, model: Sequential, dtype: str = 'float32') -> Dict[str, Any]:
        """
        Calculate memory footprint of a neural network model.

        Args:
            model: Model to analyze (see ``count_parameters``).
            dtype: Data type for size calculation ('float32', 'float16',
                'int8'). Any other value is sized at 4 bytes per parameter.

        Returns:
            Dictionary with the parameter count, bytes per parameter, exact
            ``total_bytes``, and ``size_kb``/``size_mb`` rounded to two
            decimals (so small models report ``size_mb`` of 0.0).
        """
        total_params = self.count_parameters(model)['total_parameters']

        # Determine bytes per parameter; unknown dtypes fall back to 4.
        bytes_per_param = {
            'float32': 4,
            'float16': 2,
            'int8': 1
        }.get(dtype, 4)

        # Calculate sizes
        total_bytes = total_params * bytes_per_param
        size_kb = total_bytes / 1024
        size_mb = size_kb / 1024

        return {
            'total_parameters': total_params,
            'bytes_per_parameter': bytes_per_param,
            'total_bytes': total_bytes,
            'size_kb': round(size_kb, 2),
            'size_mb': round(size_mb, 2),
            'dtype': dtype
        }
# %% ../../modules/source/12_compression/compression_dev.ipynb 9
def prune_weights_by_magnitude(layer: Dense, pruning_ratio: float = 0.5) -> Tuple[Dense, Dict[str, Any]]:
    """
    Prune weights in a Dense layer by magnitude.

    Weights whose absolute value is not strictly above the
    ``pruning_ratio``-th percentile of all magnitudes are set to zero. The
    layer is modified in place (``layer.weights.data`` is replaced).

    Args:
        layer: Dense layer to prune; ``layer.weights.data`` holds the weights
        pruning_ratio: Fraction of weights to remove (0.0 to 1.0). A ratio of
            0.0 leaves the weights untouched.

    Returns:
        Tuple of (pruned_layer, pruning_info), where ``pruned_layer`` is the
        same object that was passed in.

    Raises:
        ValueError: If ``pruning_ratio`` is outside [0.0, 1.0].

    EXAMPLE USAGE:
    ```python
    layer = Dense(784, 128)
    pruned_layer, info = prune_weights_by_magnitude(layer, pruning_ratio=0.3)
    print(f"Pruned {info['weights_removed']} weights, sparsity: {info['sparsity']:.2f}")
    ```

    Note:
        Ties at the threshold are all pruned, so the achieved sparsity can
        exceed ``pruning_ratio`` when many weights share a magnitude.
    """
    ### BEGIN SOLUTION
    if not 0.0 <= pruning_ratio <= 1.0:
        raise ValueError(f"pruning_ratio must be in [0.0, 1.0], got {pruning_ratio}")

    # Get current weights and ensure they're numpy arrays
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    total_weights = int(weights.size)
    zeros_before = int(np.sum(weights == 0))
    magnitudes = np.abs(weights)

    if pruning_ratio > 0 and total_weights > 0:
        threshold = float(np.percentile(magnitudes, pruning_ratio * 100))
    else:
        # The 0th percentile is the smallest magnitude, and the strict ">"
        # below would remove it. A threshold of 0 keeps every non-zero
        # weight, which is what "prune nothing" means. This branch also
        # avoids taking a percentile of an empty array.
        threshold = 0.0

    # Create mask and apply pruning
    mask = magnitudes > threshold
    pruned_weights = weights * mask

    # Update layer weights
    layer.weights.data = pruned_weights

    # Calculate pruning statistics
    zero_weights = int(np.sum(pruned_weights == 0))
    sparsity = zero_weights / total_weights if total_weights > 0 else 0.0

    pruning_info = {
        'pruning_ratio': pruning_ratio,
        'threshold': threshold,
        'total_weights': total_weights,
        # Only count weights newly zeroed by this call.
        'weights_removed': zero_weights - zeros_before,
        'remaining_weights': total_weights - zero_weights,
        'sparsity': float(sparsity),
        'compression_ratio': 1 / (1 - sparsity) if sparsity < 1 else float('inf')
    }
    return layer, pruning_info
    ### END SOLUTION
# %% ../../modules/source/12_compression/compression_dev.ipynb 10
def calculate_sparsity(layer: Dense) -> float:
    """
    Return the fraction of a layer's weights that are exactly zero.

    Args:
        layer: Layer to analyze; its weights are read from
            ``layer.weights.data``.

    Returns:
        Sparsity between 0.0 (no zero weights) and 1.0 (all weights zero).
        Returns 0.0 when the layer has no ``weights`` attribute, when it is
        None, or when the weight array is empty.

    EXAMPLE USAGE:
    ```python
    layer = Dense(100, 50)
    sparsity = calculate_sparsity(layer)
    print(f"Layer sparsity: {sparsity:.2%}")
    ```
    """
    ### BEGIN SOLUTION
    layer_weights = getattr(layer, 'weights', None)
    if layer_weights is None:
        return 0.0

    values = layer_weights.data
    if not isinstance(values, np.ndarray):
        values = np.array(values)

    element_count = values.size
    if element_count > 0:
        zero_count = np.sum(values == 0)
        return zero_count / element_count
    return 0.0
    ### END SOLUTION
# %% ../../modules/source/12_compression/compression_dev.ipynb 13
def quantize_layer_weights(layer: Dense, bits: int = 8) -> Tuple[Dense, Dict[str, Any]]:
    """
    Quantize layer weights to reduce precision.

    The weights are mapped uniformly onto ``2**bits`` levels spanning
    [min, max], then mapped straight back to floats, so the layer keeps
    float32 weights that only take quantized values (a simulation of
    quantized inference). The layer is modified in place.

    Args:
        layer: Dense layer to quantize; ``layer.weights.data`` holds the weights
        bits: Number of bits per quantized weight (must be >= 1)

    Returns:
        Tuple of (quantized_layer, quantization_info), where
        ``quantized_layer`` is the same object that was passed in.

    Raises:
        ValueError: If ``bits`` is less than 1.

    EXAMPLE USAGE:
    ```python
    layer = Dense(784, 128)
    quantized_layer, info = quantize_layer_weights(layer, bits=8)
    print(f"Memory reduction: {info['memory_reduction']:.1f}x")
    ```

    Note:
        ``original_bytes`` always assumes 4-byte (FP32) storage, whatever
        the dtype of the incoming array.
    """
    ### BEGIN SOLUTION
    if bits < 1:
        raise ValueError(f"bits must be >= 1, got {bits}")

    # Get current weights and ensure they're numpy arrays
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)
    original_dtype = weights.dtype

    # Find min and max for quantization range
    w_min, w_max = np.min(weights), np.max(weights)

    # Calculate scale factor (width of one quantization step)
    max_level = 2**bits - 1
    scale = (w_max - w_min) / max_level

    if scale > 0:
        # Quantize, clamp to the valid level range, then dequantize.
        quantized = np.round((weights - w_min) / scale)
        quantized = np.clip(quantized, 0, max_level)
        dequantized = quantized * scale + w_min
    else:
        # All weights are equal, so the range is zero. Dividing by the zero
        # scale would turn every weight into NaN; a constant tensor is
        # already exactly representable, so keep it as is.
        dequantized = weights.astype(np.float64)

    # Update layer weights
    layer.weights.data = dequantized.astype(np.float32)

    # Calculate quantization statistics
    total_weights = weights.size
    original_bytes = total_weights * 4  # FP32 = 4 bytes
    # Packed size rounded up to whole bytes. Floor-dividing bits by 8 would
    # report 0 bytes for sub-byte widths such as 4-bit.
    quantized_bytes = (total_weights * bits + 7) // 8
    memory_reduction = original_bytes / quantized_bytes if quantized_bytes > 0 else 1.0

    # Calculate quantization error
    mse_error = np.mean((weights - dequantized) ** 2)
    max_error = np.max(np.abs(weights - dequantized))

    quantization_info = {
        'bits': bits,
        'scale': float(scale),
        'min_val': float(w_min),
        'max_val': float(w_max),
        'total_weights': total_weights,
        'original_bytes': original_bytes,
        'quantized_bytes': quantized_bytes,
        'memory_reduction': float(memory_reduction),
        'mse_error': float(mse_error),
        'max_error': float(max_error),
        'original_dtype': str(original_dtype)
    }
    return layer, quantization_info
    ### END SOLUTION
# %% ../../modules/source/12_compression/compression_dev.ipynb 16
class DistillationLoss:
    """
    Knowledge-distillation loss mixing a hard-label term with a soft,
    teacher-driven term.

    The hard term is the cross-entropy of the student against the true
    labels; the soft term is the cross-entropy of the student against the
    teacher's temperature-softened distribution.
    """

    def __init__(self, temperature: float = 3.0, alpha: float = 0.5):
        """
        Store the loss hyper-parameters.

        Args:
            temperature: Divisor applied to logits before the softmax of the
                soft term
            alpha: Weight of the hard loss; the soft loss gets ``1 - alpha``
        """
        self.temperature = temperature
        self.alpha = alpha
        self.ce_loss = CrossEntropyLoss()

    def __call__(self, student_logits: np.ndarray, teacher_logits: np.ndarray,
                 true_labels: np.ndarray) -> float:
        """
        Compute the combined distillation loss.

        Args:
            student_logits: Raw outputs from student model
            teacher_logits: Raw outputs from teacher model
            true_labels: Ground truth labels, either class indices (1-D) or
                already one-hot encoded

        Returns:
            ``alpha * hard_loss + (1 - alpha) * soft_loss`` as a Python float

        EXAMPLE USAGE:
        ```python
        distill_loss = DistillationLoss(temperature=3.0, alpha=0.5)
        loss = distill_loss(student_out, teacher_out, labels)
        ```
        """
        ### BEGIN SOLUTION
        # Accept lists and other array-likes; real ndarrays pass through.
        student_logits, teacher_logits, true_labels = (
            value if isinstance(value, np.ndarray) else np.array(value)
            for value in (student_logits, teacher_logits, true_labels)
        )

        # Hard term: student predictions against the ground-truth labels.
        hard_term = self._cross_entropy_loss(student_logits, true_labels)

        # Soft term: both distributions are softened by the temperature.
        temp = self.temperature
        teacher_probs = self._softmax(teacher_logits / temp)
        student_probs = self._softmax(student_logits / temp)

        # Cross-entropy of the student under the teacher's soft targets,
        # averaged over samples and scaled by temperature^2.
        per_sample = np.sum(teacher_probs * np.log(student_probs + 1e-10), axis=-1)
        soft_term = -np.mean(per_sample) * (temp ** 2)

        blended = self.alpha * hard_term + (1 - self.alpha) * soft_term
        return float(blended)
        ### END SOLUTION

    def _softmax(self, logits: np.ndarray) -> np.ndarray:
        """Softmax over the last axis, shifted by the row max for stability."""
        row_max = np.max(logits, axis=-1, keepdims=True)
        exponentials = np.exp(logits - row_max)
        normalizer = np.sum(exponentials, axis=-1, keepdims=True)
        return exponentials / normalizer

    def _cross_entropy_loss(self, logits: np.ndarray, labels: np.ndarray) -> float:
        """Mean cross-entropy of softmax(logits) against index or one-hot labels."""
        if labels.ndim == 1:
            # Expand class indices into a one-hot matrix.
            sample_count = labels.shape[0]
            targets = np.zeros((sample_count, logits.shape[-1]))
            targets[np.arange(sample_count), labels] = 1
        else:
            targets = labels

        log_probs = np.log(self._softmax(logits) + 1e-10)
        return -np.mean(np.sum(targets * log_probs, axis=-1))
# %% ../../modules/source/12_compression/compression_dev.ipynb 19
def compute_neuron_importance(layer: Dense, method: str = 'weight_magnitude') -> np.ndarray:
    """
    Score every output neuron of a layer.

    Each column of ``layer.weights.data`` is treated as one output neuron
    and reduced along axis 0 to a single score.

    Args:
        layer: Layer to analyze
        method: Scoring rule:
            - 'weight_magnitude': sum of absolute weights per column
            - 'weight_variance': variance of weights per column
            - 'random': uniform random scores (baseline comparison)

    Returns:
        Array with one importance score per column

    Raises:
        ValueError: If ``method`` is not one of the names above.
    """
    ### BEGIN SOLUTION
    weight_matrix = layer.weights.data
    if not isinstance(weight_matrix, np.ndarray):
        weight_matrix = np.array(weight_matrix)

    if method == 'weight_magnitude':
        return np.sum(np.abs(weight_matrix), axis=0)
    if method == 'weight_variance':
        return np.var(weight_matrix, axis=0)
    if method == 'random':
        column_count = weight_matrix.shape[1]
        return np.random.rand(column_count)

    raise ValueError(f"Unknown importance method: {method}")
    ### END SOLUTION
# %% ../../modules/source/12_compression/compression_dev.ipynb 20
def prune_layer_neurons(layer: Dense, keep_ratio: float = 0.7,
                        importance_method: str = 'weight_magnitude') -> Tuple[Dense, Dict[str, Any]]:
    """
    Remove least important neurons from a Dense layer.

    A new, narrower Dense layer is built from the highest-scoring columns of
    the weight matrix (and the matching bias entries). The input layer is
    not modified.

    Args:
        layer: Dense layer to prune
        keep_ratio: Fraction of neurons to keep. The resulting count is
            truncated toward zero and clamped to the range
            [1, number of neurons], so values above 1.0 keep every neuron.
        importance_method: Method passed to ``compute_neuron_importance``

    Returns:
        Tuple of (pruned_layer, pruning_info)

    EXAMPLE USAGE:
    ```python
    layer = Dense(784, 128)
    pruned_layer, info = prune_layer_neurons(layer, keep_ratio=0.75)
    print(f"Reduced from {info['original_neurons']} to {info['remaining_neurons']} neurons")
    ```

    Note:
        Only this layer is resized. A following layer that consumes its
        output still expects the original width and must be adjusted by the
        caller.
    """
    ### BEGIN SOLUTION
    # Compute neuron importance
    importance_scores = compute_neuron_importance(layer, importance_method)

    # Determine how many neurons to keep: at least one, and never more than
    # exist. Without the upper clamp a keep_ratio above 1.0 would declare
    # the new layer wider than the weights copied into it and report
    # negative "removed" statistics.
    original_neurons = layer.output_size
    keep_count = max(1, min(original_neurons, int(original_neurons * keep_ratio)))

    # Select most important neurons (argsort is ascending, so take the tail)
    sorted_indices = np.argsort(importance_scores)
    keep_indices = sorted_indices[-keep_count:]
    keep_indices = np.sort(keep_indices)  # Preserve the original neuron order

    # Get current weights and biases
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    bias = layer.bias.data if layer.bias is not None else None
    if bias is not None and not isinstance(bias, np.ndarray):
        bias = np.array(bias)

    # Create new layer with reduced dimensions
    pruned_layer = Dense(layer.input_size, keep_count)

    # Copy weights for selected neurons
    pruned_weights = weights[:, keep_indices]
    pruned_layer.weights.data = np.ascontiguousarray(pruned_weights)

    # Copy bias for selected neurons
    if bias is not None:
        pruned_bias = bias[keep_indices]
        pruned_layer.bias.data = np.ascontiguousarray(pruned_bias)

    # Calculate pruning statistics
    neurons_removed = original_neurons - keep_count
    compression_ratio = original_neurons / keep_count if keep_count > 0 else float('inf')

    # Calculate parameter reduction (weights plus bias, when present)
    original_params = layer.input_size * original_neurons + (original_neurons if bias is not None else 0)
    new_params = layer.input_size * keep_count + (keep_count if bias is not None else 0)
    param_reduction = (original_params - new_params) / original_params

    pruning_info = {
        'keep_ratio': keep_ratio,
        'importance_method': importance_method,
        'original_neurons': original_neurons,
        'remaining_neurons': keep_count,
        'neurons_removed': neurons_removed,
        'compression_ratio': float(compression_ratio),
        'original_params': original_params,
        'new_params': new_params,
        'param_reduction': float(param_reduction),
        'keep_indices': keep_indices.tolist()
    }
    return pruned_layer, pruning_info
    ### END SOLUTION
# %% ../../modules/source/12_compression/compression_dev.ipynb 23
def _copy_tensor_data(data):
    """Return an independent copy of a tensor's underlying data."""
    return data.copy() if hasattr(data, 'copy') else np.array(data)


def _clone_dense_model(model: Sequential) -> Sequential:
    """Build a new Sequential of Dense layers holding copies of ``model``'s weights and biases."""
    clones = []
    for source in model.layers:
        clone = Dense(source.input_size, source.output_size)
        clone.weights.data = _copy_tensor_data(source.weights.data)
        if hasattr(clone, 'bias') and source.bias is not None:
            clone.bias.data = _copy_tensor_data(source.bias.data)
        clones.append(clone)
    return Sequential(clones)


def _size_change(baseline_bytes, compressed_bytes) -> Tuple[float, float]:
    """Return (compression_ratio, memory_reduction) from exact byte counts."""
    ratio = baseline_bytes / compressed_bytes if compressed_bytes else float('inf')
    reduction = (baseline_bytes - compressed_bytes) / baseline_bytes if baseline_bytes else 0.0
    return float(ratio), float(reduction)


def compare_compression_techniques(original_model: Sequential) -> Dict[str, Dict[str, Any]]:
    """
    Compare all compression techniques on the same model.

    Each technique runs on its own copy of the model, so ``original_model``
    is left untouched. Every layer of the model is expected to expose
    ``input_size``, ``output_size``, ``weights`` and ``bias`` like a Dense
    layer.

    Args:
        original_model: Base model to compress using different techniques

    Returns:
        Dictionary keyed by 'baseline', 'magnitude_pruning', 'quantization',
        'structured_pruning' and 'combined'. Each entry holds 'technique',
        'parameters', 'size_mb', 'compression_ratio' and 'memory_reduction',
        plus a technique-specific average where applicable.

    Notes:
        - 'compression_ratio' and 'memory_reduction' are derived from exact
          byte counts rather than the rounded 'size_mb', so they stay
          meaningful for models small enough that 'size_mb' rounds to 0.0.
        - Sizes come from CompressionMetrics, which counts every element
          including zeros; magnitude pruning therefore shows no size change
          and only its 'sparsity' differs from the baseline.
        - Structured pruning shrinks each layer's outputs independently and
          does not resize the inputs of the following layer, so the pruned
          copies are suitable for size accounting only.
    """
    ### BEGIN SOLUTION
    results = {}
    metrics = CompressionMetrics()
    # Averages are taken over all layers; the floor of 1 keeps an empty
    # model from dividing by zero.
    layer_count = max(len(original_model.layers), 1)

    # Baseline: Original model
    baseline_params = metrics.count_parameters(original_model)
    baseline_size = metrics.calculate_model_size(original_model)
    baseline_bytes = baseline_size['total_bytes']
    results['baseline'] = {
        'technique': 'Original Model',
        'parameters': baseline_params['total_parameters'],
        'size_mb': baseline_size['size_mb'],
        'compression_ratio': 1.0,
        'memory_reduction': 0.0
    }

    # Technique 1: Magnitude-based pruning only
    model_pruning = _clone_dense_model(original_model)
    total_sparsity = 0
    for layer in model_pruning.layers:
        _, prune_info = prune_weights_by_magnitude(layer, pruning_ratio=0.3)
        total_sparsity += prune_info['sparsity']
    pruning_params = metrics.count_parameters(model_pruning)
    pruning_size = metrics.calculate_model_size(model_pruning)
    ratio, reduction = _size_change(baseline_bytes, pruning_size['total_bytes'])
    results['magnitude_pruning'] = {
        'technique': 'Magnitude Pruning (30%)',
        'parameters': pruning_params['total_parameters'],
        'size_mb': pruning_size['size_mb'],
        'compression_ratio': ratio,
        'memory_reduction': reduction,
        'sparsity': total_sparsity / layer_count
    }

    # Technique 2: Quantization only
    model_quantization = _clone_dense_model(original_model)
    total_memory_reduction = 0
    for layer in model_quantization.layers:
        _, quant_info = quantize_layer_weights(layer, bits=8)
        total_memory_reduction += quant_info['memory_reduction']
    quantization_size = metrics.calculate_model_size(model_quantization, dtype='int8')
    ratio, reduction = _size_change(baseline_bytes, quantization_size['total_bytes'])
    results['quantization'] = {
        'technique': 'Quantization (INT8)',
        'parameters': baseline_params['total_parameters'],
        'size_mb': quantization_size['size_mb'],
        'compression_ratio': ratio,
        'memory_reduction': reduction,
        'avg_memory_reduction_factor': total_memory_reduction / layer_count
    }

    # Technique 3: Structured pruning only
    model_structured = _clone_dense_model(original_model)
    total_param_reduction = 0
    for i, layer in enumerate(model_structured.layers):
        pruned_layer, struct_info = prune_layer_neurons(layer, keep_ratio=0.75)
        model_structured.layers[i] = pruned_layer
        total_param_reduction += struct_info['param_reduction']
    structured_params = metrics.count_parameters(model_structured)
    structured_size = metrics.calculate_model_size(model_structured)
    ratio, reduction = _size_change(baseline_bytes, structured_size['total_bytes'])
    results['structured_pruning'] = {
        'technique': 'Structured Pruning (75% neurons kept)',
        'parameters': structured_params['total_parameters'],
        'size_mb': structured_size['size_mb'],
        'compression_ratio': ratio,
        'memory_reduction': reduction,
        'param_reduction': total_param_reduction / layer_count
    }

    # Technique 4: Combined approach
    model_combined = _clone_dense_model(original_model)
    for i, layer in enumerate(model_combined.layers):
        # Step 1: Magnitude pruning
        prune_weights_by_magnitude(layer, pruning_ratio=0.2)
        # Step 2: Quantization
        quantize_layer_weights(layer, bits=8)
        # Step 3: Structured pruning
        pruned_layer, _ = prune_layer_neurons(layer, keep_ratio=0.8)
        model_combined.layers[i] = pruned_layer
    combined_params = metrics.count_parameters(model_combined)
    combined_size = metrics.calculate_model_size(model_combined, dtype='int8')
    ratio, reduction = _size_change(baseline_bytes, combined_size['total_bytes'])
    results['combined'] = {
        'technique': 'Combined (Pruning + Quantization + Structured)',
        'parameters': combined_params['total_parameters'],
        'size_mb': combined_size['size_mb'],
        'compression_ratio': ratio,
        'memory_reduction': reduction
    }
    return results
    ### END SOLUTION