TinyTorch/tinytorch/core/compression.py

# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/12_compression/compression_dev.ipynb.

# %% auto 0
__all__ = ['setup_import_paths', 'CompressionMetrics', 'prune_weights_by_magnitude', 'calculate_sparsity',
           'quantize_layer_weights', 'DistillationLoss', 'compute_neuron_importance', 'prune_layer_neurons',
           'compare_compression_techniques']

# %% ../../modules/source/12_compression/compression_dev.ipynb 1
import numpy as np
import sys
import os
import math
from typing import List, Dict, Any, Optional, Union, Tuple
from collections import defaultdict

# Helper function to set up import paths
def setup_import_paths():
    """Set up import paths for development modules."""
    import sys
    import os

    # Add module directories to path
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    module_dirs = [
        '01_tensor', '02_activations', '03_layers', '04_networks',
        '05_cnn', '06_dataloader', '07_autograd', '08_optimizers', '09_training'
    ]

    for module_dir in module_dirs:
        sys.path.append(os.path.join(base_dir, module_dir))

# Set up paths
setup_import_paths()

# Import all the building blocks we need
try:
    from tinytorch.core.tensor import Tensor
    from tinytorch.core.layers import Dense
    from tinytorch.core.networks import Sequential
    from tinytorch.core.training import CrossEntropyLoss, Trainer
except ImportError:
    # For development, create mock classes or import from local modules
    try:
        from tensor_dev import Tensor
        from layers_dev import Dense
        from networks_dev import Sequential
        from training_dev import CrossEntropyLoss, Trainer
    except ImportError:
        # Create minimal mock classes for development
        class Tensor:
            def __init__(self, data):
                self.data = np.array(data)
                self.shape = self.data.shape

            def __str__(self):
                return f"Tensor({self.data})"

        class Dense:
            def __init__(self, input_size, output_size):
                self.input_size = input_size
                self.output_size = output_size
                self.weights = Tensor(np.random.randn(input_size, output_size) * 0.1)
                self.bias = Tensor(np.zeros(output_size))

            def __str__(self):
                return f"Dense({self.input_size}, {self.output_size})"

        class Sequential:
            def __init__(self, layers=None):
                self.layers = layers or []

        class CrossEntropyLoss:
            def __init__(self):
                pass

        class Trainer:
            def __init__(self, model, optimizer, loss_function):
                self.model = model
                self.optimizer = optimizer
                self.loss_function = loss_function

# %% ../../modules/source/12_compression/compression_dev.ipynb 6
class CompressionMetrics:
    """
    Utilities for measuring model size, sparsity, and compression efficiency.

    This class provides tools to analyze neural network models and understand
    their memory footprint, parameter distribution, and compression potential.
    """

    def __init__(self):
        """Initialize compression metrics analyzer."""
        pass

    def count_parameters(self, model: Sequential) -> Dict[str, int]:
        """
        Count parameters in a neural network model.

        Args:
            model: Sequential model to analyze

        Returns:
            Dictionary with parameter counts per layer and total

        TODO: Implement parameter counting for neural network analysis.

        STEP-BY-STEP IMPLEMENTATION:
        1. Initialize counters for different parameter types
        2. Iterate through each layer in the model
        3. Count weights and biases for each layer
        4. Calculate total parameters across all layers
        5. Return detailed breakdown dictionary

        EXAMPLE OUTPUT:
        {
            'layer_0_weights': 100352,
            'layer_0_bias': 128,
            'layer_1_weights': 8192,
            'layer_1_bias': 64,
            'layer_2_weights': 640,
            'layer_2_bias': 10,
            'total_parameters': 109386,
            'total_weights': 109184,
            'total_bias': 202
        }

        IMPLEMENTATION HINTS:
        - Use hasattr() to check if layer has weights/bias attributes
        - Weight matrices have shape (input_size, output_size)
        - Bias vectors have shape (output_size,)
        - Use np.prod() to calculate total elements from shape
        - Track layer index for detailed reporting

        LEARNING CONNECTIONS:
        - This is like `model.numel()` in PyTorch
        - Understanding where parameters are concentrated
        - Foundation for compression target selection
        """
        ### BEGIN SOLUTION
        param_counts = {}
        total_params = 0
        total_weights = 0
        total_bias = 0

        for i, layer in enumerate(model.layers):
            # Count weights if layer has them
            if hasattr(layer, 'weights') and layer.weights is not None:
                # Handle different weight formats
                if hasattr(layer.weights, 'shape'):
                    weight_count = np.prod(layer.weights.shape)
                else:
                    weight_count = np.prod(layer.weights.data.shape)

                param_counts[f'layer_{i}_weights'] = weight_count
                total_weights += weight_count
                total_params += weight_count

            # Count bias if layer has them
            if hasattr(layer, 'bias') and layer.bias is not None:
                # Handle different bias formats
                if hasattr(layer.bias, 'shape'):
                    bias_count = np.prod(layer.bias.shape)
                else:
                    bias_count = np.prod(layer.bias.data.shape)

                param_counts[f'layer_{i}_bias'] = bias_count
                total_bias += bias_count
                total_params += bias_count

        # Add summary statistics
        param_counts['total_parameters'] = total_params
        param_counts['total_weights'] = total_weights
        param_counts['total_bias'] = total_bias

        return param_counts
        ### END SOLUTION

    def calculate_model_size(self, model: Sequential, dtype: str = 'float32') -> Dict[str, Any]:
        """
        Calculate memory footprint of a neural network model.

        Args:
            model: Sequential model to analyze
            dtype: Data type for size calculation ('float32', 'float16', 'int8')

        Returns:
            Dictionary with size information in different units
        """
        # Get parameter count
        param_info = self.count_parameters(model)
        total_params = param_info['total_parameters']

        # Determine bytes per parameter
        bytes_per_param = {
            'float32': 4,
            'float16': 2,
            'int8': 1
        }.get(dtype, 4)

        # Calculate sizes
        total_bytes = total_params * bytes_per_param
        size_kb = total_bytes / 1024
        size_mb = size_kb / 1024

        return {
            'total_parameters': total_params,
            'bytes_per_parameter': bytes_per_param,
            'total_bytes': total_bytes,
            'size_kb': round(size_kb, 2),
            'size_mb': round(size_mb, 2),
            'dtype': dtype
        }

# %% ../../modules/source/12_compression/compression_dev.ipynb 9
def prune_weights_by_magnitude(layer: Dense, pruning_ratio: float = 0.5) -> Tuple[Dense, Dict[str, Any]]:
    """
    Prune weights in a Dense layer by magnitude.

    Args:
        layer: Dense layer to prune
        pruning_ratio: Fraction of weights to remove (0.0 to 1.0)

    Returns:
        Tuple of (pruned_layer, pruning_info)

    TODO: Implement magnitude-based weight pruning.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get weight matrix from layer
    2. Calculate absolute values (magnitudes)
    3. Find threshold using percentile
    4. Create binary mask for weights above threshold
    5. Apply mask to weights (set small weights to zero)
    6. Update layer weights and return pruning statistics

    EXAMPLE USAGE:
    ```python
    layer = Dense(784, 128)
    pruned_layer, info = prune_weights_by_magnitude(layer, pruning_ratio=0.3)
    print(f"Pruned {info['weights_removed']} weights, sparsity: {info['sparsity']:.2f}")
    ```

    IMPLEMENTATION HINTS:
    - Use np.percentile() with pruning_ratio * 100 for threshold
    - Create mask with np.abs(weights) > threshold
    - Apply mask by element-wise multiplication
    - Count zeros to calculate sparsity
    - Return original layer (modified) and statistics

    LEARNING CONNECTIONS:
    - This is the foundation of network pruning
    - Magnitude pruning is simplest but effective
    - Sparsity = fraction of weights that are zero
    - Threshold selection affects accuracy vs compression trade-off
    """
    ### BEGIN SOLUTION
    # Get current weights and ensure they're numpy arrays
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    original_weights = weights.copy()

    # Calculate magnitudes and threshold
    magnitudes = np.abs(weights)
    threshold = np.percentile(magnitudes, pruning_ratio * 100)

    # Create mask and apply pruning
    mask = magnitudes > threshold
    pruned_weights = weights * mask

    # Update layer weights
    layer.weights.data = pruned_weights

    # Calculate pruning statistics
    total_weights = weights.size
    zero_weights = np.sum(pruned_weights == 0)
    weights_removed = zero_weights - np.sum(original_weights == 0)
    sparsity = zero_weights / total_weights

    pruning_info = {
        'pruning_ratio': pruning_ratio,
        'threshold': float(threshold),
        'total_weights': total_weights,
        'weights_removed': weights_removed,
        'remaining_weights': total_weights - zero_weights,
        'sparsity': float(sparsity),
        'compression_ratio': 1 / (1 - sparsity) if sparsity < 1 else float('inf')
    }

    return layer, pruning_info
    ### END SOLUTION

# %% ../../modules/source/12_compression/compression_dev.ipynb 10
def calculate_sparsity(layer: Dense) -> float:
    """
    Calculate sparsity (fraction of zero weights) in a Dense layer.

    Args:
        layer: Dense layer to analyze

    Returns:
        Sparsity as float between 0.0 and 1.0

    TODO: Implement sparsity calculation.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get weight matrix from layer
    2. Count total number of weights
    3. Count number of zero weights
    4. Calculate sparsity = zero_weights / total_weights
    5. Return as float

    EXAMPLE USAGE:
    ```python
    layer = Dense(100, 50)
    sparsity = calculate_sparsity(layer)
    print(f"Layer sparsity: {sparsity:.2%}")
    ```

    IMPLEMENTATION HINTS:
    - Use np.sum() with condition to count zeros
    - Use .size attribute for total elements
    - Return 0.0 if no weights (edge case)
    - Sparsity of 0.0 = dense, 1.0 = completely sparse

    LEARNING CONNECTIONS:
    - Sparsity is key metric for compression
    - Higher sparsity = more compression
    - Sparsity patterns affect hardware efficiency
    """
    ### BEGIN SOLUTION
    if not hasattr(layer, 'weights') or layer.weights is None:
        return 0.0

    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    total_weights = weights.size
    zero_weights = np.sum(weights == 0)

    return zero_weights / total_weights if total_weights > 0 else 0.0
    ### END SOLUTION

# %% ../../modules/source/12_compression/compression_dev.ipynb 13
def quantize_layer_weights(layer: Dense, bits: int = 8) -> Tuple[Dense, Dict[str, Any]]:
    """
    Quantize layer weights to reduce precision.

    Args:
        layer: Dense layer to quantize
        bits: Number of bits for quantization (8, 16, etc.)

    Returns:
        Tuple of (quantized_layer, quantization_info)

    TODO: Implement weight quantization for memory efficiency.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get weight matrix from layer
    2. Find min and max values for quantization range
    3. Calculate scale factor: (max - min) / (2^bits - 1)
    4. Quantize: round((weights - min) / scale)
    5. Dequantize back to float: quantized * scale + min
    6. Update layer weights and return statistics

    EXAMPLE USAGE:
    ```python
    layer = Dense(784, 128)
    quantized_layer, info = quantize_layer_weights(layer, bits=8)
    print(f"Memory reduction: {info['memory_reduction']:.1f}x")
    ```

    IMPLEMENTATION HINTS:
    - Use np.min() and np.max() to find weight range
    - Clamp quantized values to valid range [0, 2^bits-1]
    - Store original dtype for memory calculation
    - Calculate theoretical memory savings

    LEARNING CONNECTIONS:
    - This is how mobile AI frameworks work
    - Hardware accelerators optimize for INT8
    - Precision-performance trade-off is key
    """
    ### BEGIN SOLUTION
    # Get current weights and ensure they're numpy arrays
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    original_weights = weights.copy()
    original_dtype = weights.dtype

    # Find min and max for quantization range
    w_min, w_max = np.min(weights), np.max(weights)

    # Calculate scale factor
    scale = (w_max - w_min) / (2**bits - 1)

    # Quantize weights
    quantized = np.round((weights - w_min) / scale)
    quantized = np.clip(quantized, 0, 2**bits - 1)  # Clamp to valid range

    # Dequantize back to float (simulation of quantized inference)
    dequantized = quantized * scale + w_min

    # Update layer weights
    layer.weights.data = dequantized.astype(np.float32)

    # Calculate quantization statistics
    total_weights = weights.size
    original_bytes = total_weights * 4  # FP32 = 4 bytes
    quantized_bytes = total_weights * (bits // 8)  # bits/8 bytes per weight
    memory_reduction = original_bytes / quantized_bytes if quantized_bytes > 0 else 1.0

    # Calculate quantization error
    mse_error = np.mean((original_weights - dequantized) ** 2)
    max_error = np.max(np.abs(original_weights - dequantized))

    quantization_info = {
        'bits': bits,
        'scale': float(scale),
        'min_val': float(w_min),
        'max_val': float(w_max),
        'total_weights': total_weights,
        'original_bytes': original_bytes,
        'quantized_bytes': quantized_bytes,
        'memory_reduction': float(memory_reduction),
        'mse_error': float(mse_error),
        'max_error': float(max_error),
        'original_dtype': str(original_dtype)
    }

    return layer, quantization_info
    ### END SOLUTION

# %% ../../modules/source/12_compression/compression_dev.ipynb 16
class DistillationLoss:
    """
    Combined loss function for knowledge distillation.

    This loss combines standard classification loss (hard targets) with
    distillation loss (soft targets from teacher) for training compact models.
    """

    def __init__(self, temperature: float = 3.0, alpha: float = 0.5):
        """
        Initialize distillation loss.

        Args:
            temperature: Temperature for softening probability distributions
            alpha: Weight for hard loss (1-alpha for soft loss)
        """
        self.temperature = temperature
        self.alpha = alpha
        self.ce_loss = CrossEntropyLoss()

    def __call__(self, student_logits: np.ndarray, teacher_logits: np.ndarray,
                 true_labels: np.ndarray) -> float:
        """
        Calculate combined distillation loss.

        Args:
            student_logits: Raw outputs from student model
            teacher_logits: Raw outputs from teacher model
            true_labels: Ground truth labels

        Returns:
            Combined loss value

        TODO: Implement knowledge distillation loss function.

        STEP-BY-STEP IMPLEMENTATION:
        1. Calculate hard loss using standard cross-entropy
        2. Apply temperature scaling to both logits
        3. Calculate soft targets from teacher logits
        4. Calculate soft loss between student and teacher distributions
        5. Combine hard and soft losses with alpha weighting
        6. Return total loss

        EXAMPLE USAGE:
        ```python
        distill_loss = DistillationLoss(temperature=3.0, alpha=0.5)
        loss = distill_loss(student_out, teacher_out, labels)
        ```

        IMPLEMENTATION HINTS:
        - Use temperature scaling before softmax: logits / temperature
        - Implement stable softmax to avoid numerical issues
        - Scale soft loss by temperature^2 (standard practice)
        - Ensure proper normalization for both losses

        LEARNING CONNECTIONS:
        - This is how DistilBERT was trained
        - Temperature controls knowledge transfer richness
        - Alpha balances accuracy vs compression
        """
        ### BEGIN SOLUTION
        # Convert inputs to numpy arrays if needed
        if not isinstance(student_logits, np.ndarray):
            student_logits = np.array(student_logits)
        if not isinstance(teacher_logits, np.ndarray):
            teacher_logits = np.array(teacher_logits)
        if not isinstance(true_labels, np.ndarray):
            true_labels = np.array(true_labels)

        # Hard loss: standard classification loss
        hard_loss = self._cross_entropy_loss(student_logits, true_labels)

        # Soft loss: distillation from teacher
        # Apply temperature scaling
        teacher_soft = self._softmax(teacher_logits / self.temperature)
        student_soft = self._softmax(student_logits / self.temperature)

        # Calculate soft loss (KL divergence)
        soft_loss = -np.mean(np.sum(teacher_soft * np.log(student_soft + 1e-10), axis=-1))

        # Scale soft loss by temperature^2 (standard practice)
        soft_loss *= (self.temperature ** 2)

        # Combine losses
        total_loss = self.alpha * hard_loss + (1 - self.alpha) * soft_loss

        return float(total_loss)
        ### END SOLUTION

    def _softmax(self, logits: np.ndarray) -> np.ndarray:
        """Numerically stable softmax."""
        # Subtract max for numerical stability
        exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
        return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)

    def _cross_entropy_loss(self, logits: np.ndarray, labels: np.ndarray) -> float:
        """Simple cross-entropy loss implementation."""
        # Convert labels to one-hot if needed
        if labels.ndim == 1:
            num_classes = logits.shape[-1]
            one_hot = np.zeros((labels.shape[0], num_classes))
            one_hot[np.arange(labels.shape[0]), labels] = 1
            labels = one_hot

        # Apply softmax and calculate cross-entropy
        probs = self._softmax(logits)
        return -np.mean(np.sum(labels * np.log(probs + 1e-10), axis=-1))

# %% ../../modules/source/12_compression/compression_dev.ipynb 19
def compute_neuron_importance(layer: Dense, method: str = 'weight_magnitude') -> np.ndarray:
    """
    Compute importance scores for each neuron in a Dense layer.

    Args:
        layer: Dense layer to analyze
        method: Importance computation method

    Returns:
        Array of importance scores for each output neuron

    TODO: Implement neuron importance calculation.

    STEP-BY-STEP IMPLEMENTATION:
    1. Get weight matrix from layer
    2. Choose importance metric based on method
    3. Calculate per-neuron importance scores
    4. Return array of scores (one per output neuron)

    AVAILABLE METHODS:
    - 'weight_magnitude': Sum of absolute weights per neuron
    - 'weight_variance': Variance of weights per neuron
    - 'random': Random importance (for baseline comparison)

    IMPLEMENTATION HINTS:
    - Weights shape is (input_size, output_size)
    - Each column represents one output neuron
    - Use axis=0 for operations across input dimensions
    - Higher scores = more important neurons

    LEARNING CONNECTIONS:
    - This is how neural architecture search works
    - Different metrics capture different aspects of importance
    - Importance ranking is crucial for effective pruning
    """
    ### BEGIN SOLUTION
    # Get weights and ensure they're numpy arrays
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    if method == 'weight_magnitude':
        # Sum of absolute weights per neuron (column)
        importance = np.sum(np.abs(weights), axis=0)

    elif method == 'weight_variance':
        # Variance of weights per neuron (column)
        importance = np.var(weights, axis=0)

    elif method == 'random':
        # Random importance for baseline comparison
        importance = np.random.rand(weights.shape[1])

    else:
        raise ValueError(f"Unknown importance method: {method}")

    return importance
    ### END SOLUTION

# %% ../../modules/source/12_compression/compression_dev.ipynb 20
def prune_layer_neurons(layer: Dense, keep_ratio: float = 0.7,
                       importance_method: str = 'weight_magnitude') -> Tuple[Dense, Dict[str, Any]]:
    """
    Remove least important neurons from a Dense layer.

    Args:
        layer: Dense layer to prune
        keep_ratio: Fraction of neurons to keep (0.0 to 1.0)
        importance_method: Method for computing neuron importance

    Returns:
        Tuple of (pruned_layer, pruning_info)

    TODO: Implement structured neuron pruning.

    STEP-BY-STEP IMPLEMENTATION:
    1. Compute importance scores for all neurons
    2. Determine how many neurons to keep
    3. Select indices of most important neurons
    4. Create new layer with reduced dimensions
    5. Copy weights and biases for selected neurons
    6. Return pruned layer and statistics

    EXAMPLE USAGE:
    ```python
    layer = Dense(784, 128)
    pruned_layer, info = prune_layer_neurons(layer, keep_ratio=0.75)
    print(f"Reduced from {info['original_neurons']} to {info['remaining_neurons']} neurons")
    ```

    IMPLEMENTATION HINTS:
    - Use np.argsort() to rank neurons by importance
    - Take the top keep_count neurons: indices[-keep_count:]
    - Create new layer with reduced output size
    - Copy both weights and bias for selected neurons
    - Track original and new sizes for statistics

    LEARNING CONNECTIONS:
    - This is actual model architecture modification
    - Hardware gets real speedup from smaller matrices
    - Must consider cascade effects on next layers
    """
    ### BEGIN SOLUTION
    # Compute neuron importance
    importance_scores = compute_neuron_importance(layer, importance_method)

    # Determine how many neurons to keep
    original_neurons = layer.output_size
    keep_count = max(1, int(original_neurons * keep_ratio))  # Keep at least 1 neuron

    # Select most important neurons
    sorted_indices = np.argsort(importance_scores)
    keep_indices = sorted_indices[-keep_count:]  # Take top keep_count neurons
    keep_indices = np.sort(keep_indices)  # Sort for consistent ordering

    # Get current weights and biases
    weights = layer.weights.data
    if not isinstance(weights, np.ndarray):
        weights = np.array(weights)

    bias = layer.bias.data if layer.bias is not None else None
    if bias is not None and not isinstance(bias, np.ndarray):
        bias = np.array(bias)

    # Create new layer with reduced dimensions
    pruned_layer = Dense(layer.input_size, keep_count)

    # Copy weights for selected neurons
    pruned_weights = weights[:, keep_indices]
    pruned_layer.weights.data = np.ascontiguousarray(pruned_weights)

    # Copy bias for selected neurons
    if bias is not None:
        pruned_bias = bias[keep_indices]
        pruned_layer.bias.data = np.ascontiguousarray(pruned_bias)

    # Calculate pruning statistics
    neurons_removed = original_neurons - keep_count
    compression_ratio = original_neurons / keep_count if keep_count > 0 else float('inf')

    # Calculate parameter reduction
    original_params = layer.input_size * original_neurons + (original_neurons if bias is not None else 0)
    new_params = layer.input_size * keep_count + (keep_count if bias is not None else 0)
    param_reduction = (original_params - new_params) / original_params

    pruning_info = {
        'keep_ratio': keep_ratio,
        'importance_method': importance_method,
        'original_neurons': original_neurons,
        'remaining_neurons': keep_count,
        'neurons_removed': neurons_removed,
        'compression_ratio': float(compression_ratio),
        'original_params': original_params,
        'new_params': new_params,
        'param_reduction': float(param_reduction),
        'keep_indices': keep_indices.tolist()
    }

    return pruned_layer, pruning_info
    ### END SOLUTION

# %% ../../modules/source/12_compression/compression_dev.ipynb 23
def compare_compression_techniques(original_model: Sequential) -> Dict[str, Dict[str, Any]]:
    """
    Compare all compression techniques on the same model.

    Args:
        original_model: Base model to compress using different techniques

    Returns:
        Dictionary comparing results from different compression approaches

    TODO: Implement comprehensive compression comparison.

    STEP-BY-STEP IMPLEMENTATION:
    1. Set up baseline metrics from original model
    2. Apply each compression technique individually
    3. Apply combined compression techniques
    4. Measure and compare all results
    5. Return comprehensive comparison data

    COMPARISON DIMENSIONS:
    - Model size (MB)
    - Parameter count
    - Compression ratio
    - Memory reduction
    - Estimated speedup (for structured techniques)

    IMPLEMENTATION HINTS:
    - Create separate model copies for each technique
    - Use consistent parameters across techniques
    - Track both individual and combined effects
    - Include baseline for reference

    LEARNING CONNECTIONS:
    - This is how research papers compare compression methods
    - Production systems need this analysis for deployment decisions
    - Understanding trade-offs guides technique selection
    """
    ### BEGIN SOLUTION
    results = {}
    metrics = CompressionMetrics()

    # Baseline: Original model
    baseline_params = metrics.count_parameters(original_model)
    baseline_size = metrics.calculate_model_size(original_model)

    results['baseline'] = {
        'technique': 'Original Model',
        'parameters': baseline_params['total_parameters'],
        'size_mb': baseline_size['size_mb'],
        'compression_ratio': 1.0,
        'memory_reduction': 0.0
    }

    # Technique 1: Magnitude-based pruning only
    model_pruning = Sequential([Dense(layer.input_size, layer.output_size) for layer in original_model.layers])
    for i, layer in enumerate(model_pruning.layers):
        layer.weights.data = original_model.layers[i].weights.data.copy() if hasattr(original_model.layers[i].weights.data, 'copy') else np.array(original_model.layers[i].weights.data)
        if hasattr(layer, 'bias') and original_model.layers[i].bias is not None:
            layer.bias.data = original_model.layers[i].bias.data.copy() if hasattr(original_model.layers[i].bias.data, 'copy') else np.array(original_model.layers[i].bias.data)

    # Apply magnitude pruning to each layer
    total_sparsity = 0
    for i, layer in enumerate(model_pruning.layers):
        if isinstance(layer, Dense):
            _, prune_info = prune_weights_by_magnitude(layer, pruning_ratio=0.3)
            total_sparsity += prune_info['sparsity']

    avg_sparsity = total_sparsity / len(model_pruning.layers)
    pruning_params = metrics.count_parameters(model_pruning)
    pruning_size = metrics.calculate_model_size(model_pruning)

    results['magnitude_pruning'] = {
        'technique': 'Magnitude Pruning (30%)',
        'parameters': pruning_params['total_parameters'],
        'size_mb': pruning_size['size_mb'],
        'compression_ratio': baseline_size['size_mb'] / pruning_size['size_mb'],
        'memory_reduction': (baseline_size['size_mb'] - pruning_size['size_mb']) / baseline_size['size_mb'],
        'sparsity': avg_sparsity
    }

    # Technique 2: Quantization only
    model_quantization = Sequential([Dense(layer.input_size, layer.output_size) for layer in original_model.layers])
    for i, layer in enumerate(model_quantization.layers):
        layer.weights.data = original_model.layers[i].weights.data.copy() if hasattr(original_model.layers[i].weights.data, 'copy') else np.array(original_model.layers[i].weights.data)
        if hasattr(layer, 'bias') and original_model.layers[i].bias is not None:
            layer.bias.data = original_model.layers[i].bias.data.copy() if hasattr(original_model.layers[i].bias.data, 'copy') else np.array(original_model.layers[i].bias.data)

    # Apply quantization to each layer
    total_memory_reduction = 0
    for i, layer in enumerate(model_quantization.layers):
        if isinstance(layer, Dense):
            _, quant_info = quantize_layer_weights(layer, bits=8)
            total_memory_reduction += quant_info['memory_reduction']

    avg_memory_reduction = total_memory_reduction / len(model_quantization.layers)
    quantization_size = metrics.calculate_model_size(model_quantization, dtype='int8')

    results['quantization'] = {
        'technique': 'Quantization (INT8)',
        'parameters': baseline_params['total_parameters'],
        'size_mb': quantization_size['size_mb'],
        'compression_ratio': baseline_size['size_mb'] / quantization_size['size_mb'],
        'memory_reduction': (baseline_size['size_mb'] - quantization_size['size_mb']) / baseline_size['size_mb'],
        'avg_memory_reduction_factor': avg_memory_reduction
    }

    # Technique 3: Structured pruning only
    model_structured = Sequential([Dense(layer.input_size, layer.output_size) for layer in original_model.layers])
    for i, layer in enumerate(model_structured.layers):
        layer.weights.data = original_model.layers[i].weights.data.copy() if hasattr(original_model.layers[i].weights.data, 'copy') else np.array(original_model.layers[i].weights.data)
        if hasattr(layer, 'bias') and original_model.layers[i].bias is not None:
            layer.bias.data = original_model.layers[i].bias.data.copy() if hasattr(original_model.layers[i].bias.data, 'copy') else np.array(original_model.layers[i].bias.data)

    # Apply structured pruning to each layer
    total_param_reduction = 0
    for i, layer in enumerate(model_structured.layers):
        if isinstance(layer, Dense):
            pruned_layer, struct_info = prune_layer_neurons(layer, keep_ratio=0.75)
            model_structured.layers[i] = pruned_layer
            total_param_reduction += struct_info['param_reduction']

    avg_param_reduction = total_param_reduction / len(model_structured.layers)
    structured_params = metrics.count_parameters(model_structured)
    structured_size = metrics.calculate_model_size(model_structured)

    results['structured_pruning'] = {
        'technique': 'Structured Pruning (75% neurons kept)',
        'parameters': structured_params['total_parameters'],
        'size_mb': structured_size['size_mb'],
        'compression_ratio': baseline_size['size_mb'] / structured_size['size_mb'],
        'memory_reduction': (baseline_size['size_mb'] - structured_size['size_mb']) / baseline_size['size_mb'],
        'param_reduction': avg_param_reduction
    }

    # Technique 4: Combined approach
    model_combined = Sequential([Dense(layer.input_size, layer.output_size) for layer in original_model.layers])
    for i, layer in enumerate(model_combined.layers):
        layer.weights.data = original_model.layers[i].weights.data.copy() if hasattr(original_model.layers[i].weights.data, 'copy') else np.array(original_model.layers[i].weights.data)
        if hasattr(layer, 'bias') and original_model.layers[i].bias is not None:
            layer.bias.data = original_model.layers[i].bias.data.copy() if hasattr(original_model.layers[i].bias.data, 'copy') else np.array(original_model.layers[i].bias.data)

    # Apply magnitude pruning + quantization + structured pruning
    for i, layer in enumerate(model_combined.layers):
        if isinstance(layer, Dense):
            # Step 1: Magnitude pruning
            _, _ = prune_weights_by_magnitude(layer, pruning_ratio=0.2)
            # Step 2: Quantization
            _, _ = quantize_layer_weights(layer, bits=8)
            # Step 3: Structured pruning
            pruned_layer, _ = prune_layer_neurons(layer, keep_ratio=0.8)
            model_combined.layers[i] = pruned_layer

    combined_params = metrics.count_parameters(model_combined)
    combined_size = metrics.calculate_model_size(model_combined, dtype='int8')

    results['combined'] = {
        'technique': 'Combined (Pruning + Quantization + Structured)',
        'parameters': combined_params['total_parameters'],
        'size_mb': combined_size['size_mb'],
        'compression_ratio': baseline_size['size_mb'] / combined_size['size_mb'],
        'memory_reduction': (baseline_size['size_mb'] - combined_size['size_mb']) / baseline_size['size_mb']
    }

    return results
    ### END SOLUTION