mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 18:33:34 -05:00
Regenerate tinytorch package from all module exports
- Run tito export --all to update all exported code - Fix file permissions (chmod u+w) to allow export writes - Update 12 modified files with latest module code - Add 3 new files (tinygpt, acceleration, compression) - All 21 modules successfully exported
This commit is contained in:
22
tinytorch/optimization/acceleration.py
generated
Normal file
22
tinytorch/optimization/acceleration.py
generated
Normal file
@@ -0,0 +1,22 @@
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/XX_acceleration/acceleration_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/18_acceleration/acceleration_dev.ipynb 0
|
||||
#| default_exp optimization.acceleration
|
||||
#| export
|
||||
300
tinytorch/optimization/compression.py
generated
Normal file
300
tinytorch/optimization/compression.py
generated
Normal file
@@ -0,0 +1,300 @@
|
||||
# ╔═══════════════════════════════════════════════════════════════════════════════╗
|
||||
# ║ 🚨 CRITICAL WARNING 🚨 ║
|
||||
# ║ AUTOGENERATED! DO NOT EDIT! ║
|
||||
# ║ ║
|
||||
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
|
||||
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
|
||||
# ║ ║
|
||||
# ║ ✅ TO EDIT: modules/source/XX_compression/compression_dev.py ║
|
||||
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
|
||||
# ║ ║
|
||||
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
|
||||
# ║ Editing it directly may break module functionality and training. ║
|
||||
# ║ ║
|
||||
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['Sequential', 'KnowledgeDistillation', 'test_unit_knowledge_distillation', 'CompressionComplete', 'measure_sparsity',
|
||||
'magnitude_prune', 'structured_prune', 'compress_model']
|
||||
|
||||
# %% ../../modules/source/17_compression/compression_dev.ipynb 1
|
||||
import numpy as np
|
||||
import copy
|
||||
from typing import List, Dict, Any, Tuple, Optional
|
||||
import time
|
||||
|
||||
# Import from TinyTorch modules
|
||||
from ..core.tensor import Tensor
|
||||
from ..core.layers import Linear
|
||||
|
||||
# Sequential container for model compression
|
||||
class Sequential:
    """Ordered pipeline of layers for compression experiments.

    Not exported from the core layers module; provided here so compression
    code can chain layers without a full container implementation.
    """

    def __init__(self, *layers):
        # Keep an ordered, mutable list of the stacked layers.
        self.layers = list(layers)

    def forward(self, x):
        """Feed `x` through every layer in order and return the result."""
        out = x
        for stage in self.layers:
            # Prefer an explicit forward() method; otherwise treat the
            # stage as a plain callable.
            apply = stage.forward if hasattr(stage, 'forward') else stage
            out = apply(out)
        return out

    def __call__(self, x):
        # Calling the container is shorthand for forward().
        return self.forward(x)

    def parameters(self):
        """Collect trainable parameters from every layer that exposes them."""
        collected = []
        for stage in self.layers:
            if hasattr(stage, 'parameters'):
                collected.extend(stage.parameters())
        return collected
|
||||
|
||||
# %% ../../modules/source/17_compression/compression_dev.ipynb 15
|
||||
class KnowledgeDistillation:
|
||||
"""
|
||||
Knowledge distillation for model compression.
|
||||
|
||||
Train a smaller student model to mimic a larger teacher model.
|
||||
"""
|
||||
|
||||
def __init__(self, teacher_model, student_model, temperature=3.0, alpha=0.7):
|
||||
"""
|
||||
Initialize knowledge distillation.
|
||||
|
||||
TODO: Set up teacher and student models with distillation parameters
|
||||
|
||||
APPROACH:
|
||||
1. Store teacher and student models
|
||||
2. Set temperature for softening probability distributions
|
||||
3. Set alpha for balancing hard vs soft targets
|
||||
|
||||
EXAMPLE:
|
||||
>>> teacher = Sequential(Linear(100, 200), Linear(200, 50))
|
||||
>>> student = Sequential(Linear(100, 50))
|
||||
>>> kd = KnowledgeDistillation(teacher, student, temperature=4.0, alpha=0.8)
|
||||
>>> print(f"Temperature: {kd.temperature}, Alpha: {kd.alpha}")
|
||||
Temperature: 4.0, Alpha: 0.8
|
||||
|
||||
HINTS:
|
||||
- Simply assign the parameters to instance variables
|
||||
- Temperature typically ranges from 3-5 for effective softening
|
||||
- Alpha of 0.7 means 70% soft targets, 30% hard targets
|
||||
|
||||
Args:
|
||||
teacher_model: Large, pre-trained model
|
||||
student_model: Smaller model to train
|
||||
temperature: Softening parameter for distributions
|
||||
alpha: Weight for soft target loss (1-alpha for hard targets)
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
self.teacher_model = teacher_model
|
||||
self.student_model = student_model
|
||||
self.temperature = temperature
|
||||
self.alpha = alpha
|
||||
### END SOLUTION
|
||||
|
||||
def distillation_loss(self, student_logits, teacher_logits, true_labels):
|
||||
"""
|
||||
Calculate combined distillation loss.
|
||||
|
||||
TODO: Implement knowledge distillation loss function
|
||||
|
||||
APPROACH:
|
||||
1. Calculate hard target loss (student vs true labels)
|
||||
2. Calculate soft target loss (student vs teacher, with temperature)
|
||||
3. Combine losses: alpha * soft_loss + (1-alpha) * hard_loss
|
||||
|
||||
EXAMPLE:
|
||||
>>> kd = KnowledgeDistillation(teacher, student)
|
||||
>>> loss = kd.distillation_loss(student_out, teacher_out, labels)
|
||||
>>> print(f"Distillation loss: {loss:.4f}")
|
||||
|
||||
HINTS:
|
||||
- Use temperature to soften distributions: logits/temperature
|
||||
- Soft targets use KL divergence or cross-entropy
|
||||
- Hard targets use standard classification loss
|
||||
"""
|
||||
### BEGIN SOLUTION
|
||||
# Convert to numpy for this implementation
|
||||
if hasattr(student_logits, 'data'):
|
||||
student_logits = student_logits.data
|
||||
if hasattr(teacher_logits, 'data'):
|
||||
teacher_logits = teacher_logits.data
|
||||
if hasattr(true_labels, 'data'):
|
||||
true_labels = true_labels.data
|
||||
|
||||
# Soften distributions with temperature
|
||||
student_soft = self._softmax(student_logits / self.temperature)
|
||||
teacher_soft = self._softmax(teacher_logits / self.temperature)
|
||||
|
||||
# Soft target loss (KL divergence)
|
||||
soft_loss = self._kl_divergence(student_soft, teacher_soft)
|
||||
|
||||
# Hard target loss (cross-entropy)
|
||||
student_hard = self._softmax(student_logits)
|
||||
hard_loss = self._cross_entropy(student_hard, true_labels)
|
||||
|
||||
# Combined loss
|
||||
total_loss = self.alpha * soft_loss + (1 - self.alpha) * hard_loss
|
||||
|
||||
return total_loss
|
||||
### END SOLUTION
|
||||
|
||||
def _softmax(self, logits):
|
||||
"""Compute softmax with numerical stability."""
|
||||
exp_logits = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
|
||||
return exp_logits / np.sum(exp_logits, axis=-1, keepdims=True)
|
||||
|
||||
def _kl_divergence(self, p, q):
|
||||
"""Compute KL divergence between distributions."""
|
||||
return np.sum(p * np.log(p / (q + 1e-8) + 1e-8))
|
||||
|
||||
def _cross_entropy(self, predictions, labels):
|
||||
"""Compute cross-entropy loss."""
|
||||
# Simple implementation for integer labels
|
||||
if labels.ndim == 1:
|
||||
return -np.mean(np.log(predictions[np.arange(len(labels)), labels] + 1e-8))
|
||||
else:
|
||||
return -np.mean(np.sum(labels * np.log(predictions + 1e-8), axis=1))
|
||||
|
||||
def test_unit_knowledge_distillation():
    """🔬 Test knowledge distillation functionality."""
    print("🔬 Unit Test: Knowledge Distillation...")

    # Teacher is deliberately wider than the student it distills into.
    teacher = Sequential(Linear(10, 20), Linear(20, 5))
    student = Sequential(Linear(10, 5))  # Smaller model

    kd = KnowledgeDistillation(teacher, student, temperature=3.0, alpha=0.7)

    # Dummy batch of 8 examples with integer class labels.
    input_data = Tensor(np.random.randn(8, 10))
    true_labels = np.array([0, 1, 2, 3, 4, 0, 1, 2])

    # Run both models and score the student against the teacher.
    teacher_output = teacher.forward(input_data)
    student_output = student.forward(input_data)
    loss = kd.distillation_loss(student_output, teacher_output, true_labels)

    # The combined loss must be a finite, positive scalar.
    assert isinstance(loss, (float, np.floating)), f"Loss should be float, got {type(loss)}"
    assert loss > 0, f"Loss should be positive, got {loss}"
    assert not np.isnan(loss), "Loss should not be NaN"

    print("✅ knowledge_distillation works correctly!")


test_unit_knowledge_distillation()
# %% ../../modules/source/17_compression/compression_dev.ipynb 29
|
||||
class CompressionComplete:
    """Complete compression system for milestone use.

    Provides pruning, distillation, and low-rank approximation techniques.
    """

    @staticmethod
    def measure_sparsity(model) -> float:
        """Return the fraction of weights in `model` that are exactly zero."""
        total = 0
        zeros = 0

        if hasattr(model, 'parameters'):
            for p in model.parameters():
                total += p.size
                zeros += np.sum(p.data == 0)

        # A model exposing no parameters is reported as fully dense.
        return zeros / total if total > 0 else 0.0

    @staticmethod
    def magnitude_prune(model, sparsity=0.5):
        """Zero the smallest-magnitude weights of each parameter, in place.

        Args:
            model: Model with a parameters() method.
            sparsity: Fraction of weights to prune per parameter (0-1).

        Returns:
            The same model object, mutated in place.
        """
        if hasattr(model, 'parameters'):
            for p in model.parameters():
                # Per-parameter magnitude cutoff at the requested percentile.
                cutoff = np.percentile(np.abs(p.data), sparsity * 100)
                p.data[np.abs(p.data) < cutoff] = 0

        return model

    @staticmethod
    def structured_prune(model, prune_ratio=0.5):
        """Prune entire output neurons (structured pruning), in place.

        Only the first parameter is touched, and only when it is a 2-D
        Linear weight: columns whose L2 norm falls below the prune_ratio
        percentile are zeroed.

        Args:
            model: Model to prune.
            prune_ratio: Fraction of structures to prune (0-1).
        """
        if hasattr(model, 'parameters'):
            params = list(model.parameters())
            if len(params) > 0 and hasattr(params[0], 'data'):
                weight = params[0]
                if len(weight.shape) == 2:  # Linear layer
                    # Column norms measure each output neuron's importance.
                    norms = np.linalg.norm(weight.data, axis=0)
                    cutoff = np.percentile(norms, prune_ratio * 100)
                    keep = norms >= cutoff
                    weight.data[:, ~keep] = 0

        return model

    @staticmethod
    def compress_model(model, compression_config: Dict[str, Any]):
        """Run the full pruning pipeline described by `compression_config`.

        Args:
            model: Model to compress.
            compression_config: Settings dictionary with optional keys:
                - 'magnitude_sparsity': float (0-1)
                - 'structured_prune_ratio': float (0-1)

        Returns:
            Tuple of (compressed model, stats dict with before/after
            sparsity and a dense-equivalent compression ratio).
        """
        stats = {
            'original_sparsity': CompressionComplete.measure_sparsity(model)
        }

        # Magnitude pruning runs first so structured pruning sees its effect.
        if 'magnitude_sparsity' in compression_config:
            model = CompressionComplete.magnitude_prune(
                model, compression_config['magnitude_sparsity']
            )

        if 'structured_prune_ratio' in compression_config:
            model = CompressionComplete.structured_prune(
                model, compression_config['structured_prune_ratio']
            )

        final = CompressionComplete.measure_sparsity(model)
        stats['final_sparsity'] = final
        # Fully-sparse models compress "infinitely"; guard the division.
        stats['compression_ratio'] = 1.0 / (1.0 - final) if final < 1.0 else float('inf')

        return model, stats
|
||||
|
||||
# Convenience functions for backward compatibility
|
||||
def measure_sparsity(model) -> float:
    """Backward-compatible wrapper for CompressionComplete.measure_sparsity."""
    return CompressionComplete.measure_sparsity(model)
|
||||
|
||||
def magnitude_prune(model, sparsity=0.5):
    """Backward-compatible wrapper: magnitude-based weight pruning."""
    return CompressionComplete.magnitude_prune(model, sparsity)
|
||||
|
||||
def structured_prune(model, prune_ratio=0.5):
    """Backward-compatible wrapper: structured (neuron-level) pruning."""
    return CompressionComplete.structured_prune(model, prune_ratio)
|
||||
|
||||
def compress_model(model, compression_config: Dict[str, Any]):
    """Backward-compatible wrapper: full compression pipeline."""
    return CompressionComplete.compress_model(model, compression_config)
|
||||
95
tinytorch/optimization/quantization.py
generated
95
tinytorch/optimization/quantization.py
generated
@@ -15,9 +15,9 @@
|
||||
# ║ happens! The tinytorch/ directory is just the compiled output. ║
|
||||
# ╚═══════════════════════════════════════════════════════════════════════════════╝
|
||||
# %% auto 0
|
||||
__all__ = ['QuantizationComplete', 'quantize_int8', 'dequantize_int8', 'quantize_model']
|
||||
__all__ = []
|
||||
|
||||
# %% ../../modules/source/17_quantization/quantization_dev.ipynb 3
|
||||
# %% ../../modules/source/16_quantization/quantization_dev.ipynb 3
|
||||
import time
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
|
||||
@@ -29,94 +29,3 @@ from ..core.layers import Linear
|
||||
from ..core.activations import ReLU
|
||||
|
||||
print("✅ Quantization module imports complete")
|
||||
|
||||
# %% ../../modules/source/17_quantization/quantization_dev.ipynb 34
|
||||
class QuantizationComplete:
    """
    Complete quantization system for milestone use.

    Provides INT8 quantization with calibration for 4× memory reduction.
    """

    @staticmethod
    def quantize_tensor(tensor: Tensor) -> Tuple[Tensor, float, int]:
        """Quantize an FP32 tensor to INT8 with an asymmetric affine mapping.

        Args:
            tensor: FP32 tensor to quantize.

        Returns:
            Tuple (int8_tensor, scale, zero_point) such that the original
            values are approximately (int8 - zero_point) * scale.
        """
        data = tensor.data
        min_val = float(np.min(data))
        max_val = float(np.max(data))

        # Degenerate range: every element is (nearly) the same value.
        # NOTE(review): a non-zero constant tensor dequantizes to 0 here,
        # so the constant's value is lost — acceptable for this module.
        if abs(max_val - min_val) < 1e-8:
            return Tensor(np.zeros_like(data, dtype=np.int8)), 1.0, 0

        # Map [min_val, max_val] onto the 256 INT8 codes [-128, 127]:
        # min_val -> -128 and max_val -> 127 (before zero-point clipping).
        scale = (max_val - min_val) / 255.0
        zero_point = int(np.round(-128 - min_val / scale))
        zero_point = int(np.clip(zero_point, -128, 127))

        quantized_data = np.round(data / scale + zero_point)
        quantized_data = np.clip(quantized_data, -128, 127).astype(np.int8)

        return Tensor(quantized_data), scale, zero_point

    @staticmethod
    def dequantize_tensor(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor:
        """Dequantize an INT8 tensor back to FP32 using its scale/zero-point."""
        dequantized_data = (q_tensor.data.astype(np.float32) - zero_point) * scale
        return Tensor(dequantized_data)

    @staticmethod
    def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, Any]:
        """
        Quantize all Linear layers in a model.

        Args:
            model: Model exposing a parameters() method.
            calibration_data: Reserved for activation calibration; currently
                unused — weights are quantized from their own min/max.

        Returns:
            Dictionary with per-parameter quantization info and memory savings.
        """
        quantized_layers = {}
        original_size = 0
        quantized_size = 0

        # Quantize each parameter tensor independently and record the
        # metadata needed to dequantize it later.
        if hasattr(model, 'parameters'):
            for i, param in enumerate(model.parameters()):
                param_size = param.data.nbytes
                original_size += param_size

                q_param, scale, zp = QuantizationComplete.quantize_tensor(param)
                quantized_size += q_param.data.nbytes

                quantized_layers[f'param_{i}'] = {
                    'quantized': q_param,
                    'scale': scale,
                    'zero_point': zp,
                    'original_shape': param.data.shape
                }

        return {
            'quantized_layers': quantized_layers,
            'original_size_mb': original_size / (1024 * 1024),
            'quantized_size_mb': quantized_size / (1024 * 1024),
            # Guard against a parameterless model (quantized_size == 0).
            'compression_ratio': original_size / quantized_size if quantized_size > 0 else 1.0
        }

    @staticmethod
    def compare_models(original_model, quantized_info: Dict) -> Dict[str, float]:
        """Summarize memory usage of the original vs. the quantized model.

        Args:
            original_model: Kept for interface compatibility; the statistics
                are read entirely from `quantized_info`.
            quantized_info: Dictionary produced by quantize_model().
        """
        return {
            'original_mb': quantized_info['original_size_mb'],
            'quantized_mb': quantized_info['quantized_size_mb'],
            'compression_ratio': quantized_info['compression_ratio'],
            'memory_saved_mb': quantized_info['original_size_mb'] - quantized_info['quantized_size_mb']
        }
|
||||
|
||||
# Convenience functions for backward compatibility
|
||||
def quantize_int8(tensor: Tensor) -> Tuple[Tensor, float, int]:
    """Backward-compatible wrapper: quantize an FP32 tensor to INT8."""
    result = QuantizationComplete.quantize_tensor(tensor)
    return result
|
||||
|
||||
def dequantize_int8(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor:
    """Backward-compatible wrapper: dequantize an INT8 tensor to FP32."""
    result = QuantizationComplete.dequantize_tensor(q_tensor, scale, zero_point)
    return result
|
||||
|
||||
def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, Any]:
    """Quantize an entire model to INT8.

    Backward-compatible wrapper around QuantizationComplete.quantize_model;
    `calibration_data` is forwarded but currently unused there.
    (Fix: annotation previously used the builtin `any` instead of typing.Any.)
    """
    return QuantizationComplete.quantize_model(model, calibration_data)
|
||||
|
||||
Reference in New Issue
Block a user