Update tinytorch and tito with module exports

Re-exported all modules after restructuring:
- Updated _modidx.py with new module locations
- Removed outdated autogeneration headers
- Updated all core modules (tensor, autograd, layers, etc.)
- Updated optimization modules (quantization, compression, etc.)
- Updated TITO commands for new structure

Changes include:
- 24 tinytorch/ module files
- 24 tito/ command and core files
- Updated references from modules/source/ to modules/

All modules re-exported via nbdev from their new locations.
This commit is contained in:
Vijay Janapa Reddi
2025-11-10 19:42:03 -05:00
parent 9fdfa4317c
commit 41b132f55f
48 changed files with 681 additions and 2035 deletions

View File

@@ -1,21 +1,7 @@
# ╔═══════════════════════════════════════════════════════════════════════════════╗
# ║ 🚨 CRITICAL WARNING 🚨 ║
# ║ AUTOGENERATED! DO NOT EDIT! ║
# ║ ║
# ║ This file is AUTOMATICALLY GENERATED from source modules. ║
# ║ ANY CHANGES MADE HERE WILL BE LOST when modules are re-exported! ║
# ║ ║
# ║ ✅ TO EDIT: modules/source/XX_quantization/quantization_dev.py ║
# ║ ✅ TO EXPORT: Run 'tito module complete <module_name>' ║
# ║ ║
# ║ 🛡️ STUDENT PROTECTION: This file contains optimized implementations. ║
# ║ Editing it directly may break module functionality and training. ║
# ║ ║
# ║ 🎓 LEARNING TIP: Work in modules/source/ - that's where real development ║
# ║ happens! The tinytorch/ directory is just the compiled output. ║
# ╚═══════════════════════════════════════════════════════════════════════════════╝
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../modules/source/16_quantization/quantization_dev.ipynb.
# %% auto 0
# NOTE(review): this empty assignment is superseded by the very next line; it
# looks like the pre-change line of the diff shown next to its replacement —
# confirm before removing.
__all__ = []
# Names exported by this module (what `from ... import *` exposes).
__all__ = ['QuantizationComplete', 'quantize_int8', 'dequantize_int8', 'quantize_model']
# %% ../../modules/source/16_quantization/quantization_dev.ipynb 3
import numpy as np
@@ -29,3 +15,94 @@ from ..core.layers import Linear
from ..core.activations import ReLU
print("✅ Quantization module imports complete")
# %% ../../modules/source/16_quantization/quantization_dev.ipynb 34
class QuantizationComplete:
    """
    Complete quantization system for milestone use.

    Provides per-tensor affine (asymmetric) INT8 quantization and
    dequantization, plus a whole-model pass that reports memory usage.
    All methods are static; the class is only a namespace.
    """

    @staticmethod
    def _compute_qparams(min_val: float, max_val: float) -> Tuple[float, int]:
        """
        Derive the affine ``(scale, zero_point)`` pair for an observed range.

        The range is first widened so that it contains 0.0. Without this, a
        tensor whose values are all positive (or all negative) needs a
        zero_point outside the int8 range; clipping that zero_point shifts the
        whole mapping and saturates the data.

        Precondition: the widened range must be non-degenerate (callers handle
        the constant-tensor case before calling this).
        """
        low = min(min_val, 0.0)
        high = max(max_val, 0.0)
        # 255 steps separate the 256 representable int8 levels.
        scale = (high - low) / 255.0
        # Choose zero_point so that `low` lands on -128.
        zero_point = int(np.round(-128 - low / scale))
        zero_point = int(np.clip(zero_point, -128, 127))
        return scale, zero_point

    @staticmethod
    def quantize_tensor(tensor: Tensor) -> Tuple[Tensor, float, int]:
        """
        Quantize an FP32 tensor to INT8.

        Args:
            tensor: Tensor whose ``.data`` holds the values to quantize.

        Returns:
            ``(q_tensor, scale, zero_point)`` where ``q_tensor.data`` has dtype
            int8 and ``(q - zero_point) * scale`` approximates the input.
        """
        data = tensor.data

        # An empty tensor has no min/max to observe (np.min would raise).
        if np.size(data) == 0:
            return Tensor(np.zeros_like(data, dtype=np.int8)), 1.0, 0

        min_val = float(np.min(data))
        max_val = float(np.max(data))

        # (Near-)constant tensor: the scale would be ~0, so return a neutral
        # encoding. Note that with these parameters dequantization yields
        # zeros, i.e. the constant's value is not recoverable.
        if abs(max_val - min_val) < 1e-8:
            return Tensor(np.zeros_like(data, dtype=np.int8)), 1.0, 0

        scale, zero_point = QuantizationComplete._compute_qparams(min_val, max_val)

        quantized_data = np.round(data / scale + zero_point)
        quantized_data = np.clip(quantized_data, -128, 127).astype(np.int8)
        return Tensor(quantized_data), scale, zero_point

    @staticmethod
    def dequantize_tensor(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor:
        """
        Dequantize an INT8 tensor back to FP32.

        Computes ``(q - zero_point) * scale`` on ``q_tensor.data`` cast to
        float32 and wraps the result in a new Tensor.
        """
        dequantized_data = (q_tensor.data.astype(np.float32) - zero_point) * scale
        return Tensor(dequantized_data)

    # NOTE(review): `any` in the return annotation below is the builtin
    # function, not typing.Any; left untouched to keep the signature as-is.
    @staticmethod
    def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, any]:
        """
        Quantize every parameter of a model and report memory usage.

        Each tensor yielded by ``model.parameters()`` is quantized
        independently with ``quantize_tensor``. A model without a
        ``parameters`` attribute yields an empty result.

        Args:
            model: Object expected to expose ``parameters()``.
            calibration_data: Accepted for API compatibility; currently unused.

        Returns:
            Dict with keys ``quantized_layers`` (per-parameter dict of
            ``quantized``, ``scale``, ``zero_point``, ``original_shape`` keyed
            by ``param_<i>``), ``original_size_mb``, ``quantized_size_mb`` and
            ``compression_ratio`` (1.0 when nothing was quantized).
        """
        quantized_layers = {}
        original_size = 0
        quantized_size = 0

        if hasattr(model, 'parameters'):
            for i, param in enumerate(model.parameters()):
                original_size += param.data.nbytes

                q_param, scale, zp = QuantizationComplete.quantize_tensor(param)
                quantized_size += q_param.data.nbytes

                quantized_layers[f'param_{i}'] = {
                    'quantized': q_param,
                    'scale': scale,
                    'zero_point': zp,
                    'original_shape': param.data.shape,
                }

        return {
            'quantized_layers': quantized_layers,
            'original_size_mb': original_size / (1024 * 1024),
            'quantized_size_mb': quantized_size / (1024 * 1024),
            # Guard against division by zero when no bytes were quantized.
            'compression_ratio': original_size / quantized_size if quantized_size > 0 else 1.0,
        }

    @staticmethod
    def compare_models(original_model, quantized_info: Dict) -> Dict[str, float]:
        """
        Compare memory usage between original and quantized models.

        Only ``quantized_info`` (the dict returned by ``quantize_model``) is
        read; ``original_model`` is not inspected.

        Returns:
            Dict with ``original_mb``, ``quantized_mb``, ``compression_ratio``
            and ``memory_saved_mb`` (original minus quantized size).
        """
        return {
            'original_mb': quantized_info['original_size_mb'],
            'quantized_mb': quantized_info['quantized_size_mb'],
            'compression_ratio': quantized_info['compression_ratio'],
            'memory_saved_mb': quantized_info['original_size_mb'] - quantized_info['quantized_size_mb'],
        }
# Convenience functions for backward compatibility
def quantize_int8(tensor: Tensor) -> Tuple[Tensor, float, int]:
    """
    Backward-compatible alias for ``QuantizationComplete.quantize_tensor``.

    Returns that method's ``(quantized_tensor, scale, zero_point)`` result
    unchanged.
    """
    quantized_triple = QuantizationComplete.quantize_tensor(tensor)
    return quantized_triple
def dequantize_int8(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor:
    """
    Backward-compatible alias for ``QuantizationComplete.dequantize_tensor``.

    Forwards the INT8 tensor together with its scale and zero point and
    returns the resulting tensor.
    """
    restored = QuantizationComplete.dequantize_tensor(q_tensor, scale, zero_point)
    return restored
def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> Dict[str, any]:
    """
    Backward-compatible alias for ``QuantizationComplete.quantize_model``.

    Passes ``model`` and ``calibration_data`` straight through and returns the
    resulting summary dictionary.
    """
    summary = QuantizationComplete.quantize_model(model, calibration_data)
    return summary