diff --git a/modules/source/16_quantization/quantization_dev.py b/modules/source/16_quantization/quantization_dev.py
index a227050b..5bfc9874 100644
--- a/modules/source/16_quantization/quantization_dev.py
+++ b/modules/source/16_quantization/quantization_dev.py
@@ -416,6 +416,14 @@ def quantize_int8(tensor: Tensor) -> Tuple[Tensor, float, int]:
     4. Apply quantization formula: round((value - zero_point) / scale)
     5. Clamp to INT8 range [-128, 127]
 
+    Args:
+        tensor: Input FP32 tensor to quantize
+
+    Returns:
+        q_tensor: Quantized INT8 tensor
+        scale: Scaling factor (float)
+        zero_point: Zero point offset (int)
+
     EXAMPLE:
     >>> tensor = Tensor([[-1.0, 0.0, 2.0], [0.5, 1.5, -0.5]])
     >>> q_tensor, scale, zero_point = quantize_int8(tensor)
@@ -542,6 +550,14 @@ def dequantize_int8(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor:
     1. Apply inverse quantization: scale * quantized_value + zero_point * scale
     2. Return as new FP32 Tensor
 
+    Args:
+        q_tensor: Quantized INT8 tensor
+        scale: Scaling factor from quantization
+        zero_point: Zero point offset from quantization
+
+    Returns:
+        Reconstructed FP32 tensor
+
     EXAMPLE:
     >>> q_tensor = Tensor([[-42, 0, 85]]) # INT8 values
     >>> scale, zero_point = 0.0314, 64
@@ -1049,6 +1065,13 @@ def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> No
     3. If calibration data provided, calibrate input quantization
     4. Handle Sequential containers properly
 
+    Args:
+        model: Model to quantize (with .layers or similar structure)
+        calibration_data: Optional list of sample inputs for calibration
+
+    Returns:
+        None (modifies model in-place)
+
     EXAMPLE:
     >>> model = Sequential(Linear(10, 5), ReLU(), Linear(5, 2))
     >>> quantize_model(model)