mirror of
https://github.com/MLSysBook/TinyTorch.git
synced 2026-03-11 18:33:34 -05:00
docs: add Args/Returns docstrings to quantization functions
This commit is contained in:
@@ -416,6 +416,14 @@ def quantize_int8(tensor: Tensor) -> Tuple[Tensor, float, int]:
|
||||
4. Apply quantization formula: round(value / scale) + zero_point
|
||||
5. Clamp to INT8 range [-128, 127]
|
||||
|
||||
Args:
|
||||
tensor: Input FP32 tensor to quantize
|
||||
|
||||
Returns:
|
||||
q_tensor: Quantized INT8 tensor
|
||||
scale: Scaling factor (float)
|
||||
zero_point: Zero point offset (int)
|
||||
|
||||
EXAMPLE:
|
||||
>>> tensor = Tensor([[-1.0, 0.0, 2.0], [0.5, 1.5, -0.5]])
|
||||
>>> q_tensor, scale, zero_point = quantize_int8(tensor)
|
||||
@@ -542,6 +550,14 @@ def dequantize_int8(q_tensor: Tensor, scale: float, zero_point: int) -> Tensor:
|
||||
1. Apply inverse quantization: (quantized_value - zero_point) * scale
|
||||
2. Return as new FP32 Tensor
|
||||
|
||||
Args:
|
||||
q_tensor: Quantized INT8 tensor
|
||||
scale: Scaling factor from quantization
|
||||
zero_point: Zero point offset from quantization
|
||||
|
||||
Returns:
|
||||
Reconstructed FP32 tensor
|
||||
|
||||
EXAMPLE:
|
||||
>>> q_tensor = Tensor([[-42, 0, 85]]) # INT8 values
|
||||
>>> scale, zero_point = 0.0314, 64
|
||||
@@ -1049,6 +1065,13 @@ def quantize_model(model, calibration_data: Optional[List[Tensor]] = None) -> No
|
||||
3. If calibration data provided, calibrate input quantization
|
||||
4. Handle Sequential containers properly
|
||||
|
||||
Args:
|
||||
model: Model to quantize (with .layers or similar structure)
|
||||
calibration_data: Optional list of sample inputs for calibration
|
||||
|
||||
Returns:
|
||||
None (modifies model in-place)
|
||||
|
||||
EXAMPLE:
|
||||
>>> model = Sequential(Linear(10, 5), ReLU(), Linear(5, 2))
|
||||
>>> quantize_model(model)
|
||||
|
||||
Reference in New Issue
Block a user