Fix MLPerf milestone metrics: FLOPs calculation, quantization compression ratio, pruning delta sign

- Fixed FLOPs calculation to handle models with .layers attribute (not just Sequential)
- Fixed quantization compression ratio to calculate theoretical INT8 size (1 byte per element)
- Fixed pruning accuracy delta sign to correctly show +/- direction
- Added missing export directives for Tensor and numpy imports in acceleration module

Results now correctly show:
- FLOPs: 4,736 (was incorrectly showing 64)
- Quantization: 4.0x compression (was incorrectly showing 1.0x)
- Pruning delta: correct +/- sign based on actual accuracy change
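
A quick sanity check on those numbers. The compression ratio is shape-independent: float32 weights take 4 bytes per element, a theoretical INT8 store takes 1, so the ratio is 4.0x. The FLOPs figure follows the profiler's 2-FLOPs-per-multiply-accumulate convention (visible in the conv formula below); the MLP shapes in this sketch are hypothetical, chosen only because they reproduce the 4,736 figure:

# Hypothetical dense shapes (64 -> 32 -> 10); the actual milestone model
# is not part of this diff, but these shapes reproduce the reported value.
layers = [(64, 32), (32, 10)]

# Dense FLOPs convention: 2 * fan_in * fan_out (one multiply + one add per weight)
flops = sum(2 * fan_in * fan_out for fan_in, fan_out in layers)
print(flops)  # 4736

# Compression ratio: float32 storage vs theoretical INT8 storage
n_elements = sum(fan_in * fan_out for fan_in, fan_out in layers)
print((n_elements * 4) / (n_elements * 1))  # 4.0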
Author: Vijay Janapa Reddi
Date: 2025-12-03 09:36:10 -08:00
parent 93e536e90d
commit 43ea5f9a65
6 changed files with 15 additions and 7 deletions


@@ -381,11 +381,12 @@ def main():
         f"{sparsity_after:.1%}",
         f"[green]{sparsity_after:.0%} weights zeroed[/green]"
     )
+    prune_acc_delta = pruned_acc - baseline_acc
     table.add_row(
         "Accuracy",
         f"{baseline_acc:.1f}%",
         f"{pruned_acc:.1f}%",
-        f"[{'green' if abs(baseline_acc - pruned_acc) < 10 else 'yellow'}]{baseline_acc - pruned_acc:+.1f}%[/]"
+        f"[{'green' if prune_acc_delta >= 0 else 'red'}]{prune_acc_delta:+.1f}%[/]"
     )
     console.print(table)
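
A quick illustration of the sign fix, with hypothetical accuracy values. The old expression printed baseline minus pruned, so an accuracy drop rendered as a positive delta:

baseline_acc, pruned_acc = 92.0, 90.5          # hypothetical values
prune_acc_delta = pruned_acc - baseline_acc    # -1.5: accuracy dropped
print(f"{baseline_acc - pruned_acc:+.1f}%")    # +1.5% (old, wrong sign)
print(f"{prune_acc_delta:+.1f}%")              # -1.5% (new, correct sign)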


@@ -390,8 +390,8 @@ class Profiler:
             total_flops = (output_h * output_w * kernel_h * kernel_w *
                            in_channels * out_channels * 2)
-        elif model_name == 'Sequential':
-            # Sequential model: sum FLOPs of all layers
+        elif model_name == 'Sequential' or hasattr(model, 'layers'):
+            # Sequential model or model with layers: sum FLOPs of all layers
             current_shape = input_shape
             for layer in model.layers:
                 layer_flops = self.count_flops(layer, current_shape)
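
The traversal now also covers containers that expose a .layers list without being named Sequential. A minimal sketch, assuming a hypothetical container like the SimpleModel mentioned in the quantizer comments below:

# Hypothetical container: has .layers but type(model).__name__ != 'Sequential'
class SimpleModel:
    def __init__(self, layers):
        self.layers = layers

model = SimpleModel(layers=[])
print(type(model).__name__ == 'Sequential')  # False: old check skipped this model
print(hasattr(model, 'layers'))              # True: new check sums its layer FLOPs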


@@ -1743,14 +1743,15 @@ class Quantizer:
         # Iterate through model parameters
         # SimpleModel has .layers, each layer has .parameters() method
         param_idx = 0
+        total_elements = 0
         for layer in model.layers:
             for param in layer.parameters():
                 param_size = param.data.nbytes
                 original_size += param_size
+                total_elements += param.data.size
                 # Quantize parameter
                 q_param, scale, zp = Quantizer.quantize_tensor(param)
-                quantized_size += q_param.data.nbytes
                 quantized_layers[f'param_{param_idx}'] = {
                     'quantized': q_param,
@@ -1760,6 +1761,10 @@ class Quantizer:
                 }
                 param_idx += 1
+        # Calculate theoretical quantized size: 1 byte per element for INT8
+        # (Note: Tensor class converts to float32 internally, but INT8 storage would be 1 byte)
+        quantized_size = total_elements  # 1 byte per element
+
         return {
             'quantized_layers': quantized_layers,
             'original_size_mb': original_size / MB_TO_BYTES,
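
The 1.0x bug, sketched with plain numpy: since the Tensor class stores quantized values as float32 (per the comment in the hunk above), measuring q_param.data.nbytes compared float32 against float32. The theoretical INT8 size is one byte per element. Values and scale here are hypothetical:

import numpy as np

w = np.random.randn(2368).astype(np.float32)   # hypothetical weights
q = np.round(w / 0.05).astype(np.float32)      # "quantized", but still float32 in memory
print(w.nbytes / q.nbytes)   # 1.0 -> old, misleading ratio
print(w.nbytes / w.size)     # 4.0 -> float32 bytes vs 1 byte/element for INT8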


@@ -65,6 +65,7 @@ from tinytorch.nn.acceleration import vectorized_matmul, fused_gelu
 """
 # %% nbgrader={"grade": false, "grade_id": "cell-imports-core", "solution": false}
+#| export
 import numpy as np
 import time
 from typing import Dict, List, Tuple, Optional, Any, Union
@@ -148,6 +149,7 @@ Real-world performance wins:
 """
 # %% nbgrader={"grade": false, "grade_id": "tensor-import", "solution": true}
+#| export
 # Import from TinyTorch package (previous modules must be completed and exported)
 from tinytorch.core.tensor import Tensor
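
Why the missing directive mattered, assuming the nbdev-style export these #| export tags suggest: only tagged cells are copied into the generated package module, so the exported acceleration code referenced np and Tensor that were never imported there. A hypothetical sketch of the failure, not the actual generated file:

# Hypothetical contents of the exported module before the fix: function cells
# were tagged for export, the import cells were not.
def fused_gelu(x):
    # First call raises NameError: name 'np' is not defined
    return 0.5 * x * (1.0 + np.tanh(0.7978845608 * (x + 0.044715 * x**3)))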


@@ -147,7 +147,7 @@ class Tensor:
         new_shape = tuple(new_shape)
         if np.prod(new_shape) != self.size:
             raise ValueError(
-                f"Total elements must match: {self.size} {np.prod(new_shape)}"
+                f"Cannot reshape tensor of size {self.size} to shape {new_shape}"
             )
         reshaped_data = np.reshape(self.data, new_shape)
         result = Tensor(reshaped_data, requires_grad=self.requires_grad)
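
A quick check of the new message, assuming reshape takes a shape tuple as the tuple(new_shape) call suggests:

import numpy as np
from tinytorch.core.tensor import Tensor   # import path taken from the hunk above

t = Tensor(np.arange(6))
t.reshape((4,))
# old: ValueError: Total elements must match: 6 4
# new: ValueError: Cannot reshape tensor of size 6 to shape (4,)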


@@ -181,8 +181,8 @@ class Profiler:
             total_flops = (output_h * output_w * kernel_h * kernel_w *
                            in_channels * out_channels * 2)
-        elif model_name == 'Sequential':
-            # Sequential model: sum FLOPs of all layers
+        elif model_name == 'Sequential' or hasattr(model, 'layers'):
+            # Sequential model or model with layers: sum FLOPs of all layers
             current_shape = input_shape
             for layer in model.layers:
                 layer_flops = self.count_flops(layer, current_shape)