diff --git a/x/imagegen/safetensors/loader.go b/x/imagegen/safetensors/loader.go
index d0426a2ef..fbd443e05 100644
--- a/x/imagegen/safetensors/loader.go
+++ b/x/imagegen/safetensors/loader.go
@@ -37,9 +37,12 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
 	case "MXFP8":
 		// Microsoft MX FP8: group_size=32, bits=8, E4M3 scales (no qbias)
 		return 32, 8, "mxfp8"
-	case "FP8", "Q8", "INT8", "":
+	case "FP8", "Q8", "INT8":
 		// 8-bit quantization with affine mode (default for quantized models)
 		return 64, 8, "affine"
+	case "":
+		// No quantization specified: return zero values instead of affine defaults.
+		return 0, 0, ""
 	default:
 		return 32, 8, "affine" // Default to affine
 	}
diff --git a/x/mlxrunner/model/quant.go b/x/mlxrunner/model/quant.go
index 3a17ab485..10896e4b4 100644
--- a/x/mlxrunner/model/quant.go
+++ b/x/mlxrunner/model/quant.go
@@ -17,8 +17,11 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
 		return 32, 4, "affine"
 	case "MXFP8":
 		return 32, 8, "mxfp8"
-	case "FP8", "Q8", "INT8", "":
+	case "FP8", "Q8", "INT8":
 		return 64, 8, "affine"
+	case "":
+		// No quantization specified: return zero values instead of affine defaults.
+		return 0, 0, ""
 	default:
 		return 32, 8, "affine"
 	}