mirror of
https://github.com/ollama/ollama.git
synced 2026-03-09 07:16:38 -05:00
mlx: don't default to affine quantization for unquantized models
Otherwise the BF16 version of models trigger segfaults when they call into quantized kernels.
This commit is contained in:
@@ -37,9 +37,11 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
 	case "MXFP8":
 		// Microsoft MX FP8: group_size=32, bits=8, E4M3 scales (no qbias)
 		return 32, 8, "mxfp8"
-	case "FP8", "Q8", "INT8", "":
+	case "FP8", "Q8", "INT8":
 		// 8-bit quantization with affine mode (default for quantized models)
 		return 64, 8, "affine"
+	case "":
+		return 0, 0, ""
 	default:
 		return 32, 8, "affine" // Default to affine
 	}
||||
@@ -17,8 +17,10 @@ func QuantizationParams(quantization string) (groupSize, bits int, mode string)
 		return 32, 4, "affine"
 	case "MXFP8":
 		return 32, 8, "mxfp8"
-	case "FP8", "Q8", "INT8", "":
+	case "FP8", "Q8", "INT8":
 		return 64, 8, "affine"
+	case "":
+		return 0, 0, ""
 	default:
 		return 32, 8, "affine"
 	}
Reference in New Issue
Block a user