mirror of
https://github.com/ollama/ollama.git
synced 2026-03-09 07:16:38 -05:00
mlx: int4 groupsize 64 (#14682)
Change affine 4-bit integer (int4) quantization to use a group size of 64 instead of 32.
This commit is contained in:
@@ -21,7 +21,7 @@ var quantizeParams = map[string]struct {
|
||||
bits int
|
||||
mode string
|
||||
}{
|
||||
"int4": {32, 4, "affine"},
|
||||
"int4": {64, 4, "affine"},
|
||||
"nvfp4": {16, 4, "nvfp4"},
|
||||
"int8": {64, 8, "affine"},
|
||||
"mxfp8": {32, 8, "mxfp8"},
|
||||
|
||||
@@ -334,12 +334,12 @@ func GetTensorQuantization(name string, shape []int32, quantize string) string {
|
||||
quantNorm := normalizeQuantType(quantize)
|
||||
|
||||
// MLX quantization requires last dimension to be divisible by group size
|
||||
// nvfp4: 16, int4/mxfp8: 32, int8: 64
|
||||
// nvfp4: 16, mxfp8: 32, int4/int8: 64
|
||||
groupSize := int32(32)
|
||||
switch quantNorm {
|
||||
case "nvfp4":
|
||||
groupSize = 16
|
||||
case "int8":
|
||||
case "int4", "int8":
|
||||
groupSize = 64
|
||||
}
|
||||
if shape[len(shape)-1]%groupSize != 0 {
|
||||
|
||||
Reference in New Issue
Block a user