bugfix: display the parameter count correctly in mlx for ollama show (#14285)

Patrick Devine
2026-02-16 13:03:34 -08:00
committed by GitHub
parent 9b795698b8
commit 0d5da826d4
2 changed files with 236 additions and 1 deletion


@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
+ "math"
"os"
"sort"
"strings"
@@ -58,7 +59,15 @@ func GetSafetensorsLLMInfo(name model.Name) (map[string]any, error) {
}
}
- return buildModelInfo(config, totalBytes, tensorCount), nil
+ info := buildModelInfo(config, totalBytes, tensorCount)
+ // For quantized models, byte-based estimation can significantly undercount
+ // parameters. Prefer exact counting from tensor shapes in safetensors headers.
+ if paramCount, err := getParameterCountFromManifest(mf); err == nil && paramCount > 0 {
+ info["general.parameter_count"] = paramCount
+ }
+ return info, nil
}
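// Illustrative arithmetic (assumed values, not from this change): a hypothetical
// int4 tensor of shape [1000, 1000] stores roughly 500,000 bytes of packed data.
// A byte-based estimate that assumes a 2-byte dtype such as BF16 would report
// about 250,000 parameters, a 4x undercount, while counting elements from the
// unpacked shape yields the correct 1,000,000.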
// buildModelInfo constructs the model info map from config and tensor stats.
@@ -151,6 +160,51 @@ func buildModelInfo(config modelConfig, totalTensorBytes, tensorCount int64) map
return info
}
// getParameterCountFromManifest counts model parameters from tensor shapes.
// This accounts for quantized tensors by using unpacked shapes from
// getTensorInfoFromManifest.
func getParameterCountFromManifest(mf *manifest.Manifest) (int64, error) {
tensors, err := getTensorInfoFromManifest(mf)
if err != nil {
return 0, err
}
var total int64
for _, tensor := range tensors {
if len(tensor.Shape) == 0 {
continue
}
elements := int64(1)
for _, dim := range tensor.Shape {
if dim == 0 {
elements = 0
break
}
if dim > uint64(math.MaxInt64) {
return 0, fmt.Errorf("tensor %s dimension too large: %d", tensor.Name, dim)
}
d := int64(dim)
if elements > math.MaxInt64/d {
return 0, fmt.Errorf("tensor %s element count overflow", tensor.Name)
}
elements *= d
}
if elements == 0 {
continue
}
if total > math.MaxInt64-elements {
return 0, fmt.Errorf("total parameter count overflow")
}
total += elements
}
return total, nil
}
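// Worked example (shape assumed for illustration): a tensor with
// Shape = [32, 4096, 14336] contributes 32 * 4096 * 14336 = 1,879,048,192 elements.
// Each multiplication proceeds only when elements <= math.MaxInt64/d, and each
// addition only when total <= math.MaxInt64-elements, so a malformed header fails
// with an error instead of silently wrapping around.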
// GetSafetensorsTensorInfo extracts tensor information from safetensors model layers.
// Each tensor is stored as a minimal safetensors file with an 88-byte header containing metadata.
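// For reference, the on-disk layout (also what the tests below construct) is an
// 8-byte little-endian uint64 header length, a JSON header mapping tensor names
// to {dtype, shape, data_offsets}, and then the raw tensor bytes.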
func GetSafetensorsTensorInfo(name model.Name) ([]api.Tensor, error) {


@@ -714,6 +714,187 @@ func TestGetTensorInfoFromManifest_Quantized(t *testing.T) {
}
}
func TestGetParameterCountFromManifest(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
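// With OLLAMA_MODELS pointing at tempDir, manifest.BlobsPath resolves each digest
// to a file under tempDir/blobs, so the minimal safetensors headers written below
// are exactly what getParameterCountFromManifest will read back.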
// Unquantized tensor: [4,5] = 20 params
header1 := map[string]any{
"model.embed_tokens.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{4, 5},
"data_offsets": []int64{0, 40},
},
}
header1JSON, _ := json.Marshal(header1)
var buf1 bytes.Buffer
binary.Write(&buf1, binary.LittleEndian, uint64(len(header1JSON)))
buf1.Write(header1JSON)
digest1 := "sha256:1111111111111111111111111111111111111111111111111111111111111111"
blobPath1, err := manifest.BlobsPath(digest1)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath1, buf1.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob1: %v", err)
}
// Quantized int4 tensor with packed shape [10,2] -> unpacked [10,16] = 160 params
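// (int4 packs eight 4-bit values per uint32, so the packed trailing dimension of 2
// unpacks to 2*8 = 16, giving 10*16 = 160.)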
header2 := map[string]any{
"__metadata__": map[string]string{
"quant_type": "int4",
"group_size": "32",
},
"model.layers.0.mlp.up_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10, 2},
"data_offsets": []int64{0, 80},
},
"model.layers.0.mlp.up_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10, 1},
"data_offsets": []int64{80, 100},
},
"model.layers.0.mlp.up_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10, 1},
"data_offsets": []int64{100, 120},
},
}
header2JSON, _ := json.Marshal(header2)
var buf2 bytes.Buffer
binary.Write(&buf2, binary.LittleEndian, uint64(len(header2JSON)))
buf2.Write(header2JSON)
digest2 := "sha256:2222222222222222222222222222222222222222222222222222222222222222"
blobPath2, err := manifest.BlobsPath(digest2)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath2, buf2.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob2: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest1,
Size: int64(buf1.Len() + 40),
Name: "model.embed_tokens.weight",
},
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest2,
Size: int64(buf2.Len() + 120),
Name: "model.layers.0.mlp.up_proj.weight",
},
},
}
paramCount, err := getParameterCountFromManifest(mf)
if err != nil {
t.Fatalf("getParameterCountFromManifest() error = %v", err)
}
const want int64 = 180 // 20 + 160
if paramCount != want {
t.Errorf("parameter_count = %d, want %d", paramCount, want)
}
}
func TestGetParameterCountFromManifest_MixedQuantizedPacked(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Packed mixed-precision blob (no global metadata):
// - gate_proj: int4 packed [5,8] + scale [5,2] => unpacked [5,64] = 320 params
// - down_proj: int8 packed [5,16] + scale [5,1] => unpacked [5,64] = 320 params
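// The packing factor differs per tensor: 8 int4 values per uint32 (8*8 = 64) vs
// 4 int8 values per uint32 (16*4 = 64).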
header := map[string]any{
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{5, 8},
"data_offsets": []int64{0, 160},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 2},
"data_offsets": []int64{160, 180},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 2},
"data_offsets": []int64{180, 200},
},
"model.layers.0.mlp.experts.0.down_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{5, 16},
"data_offsets": []int64{200, 520},
},
"model.layers.0.mlp.experts.0.down_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 1},
"data_offsets": []int64{520, 530},
},
"model.layers.0.mlp.experts.0.down_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 1},
"data_offsets": []int64{530, 540},
},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
digest := "sha256:3333333333333333333333333333333333333333333333333333333333333333"
blobPath, err := manifest.BlobsPath(digest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath, buf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest,
Size: int64(buf.Len() + 540),
Name: "model.layers.0.mlp.experts",
},
},
}
paramCount, err := getParameterCountFromManifest(mf)
if err != nil {
t.Fatalf("getParameterCountFromManifest() error = %v", err)
}
const want int64 = 640 // 320 + 320
if paramCount != want {
t.Errorf("parameter_count = %d, want %d", paramCount, want)
}
}
func TestParseSafetensorsAllHeaders(t *testing.T) {
tests := []struct {
name string