bugfix: display the parameter count correctly in mlx for ollama show (#14285)

Patrick Devine
2026-02-16 13:03:34 -08:00
committed by GitHub
parent 9b795698b8
commit 0d5da826d4
2 changed files with 236 additions and 1 deletion


@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"io"
+ "math"
"os"
"sort"
"strings"
@@ -58,7 +59,15 @@ func GetSafetensorsLLMInfo(name model.Name) (map[string]any, error) {
}
}
- return buildModelInfo(config, totalBytes, tensorCount), nil
+ info := buildModelInfo(config, totalBytes, tensorCount)
+ // For quantized models, byte-based estimation can significantly undercount
+ // parameters. Prefer exact counting from tensor shapes in safetensors headers.
+ if paramCount, err := getParameterCountFromManifest(mf); err == nil && paramCount > 0 {
+ info["general.parameter_count"] = paramCount
+ }
+ return info, nil
}
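// Illustrative arithmetic (assumed values, not from this change): a hypothetical
// int4 tensor of shape [1000, 1000] stores roughly 500,000 bytes of packed data.
// A byte-based estimate that assumes a 2-byte dtype such as BF16 would report
// about 250,000 parameters, a 4x undercount, while counting elements from the
// unpacked shape yields the correct 1,000,000.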
// buildModelInfo constructs the model info map from config and tensor stats.
@@ -151,6 +160,51 @@ func buildModelInfo(config modelConfig, totalTensorBytes, tensorCount int64) map
return info
}
// getParameterCountFromManifest counts model parameters from tensor shapes.
// This accounts for quantized tensors by using unpacked shapes from
// getTensorInfoFromManifest.
func getParameterCountFromManifest(mf *manifest.Manifest) (int64, error) {
tensors, err := getTensorInfoFromManifest(mf)
if err != nil {
return 0, err
}
var total int64
for _, tensor := range tensors {
if len(tensor.Shape) == 0 {
continue
}
elements := int64(1)
for _, dim := range tensor.Shape {
if dim == 0 {
elements = 0
break
}
if dim > uint64(math.MaxInt64) {
return 0, fmt.Errorf("tensor %s dimension too large: %d", tensor.Name, dim)
}
d := int64(dim)
if elements > math.MaxInt64/d {
return 0, fmt.Errorf("tensor %s element count overflow", tensor.Name)
}
elements *= d
}
if elements == 0 {
continue
}
if total > math.MaxInt64-elements {
return 0, fmt.Errorf("total parameter count overflow")
}
total += elements
}
return total, nil
}
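// Worked example (shape assumed for illustration): a tensor with
// Shape = [32, 4096, 14336] contributes 32 * 4096 * 14336 = 1,879,048,192 elements.
// Each multiplication proceeds only when elements <= math.MaxInt64/d, and each
// addition only when total <= math.MaxInt64-elements, so a malformed header fails
// with an error instead of silently wrapping around.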
// GetSafetensorsTensorInfo extracts tensor information from safetensors model layers.
// Each tensor is stored as a minimal safetensors file with an 88-byte header containing metadata.
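// For reference, the on-disk layout (also what the tests below construct) is an
// 8-byte little-endian uint64 header length, a JSON header mapping tensor names
// to {dtype, shape, data_offsets}, and then the raw tensor bytes.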
func GetSafetensorsTensorInfo(name model.Name) ([]api.Tensor, error) {


@@ -714,6 +714,187 @@ func TestGetTensorInfoFromManifest_Quantized(t *testing.T) {
}
}
func TestGetParameterCountFromManifest(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
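// With OLLAMA_MODELS pointing at tempDir, manifest.BlobsPath resolves each digest
// to a file under tempDir/blobs, so the minimal safetensors headers written below
// are exactly what getParameterCountFromManifest will read back.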
// Unquantized tensor: [4,5] = 20 params
header1 := map[string]any{
"model.embed_tokens.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{4, 5},
"data_offsets": []int64{0, 40},
},
}
header1JSON, _ := json.Marshal(header1)
var buf1 bytes.Buffer
binary.Write(&buf1, binary.LittleEndian, uint64(len(header1JSON)))
buf1.Write(header1JSON)
digest1 := "sha256:1111111111111111111111111111111111111111111111111111111111111111"
blobPath1, err := manifest.BlobsPath(digest1)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath1, buf1.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob1: %v", err)
}
// Quantized int4 tensor with packed shape [10,2] -> unpacked [10,16] = 160 params
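// (int4 packs eight 4-bit values per uint32, so the packed trailing dimension of 2
// unpacks to 2*8 = 16, giving 10*16 = 160.)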
header2 := map[string]any{
"__metadata__": map[string]string{
"quant_type": "int4",
"group_size": "32",
},
"model.layers.0.mlp.up_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10, 2},
"data_offsets": []int64{0, 80},
},
"model.layers.0.mlp.up_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10, 1},
"data_offsets": []int64{80, 100},
},
"model.layers.0.mlp.up_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10, 1},
"data_offsets": []int64{100, 120},
},
}
header2JSON, _ := json.Marshal(header2)
var buf2 bytes.Buffer
binary.Write(&buf2, binary.LittleEndian, uint64(len(header2JSON)))
buf2.Write(header2JSON)
digest2 := "sha256:2222222222222222222222222222222222222222222222222222222222222222"
blobPath2, err := manifest.BlobsPath(digest2)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath2, buf2.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob2: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest1,
Size: int64(buf1.Len() + 40),
Name: "model.embed_tokens.weight",
},
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest2,
Size: int64(buf2.Len() + 120),
Name: "model.layers.0.mlp.up_proj.weight",
},
},
}
paramCount, err := getParameterCountFromManifest(mf)
if err != nil {
t.Fatalf("getParameterCountFromManifest() error = %v", err)
}
const want int64 = 180 // 20 + 160
if paramCount != want {
t.Errorf("parameter_count = %d, want %d", paramCount, want)
}
}
func TestGetParameterCountFromManifest_MixedQuantizedPacked(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Packed mixed-precision blob (no global metadata):
// - gate_proj: int4 packed [5,8] + scale [5,2] => unpacked [5,64] = 320 params
// - down_proj: int8 packed [5,16] + scale [5,1] => unpacked [5,64] = 320 params
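// The packing factor differs per tensor: 8 int4 values per uint32 (8*8 = 64) vs
// 4 int8 values per uint32 (16*4 = 64).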
header := map[string]any{
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{5, 8},
"data_offsets": []int64{0, 160},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 2},
"data_offsets": []int64{160, 180},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 2},
"data_offsets": []int64{180, 200},
},
"model.layers.0.mlp.experts.0.down_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{5, 16},
"data_offsets": []int64{200, 520},
},
"model.layers.0.mlp.experts.0.down_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 1},
"data_offsets": []int64{520, 530},
},
"model.layers.0.mlp.experts.0.down_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 1},
"data_offsets": []int64{530, 540},
},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
digest := "sha256:3333333333333333333333333333333333333333333333333333333333333333"
blobPath, err := manifest.BlobsPath(digest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath, buf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest,
Size: int64(buf.Len() + 540),
Name: "model.layers.0.mlp.experts",
},
},
}
paramCount, err := getParameterCountFromManifest(mf)
if err != nil {
t.Fatalf("getParameterCountFromManifest() error = %v", err)
}
const want int64 = 640 // 320 + 320
if paramCount != want {
t.Errorf("parameter_count = %d, want %d", paramCount, want)
}
}
func TestParseSafetensorsAllHeaders(t *testing.T) {
tests := []struct {
name string