ollama-ollama/x/server/show_test.go
Daniel Hiltgen 03aee88186 mlx: Support NVIDIA TensorRT Model Optimizer import (#15566)
* mlx: Support NVIDIA TensorRT Model Optimizer import

* x/create: support FP8 safetensors import

Decode HF F8_E4M3 safetensors with block-scale companions into MLX-importable tensor blobs, including compressed-tensors weight_scale metadata, packed NVFP4 layouts, and mixed-precision tensor headers.
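
A blob of this kind pairs each FP8 weight with a block-scale companion in its safetensors header, along these lines (an illustrative header in the map style the tests below use; the tensor names and the 128x128 block assumption are hypothetical):

header := map[string]any{
	"model.layers.0.mlp.up_proj.weight": map[string]any{
		"dtype":        "F8_E4M3",
		"shape":        []int64{2560, 2560},
		"data_offsets": []int64{0, 6553600}, // one byte per FP8 element
	},
	"model.layers.0.mlp.up_proj.weight_scale": map[string]any{
		"dtype":        "F32",
		"shape":        []int64{20, 20}, // one scale per 128x128 block (assumed)
		"data_offsets": []int64{6553600, 6555200},
	},
}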

Use that source-precision metadata during create quantization: default FP8-sourced imports to mxfp8, allow source FP8 to target MLX low-bit formats, preserve source-quantized NVFP4 layouts, selectively keep or promote tensors based on their source precision, and detect quantized dtype from mixed-precision safetensors manifests.
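
A minimal sketch of the resulting defaults (the defaultTargetQuant helper is hypothetical, not the shipped API):

func defaultTargetQuant(sourceDtype string) string {
	switch sourceDtype {
	case "F8_E4M3": // FP8-sourced imports default to mxfp8
		return "mxfp8"
	case "nvfp4": // source-quantized NVFP4 layouts are preserved
		return "nvfp4"
	default: // other precisions keep their existing behavior
		return ""
	}
}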

* review comments
2026-04-27 18:28:10 -07:00

package server

import (
"bytes"
"encoding/binary"
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"

"github.com/ollama/ollama/manifest"
"github.com/ollama/ollama/types/model"
)
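// The tests in this file hand-build safetensors blobs, which have the layout:
//
//	[8-byte little-endian header size][JSON header][raw tensor data]
//
// The JSON header maps tensor names to {dtype, shape, data_offsets}, plus an
// optional "__metadata__" entry carrying quantization info such as
// quant_type and group_size.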
func TestBuildModelInfo(t *testing.T) {
tests := []struct {
name string
config modelConfig
totalTensorBytes int64
tensorCount int64
wantArch string
wantContextLen int
wantEmbedLen int
wantBlockCount int
wantParamCount int64
}{
{
name: "gemma3 model with model_type",
config: modelConfig{
ModelType: "gemma3",
HiddenSize: 2560,
NumHiddenLayers: 34,
MaxPositionEmbeddings: 131072,
IntermediateSize: 10240,
NumAttentionHeads: 8,
NumKeyValueHeads: 4,
VocabSize: 262144,
TorchDtype: "bfloat16",
},
totalTensorBytes: 8_600_000_150, // ~4.3B params * 2 bytes + 150 bytes header
tensorCount: 1,
wantArch: "gemma3",
wantContextLen: 131072,
wantEmbedLen: 2560,
wantBlockCount: 34,
wantParamCount: 4_300_000_000,
},
{
name: "llama model with architectures array",
config: modelConfig{
Architectures: []string{"LlamaForCausalLM"},
HiddenSize: 4096,
NumHiddenLayers: 32,
MaxPositionEmbeddings: 4096,
IntermediateSize: 11008,
NumAttentionHeads: 32,
NumKeyValueHeads: 32,
VocabSize: 32000,
TorchDtype: "float16",
},
totalTensorBytes: 14_000_000_150, // ~7B params * 2 bytes + 150 bytes header
tensorCount: 1,
wantArch: "llama",
wantContextLen: 4096,
wantEmbedLen: 4096,
wantBlockCount: 32,
wantParamCount: 7_000_000_000,
},
{
name: "multimodal model with text_config",
config: modelConfig{
Architectures: []string{"Gemma3ForConditionalGeneration"},
HiddenSize: 1152, // vision hidden size
TextConfig: &struct {
HiddenSize int `json:"hidden_size"`
MaxPositionEmbeddings int `json:"max_position_embeddings"`
NumHiddenLayers int `json:"num_hidden_layers"`
}{
HiddenSize: 2560,
MaxPositionEmbeddings: 131072,
NumHiddenLayers: 34,
},
NumAttentionHeads: 8,
NumKeyValueHeads: 4,
VocabSize: 262144,
TorchDtype: "bfloat16",
},
totalTensorBytes: 8_600_000_150,
tensorCount: 1,
wantArch: "gemma3",
wantContextLen: 131072,
wantEmbedLen: 2560,
wantBlockCount: 34,
wantParamCount: 4_300_000_000,
},
{
name: "float32 model",
config: modelConfig{
ModelType: "test",
HiddenSize: 512,
NumHiddenLayers: 6,
MaxPositionEmbeddings: 2048,
TorchDtype: "float32",
},
totalTensorBytes: 400_000_150, // 100M params * 4 bytes + 150 bytes header
tensorCount: 1,
wantArch: "test",
wantContextLen: 2048,
wantEmbedLen: 512,
wantBlockCount: 6,
wantParamCount: 100_000_000,
},
{
name: "multiple tensors with header overhead",
config: modelConfig{
ModelType: "test",
HiddenSize: 256,
NumHiddenLayers: 4,
MaxPositionEmbeddings: 1024,
TorchDtype: "bfloat16",
},
totalTensorBytes: 2_001_500, // 1M params * 2 bytes + 10 tensors * 150 bytes
tensorCount: 10,
wantArch: "test",
wantContextLen: 1024,
wantEmbedLen: 256,
wantBlockCount: 4,
wantParamCount: 1_000_000,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
info := buildModelInfo(tt.config, tt.totalTensorBytes, tt.tensorCount)
// Check architecture
if arch, ok := info["general.architecture"].(string); !ok || arch != tt.wantArch {
t.Errorf("architecture = %v, want %v", info["general.architecture"], tt.wantArch)
}
// Check context length
contextKey := tt.wantArch + ".context_length"
if contextLen, ok := info[contextKey].(int); !ok || contextLen != tt.wantContextLen {
t.Errorf("context_length = %v, want %v", info[contextKey], tt.wantContextLen)
}
// Check embedding length
embedKey := tt.wantArch + ".embedding_length"
if embedLen, ok := info[embedKey].(int); !ok || embedLen != tt.wantEmbedLen {
t.Errorf("embedding_length = %v, want %v", info[embedKey], tt.wantEmbedLen)
}
// Check block count
blockKey := tt.wantArch + ".block_count"
if blockCount, ok := info[blockKey].(int); !ok || blockCount != tt.wantBlockCount {
t.Errorf("block_count = %v, want %v", info[blockKey], tt.wantBlockCount)
}
// Check parameter count
if paramCount, ok := info["general.parameter_count"].(int64); !ok || paramCount != tt.wantParamCount {
t.Errorf("parameter_count = %v, want %v", info["general.parameter_count"], tt.wantParamCount)
}
})
}
}
func TestBuildModelInfo_ArchitectureConversion(t *testing.T) {
tests := []struct {
name string
architectures []string
modelType string
wantArch string
}{
{
name: "LlamaForCausalLM",
architectures: []string{"LlamaForCausalLM"},
wantArch: "llama",
},
{
name: "Gemma3ForCausalLM",
architectures: []string{"Gemma3ForCausalLM"},
wantArch: "gemma3",
},
{
name: "Gemma3ForConditionalGeneration",
architectures: []string{"Gemma3ForConditionalGeneration"},
wantArch: "gemma3",
},
{
name: "Qwen2ForCausalLM",
architectures: []string{"Qwen2ForCausalLM"},
wantArch: "qwen2",
},
{
name: "model_type takes precedence",
architectures: []string{"LlamaForCausalLM"},
modelType: "custom",
wantArch: "custom",
},
{
name: "empty architectures with model_type",
architectures: nil,
modelType: "mymodel",
wantArch: "mymodel",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := modelConfig{
Architectures: tt.architectures,
ModelType: tt.modelType,
}
info := buildModelInfo(config, 0, 0)
if arch, ok := info["general.architecture"].(string); !ok || arch != tt.wantArch {
t.Errorf("architecture = %v, want %v", info["general.architecture"], tt.wantArch)
}
})
}
}
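// A sketch of the mapping these cases imply (an assumption about the
// implementation, not a copy of it): model_type wins when set; otherwise the
// first architectures entry is lowercased with its "For..." suffix dropped,
// e.g.:
//
//	name := "Gemma3ForConditionalGeneration"
//	arch := strings.ToLower(name[:strings.Index(name, "For")]) // "gemma3"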
func TestBuildModelInfo_BytesPerParam(t *testing.T) {
tests := []struct {
name string
dtype string
totalBytes int64
tensorCount int64
wantParamCount int64
}{
{
name: "bfloat16",
dtype: "bfloat16",
totalBytes: 2_000_150, // 1M * 2 + 150
tensorCount: 1,
wantParamCount: 1_000_000,
},
{
name: "float16",
dtype: "float16",
totalBytes: 2_000_150,
tensorCount: 1,
wantParamCount: 1_000_000,
},
{
name: "float32",
dtype: "float32",
totalBytes: 4_000_150, // 1M * 4 + 150
tensorCount: 1,
wantParamCount: 1_000_000,
},
{
name: "int8",
dtype: "int8",
totalBytes: 1_000_150, // 1M * 1 + 150
tensorCount: 1,
wantParamCount: 1_000_000,
},
{
name: "unknown dtype defaults to 2 bytes",
dtype: "unknown",
totalBytes: 2_000_150,
tensorCount: 1,
wantParamCount: 1_000_000,
},
{
name: "empty dtype defaults to 2 bytes",
dtype: "",
totalBytes: 2_000_150,
tensorCount: 1,
wantParamCount: 1_000_000,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
config := modelConfig{
ModelType: "test",
TorchDtype: tt.dtype,
}
info := buildModelInfo(config, tt.totalBytes, tt.tensorCount)
if paramCount, ok := info["general.parameter_count"].(int64); !ok || paramCount != tt.wantParamCount {
t.Errorf("parameter_count = %v, want %v", info["general.parameter_count"], tt.wantParamCount)
}
})
}
}
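// The byte counts above all follow one estimate (a sketch of the assumed
// arithmetic, not the real buildModelInfo internals):
//
//	params = (totalTensorBytes - tensorCount*150) / bytesPerParam
//
// with bytesPerParam of 4 for float32, 1 for int8, and 2 otherwise
// (bfloat16, float16, and unknown or empty dtypes), where 150 bytes is the
// assumed per-tensor safetensors header overhead.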
func TestParseSafetensorsAllHeaders_Errors(t *testing.T) {
tests := []struct {
name string
data []byte
wantErr string
}{
{
name: "empty data",
data: []byte{},
wantErr: "failed to read header size",
},
{
name: "truncated header size",
data: []byte{0x01, 0x02, 0x03},
wantErr: "failed to read header size",
},
{
name: "header size too large",
data: func() []byte {
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(200*1024*1024)) // 200 MiB
return buf.Bytes()
}(),
wantErr: "header size too large",
},
{
name: "truncated header",
data: func() []byte {
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(100))
buf.Write([]byte("short"))
return buf.Bytes()
}(),
wantErr: "failed to read header",
},
{
name: "invalid JSON",
data: func() []byte {
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(10))
buf.Write([]byte("not json!!"))
return buf.Bytes()
}(),
wantErr: "failed to parse header",
},
{
name: "no tensors in header",
data: func() []byte {
header := map[string]any{
"__metadata__": map[string]any{"format": "pt"},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
return buf.Bytes()
}(),
wantErr: "no tensor found in header",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := parseSafetensorsAllHeaders(bytes.NewReader(tt.data))
if err == nil {
t.Error("expected error, got nil")
return
}
if !strings.Contains(err.Error(), tt.wantErr) {
t.Errorf("error = %v, want error containing %v", err, tt.wantErr)
}
})
}
}
func TestGetTensorInfoFromManifest(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Create test tensor blobs with __metadata__
tensors := []struct {
name string
digest string
dtype string
shape []int64
}{
{
name: "model.embed_tokens.weight",
digest: "sha256:abc123abc123abc123abc123abc123abc123abc123abc123abc123abc123abc0",
dtype: "BF16",
shape: []int64{262144, 2560},
},
{
name: "model.layers.0.self_attn.q_proj.weight",
digest: "sha256:def456def456def456def456def456def456def456def456def456def456def0",
dtype: "BF16",
shape: []int64{2560, 2560},
},
{
name: "model.norm.weight",
digest: "sha256:789789789789789789789789789789789789789789789789789789789789abc0",
dtype: "F32",
shape: []int64{2560},
},
}
// Create blob files with tensor keyed by name
var layers []manifest.Layer
for _, tensor := range tensors {
header := map[string]any{
tensor.name: map[string]any{
"dtype": tensor.dtype,
"shape": tensor.shape,
"data_offsets": []int64{0, 1000},
},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
// Write the blob file at the path returned by manifest.BlobsPath
blobPath, err := manifest.BlobsPath(tensor.digest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath, buf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob: %v", err)
}
layers = append(layers, manifest.Layer{
MediaType: manifest.MediaTypeImageTensor,
Digest: tensor.digest,
Size: int64(buf.Len() + 1000), // header + fake data
Name: tensor.name,
})
}
// Add a non-tensor layer (should be skipped)
layers = append(layers, manifest.Layer{
MediaType: "application/vnd.ollama.image.json",
Digest: "sha256:0000000000000000000000000000000000000000000000000000000000000000",
Size: 100,
Name: "config.json",
})
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: layers,
}
result, err := getTensorInfoFromManifest(mf)
if err != nil {
t.Fatalf("getTensorInfoFromManifest() error = %v", err)
}
if len(result) != 3 {
t.Errorf("got %d tensors, want 3", len(result))
}
// Verify each tensor
for i, tensor := range tensors {
if i >= len(result) {
break
}
if result[i].Name != tensor.name {
t.Errorf("tensor[%d].Name = %v, want %v", i, result[i].Name, tensor.name)
}
if result[i].Type != tensor.dtype {
t.Errorf("tensor[%d].Type = %v, want %v", i, result[i].Type, tensor.dtype)
}
if len(result[i].Shape) != len(tensor.shape) {
t.Errorf("tensor[%d].Shape length = %v, want %v", i, len(result[i].Shape), len(tensor.shape))
}
}
}
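// As exercised above, getTensorInfoFromManifest evidently resolves each
// tensor layer's blob via manifest.BlobsPath, parses only the safetensors
// header (the fake trailing data is never read), and skips layers with
// non-tensor media types such as the config.json layer.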
func TestGetTensorInfoFromManifest_Quantized(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Create a combined quantized blob with __metadata__
header := map[string]any{
"__metadata__": map[string]string{
"quant_type": "int4",
"group_size": "32",
},
"model.layers.0.mlp.up_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{2560, 320}, // packed: 2560 / 8 = 320
"data_offsets": []int64{0, 3276800},
},
"model.layers.0.mlp.up_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{2560, 80}, // 2560 / 32 = 80
"data_offsets": []int64{3276800, 3686400},
},
"model.layers.0.mlp.up_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{2560, 80},
"data_offsets": []int64{3686400, 4096000},
},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
digest := "sha256:aabb11aabb11aabb11aabb11aabb11aabb11aabb11aabb11aabb11aabb11aabb"
blobPath, err := manifest.BlobsPath(digest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath, buf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest,
Size: int64(buf.Len() + 4096000),
Name: "model.layers.0.mlp.up_proj.weight",
},
},
}
result, err := getTensorInfoFromManifest(mf)
if err != nil {
t.Fatalf("getTensorInfoFromManifest() error = %v", err)
}
if len(result) != 1 {
t.Fatalf("got %d tensors, want 1", len(result))
}
tensor := result[0]
if tensor.Name != "model.layers.0.mlp.up_proj.weight" {
t.Errorf("Name = %v, want model.layers.0.mlp.up_proj.weight", tensor.Name)
}
if tensor.Type != "int4" {
t.Errorf("Type = %v, want int4", tensor.Type)
}
// Shape should be unpacked: 320 * 8 = 2560
if len(tensor.Shape) != 2 || tensor.Shape[0] != 2560 || tensor.Shape[1] != 2560 {
t.Errorf("Shape = %v, want [2560, 2560]", tensor.Shape)
}
}
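// The unpacking the assertions above rely on: int4 values are packed 8 per
// U32, so the stored shape [2560, 320] unpacks to [2560, 2560], and with
// group_size 32 each logical row of 2560 elements has 2560/32 = 80 scale
// (and bias) entries.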
func TestGetParameterCountFromManifest(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Unquantized tensor: [4,5] = 20 params
header1 := map[string]any{
"model.embed_tokens.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{4, 5},
"data_offsets": []int64{0, 40},
},
}
header1JSON, _ := json.Marshal(header1)
var buf1 bytes.Buffer
binary.Write(&buf1, binary.LittleEndian, uint64(len(header1JSON)))
buf1.Write(header1JSON)
digest1 := "sha256:1111111111111111111111111111111111111111111111111111111111111111"
blobPath1, err := manifest.BlobsPath(digest1)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath1, buf1.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob1: %v", err)
}
// Quantized int4 tensor with packed shape [10,2] -> unpacked [10,16] = 160 params
header2 := map[string]any{
"__metadata__": map[string]string{
"quant_type": "int4",
"group_size": "32",
},
"model.layers.0.mlp.up_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10, 2},
"data_offsets": []int64{0, 80},
},
"model.layers.0.mlp.up_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10, 1},
"data_offsets": []int64{80, 100},
},
"model.layers.0.mlp.up_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10, 1},
"data_offsets": []int64{100, 120},
},
}
header2JSON, _ := json.Marshal(header2)
var buf2 bytes.Buffer
binary.Write(&buf2, binary.LittleEndian, uint64(len(header2JSON)))
buf2.Write(header2JSON)
digest2 := "sha256:2222222222222222222222222222222222222222222222222222222222222222"
blobPath2, err := manifest.BlobsPath(digest2)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath2, buf2.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob2: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest1,
Size: int64(buf1.Len() + 40),
Name: "model.embed_tokens.weight",
},
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest2,
Size: int64(buf2.Len() + 120),
Name: "model.layers.0.mlp.up_proj.weight",
},
},
}
paramCount, err := getParameterCountFromManifest(mf)
if err != nil {
t.Fatalf("getParameterCountFromManifest() error = %v", err)
}
const want int64 = 180 // 20 + 160
if paramCount != want {
t.Errorf("parameter_count = %d, want %d", paramCount, want)
}
}
func TestGetParameterCountFromManifest_MixedQuantizedPacked(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Packed mixed-precision blob (no global metadata):
// - gate_proj: int4 packed [5,8] + scale [5,2] => unpacked [5,64] = 320 params
// - down_proj: int8 packed [5,16] + scale [5,1] => unpacked [5,64] = 320 params
header := map[string]any{
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{5, 8},
"data_offsets": []int64{0, 160},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 2},
"data_offsets": []int64{160, 180},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 2},
"data_offsets": []int64{180, 200},
},
"model.layers.0.mlp.experts.0.down_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{5, 16},
"data_offsets": []int64{200, 520},
},
"model.layers.0.mlp.experts.0.down_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 1},
"data_offsets": []int64{520, 530},
},
"model.layers.0.mlp.experts.0.down_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{5, 1},
"data_offsets": []int64{530, 540},
},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
digest := "sha256:3333333333333333333333333333333333333333333333333333333333333333"
blobPath, err := manifest.BlobsPath(digest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath, buf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write blob: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: digest,
Size: int64(buf.Len() + 540),
Name: "model.layers.0.mlp.experts",
},
},
}
paramCount, err := getParameterCountFromManifest(mf)
if err != nil {
t.Fatalf("getParameterCountFromManifest() error = %v", err)
}
const want int64 = 640 // 320 + 320
if paramCount != want {
t.Errorf("parameter_count = %d, want %d", paramCount, want)
}
}
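// With no global __metadata__, the expected quant types above suggest the
// parser infers precision per tensor from the weight and scale shapes. One
// plausible reading (an assumption, not the confirmed heuristic): try the
// known packings (8 int4 or 4 int8 values per U32) and keep the one whose
// unpacked width divides into the scale width at a supported group size,
// e.g. [5,8] U32 + [5,2] scales -> int4 ([5,64], group 32), while
// [5,16] U32 + [5,1] scales -> int8 ([5,64], group 64).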
func TestParseSafetensorsAllHeaders(t *testing.T) {
tests := []struct {
name string
header map[string]any
wantCount int
wantNames []string
wantDtypes []string
wantQuants []string
wantErr bool
}{
{
name: "single tensor blob",
header: map[string]any{
"model.layers.0.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{2560, 2560},
"data_offsets": []int64{0, 13107200},
},
},
wantCount: 1,
wantNames: []string{"model.layers.0.weight"},
wantDtypes: []string{"BF16"},
wantQuants: []string{""},
},
{
name: "packed unquantized blob",
header: map[string]any{
"model.layers.0.mlp.experts.0.down_proj.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{2560, 10240},
"data_offsets": []int64{0, 52428800},
},
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 2560},
"data_offsets": []int64{52428800, 104857600},
},
"model.layers.0.mlp.experts.0.up_proj.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 2560},
"data_offsets": []int64{104857600, 157286400},
},
},
wantCount: 3,
wantNames: []string{
"model.layers.0.mlp.experts.0.down_proj.weight",
"model.layers.0.mlp.experts.0.gate_proj.weight",
"model.layers.0.mlp.experts.0.up_proj.weight",
},
wantDtypes: []string{"BF16", "BF16", "BF16"},
wantQuants: []string{"", "", ""},
},
{
name: "packed quantized blob with global metadata",
header: map[string]any{
"__metadata__": map[string]any{
"quant_type": "int4",
"group_size": "32",
},
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10240, 320},
"data_offsets": []int64{0, 13107200},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{13107200, 14745600},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{14745600, 16384000},
},
"model.layers.0.mlp.experts.0.up_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10240, 320},
"data_offsets": []int64{16384000, 29491200},
},
"model.layers.0.mlp.experts.0.up_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{29491200, 31129600},
},
"model.layers.0.mlp.experts.0.up_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{31129600, 32768000},
},
},
wantCount: 2,
wantNames: []string{
"model.layers.0.mlp.experts.0.gate_proj.weight",
"model.layers.0.mlp.experts.0.up_proj.weight",
},
wantDtypes: []string{"U32", "U32"},
wantQuants: []string{"int4", "int4"},
},
{
name: "packed mixed-precision blob (no global metadata)",
header: map[string]any{
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10240, 320},
"data_offsets": []int64{0, 13107200},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{13107200, 14745600},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{14745600, 16384000},
},
"model.layers.0.mlp.experts.0.down_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{2560, 2560},
"data_offsets": []int64{16384000, 42598400},
},
"model.layers.0.mlp.experts.0.down_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{2560, 160},
"data_offsets": []int64{42598400, 43417600},
},
},
wantCount: 2,
wantNames: []string{
"model.layers.0.mlp.experts.0.down_proj.weight",
"model.layers.0.mlp.experts.0.gate_proj.weight",
},
wantDtypes: []string{"U32", "U32"},
wantQuants: []string{"int8", "int4"},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
headerJSON, err := json.Marshal(tt.header)
if err != nil {
t.Fatalf("failed to marshal header: %v", err)
}
var buf bytes.Buffer
if err := binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON))); err != nil {
t.Fatalf("failed to write header size: %v", err)
}
buf.Write(headerJSON)
results, err := parseSafetensorsAllHeaders(&buf)
if (err != nil) != tt.wantErr {
t.Errorf("parseSafetensorsAllHeaders() error = %v, wantErr %v", err, tt.wantErr)
return
}
if tt.wantErr {
return
}
if len(results) != tt.wantCount {
t.Fatalf("got %d tensors, want %d", len(results), tt.wantCount)
}
for i, info := range results {
if info.Name != tt.wantNames[i] {
t.Errorf("tensor[%d].Name = %v, want %v", i, info.Name, tt.wantNames[i])
}
if info.Dtype != tt.wantDtypes[i] {
t.Errorf("tensor[%d].Dtype = %v, want %v", i, info.Dtype, tt.wantDtypes[i])
}
if info.QuantType != tt.wantQuants[i] {
t.Errorf("tensor[%d].QuantType = %v, want %v", i, info.QuantType, tt.wantQuants[i])
}
}
})
}
}
func TestGetTensorInfoFromManifest_Packed(t *testing.T) {
// Create a temp directory for blobs and set OLLAMA_MODELS
tempDir := t.TempDir()
t.Setenv("OLLAMA_MODELS", tempDir)
blobDir := filepath.Join(tempDir, "blobs")
if err := os.MkdirAll(blobDir, 0o755); err != nil {
t.Fatalf("failed to create blobs dir: %v", err)
}
// Create a packed blob with multiple expert tensors (mixed quantization)
header := map[string]any{
"model.layers.0.mlp.experts.0.gate_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{10240, 320},
"data_offsets": []int64{0, 13107200},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{13107200, 14745600},
},
"model.layers.0.mlp.experts.0.gate_proj.weight.bias": map[string]any{
"dtype": "BF16",
"shape": []int64{10240, 80},
"data_offsets": []int64{14745600, 16384000},
},
"model.layers.0.mlp.experts.0.down_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{2560, 2560},
"data_offsets": []int64{16384000, 42598400},
},
"model.layers.0.mlp.experts.0.down_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{2560, 160},
"data_offsets": []int64{42598400, 43417600},
},
}
headerJSON, _ := json.Marshal(header)
var buf bytes.Buffer
binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON)))
buf.Write(headerJSON)
packedDigest := "sha256:aaaa000000000000000000000000000000000000000000000000000000000001"
blobPath, err := manifest.BlobsPath(packedDigest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(blobPath, buf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write packed blob: %v", err)
}
// Also create a regular (single-tensor) blob
singleHeader := map[string]any{
"model.embed_tokens.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{262144, 2560},
"data_offsets": []int64{0, 1342177280},
},
}
singleHeaderJSON, _ := json.Marshal(singleHeader)
var singleBuf bytes.Buffer
binary.Write(&singleBuf, binary.LittleEndian, uint64(len(singleHeaderJSON)))
singleBuf.Write(singleHeaderJSON)
singleDigest := "sha256:bbbb000000000000000000000000000000000000000000000000000000000002"
singleBlobPath, err := manifest.BlobsPath(singleDigest)
if err != nil {
t.Fatalf("failed to get blob path: %v", err)
}
if err := os.WriteFile(singleBlobPath, singleBuf.Bytes(), 0o644); err != nil {
t.Fatalf("failed to write single blob: %v", err)
}
mf := &manifest.Manifest{
SchemaVersion: 2,
MediaType: "application/vnd.docker.distribution.manifest.v2+json",
Layers: []manifest.Layer{
{
MediaType: manifest.MediaTypeImageTensor,
Digest: singleDigest,
Size: int64(singleBuf.Len()),
Name: "model.embed_tokens.weight",
},
{
MediaType: manifest.MediaTypeImageTensor,
Digest: packedDigest,
Size: int64(buf.Len()),
Name: "model.layers.0.mlp.experts", // group prefix
},
},
}
result, err := getTensorInfoFromManifest(mf)
if err != nil {
t.Fatalf("getTensorInfoFromManifest() error = %v", err)
}
// Should have 3 tensors: 1 single + 2 packed main tensors
if len(result) != 3 {
t.Fatalf("got %d tensors, want 3. Tensors: %v", len(result), result)
}
// First tensor should be the single blob
if result[0].Name != "model.embed_tokens.weight" {
t.Errorf("tensor[0].Name = %v, want model.embed_tokens.weight", result[0].Name)
}
if result[0].Type != "BF16" {
t.Errorf("tensor[0].Type = %v, want BF16", result[0].Type)
}
// Packed tensors should have their actual names (sorted)
packedNames := make(map[string]bool)
for _, r := range result[1:] {
packedNames[r.Name] = true
}
if !packedNames["model.layers.0.mlp.experts.0.down_proj.weight"] {
t.Error("missing packed tensor: model.layers.0.mlp.experts.0.down_proj.weight")
}
if !packedNames["model.layers.0.mlp.experts.0.gate_proj.weight"] {
t.Error("missing packed tensor: model.layers.0.mlp.experts.0.gate_proj.weight")
}
packedTypes := make(map[string]string)
for _, r := range result[1:] {
packedTypes[r.Name] = r.Type
}
if packedTypes["model.layers.0.mlp.experts.0.down_proj.weight"] != "int8" {
t.Errorf("down_proj.Type = %v, want int8", packedTypes["model.layers.0.mlp.experts.0.down_proj.weight"])
}
if packedTypes["model.layers.0.mlp.experts.0.gate_proj.weight"] != "int4" {
t.Errorf("gate_proj.Type = %v, want int4", packedTypes["model.layers.0.mlp.experts.0.gate_proj.weight"])
}
}
func TestGetSafetensorsDtypeScansPastUnquantizedFirstBlob(t *testing.T) {
t.Setenv("OLLAMA_MODELS", t.TempDir())
writeSafetensorsLayer := func(t *testing.T, header map[string]any, name string) manifest.Layer {
t.Helper()
headerJSON, err := json.Marshal(header)
if err != nil {
t.Fatalf("failed to marshal header: %v", err)
}
var buf bytes.Buffer
if err := binary.Write(&buf, binary.LittleEndian, uint64(len(headerJSON))); err != nil {
t.Fatalf("failed to write header size: %v", err)
}
buf.Write(headerJSON)
layer, err := manifest.NewLayer(&buf, manifest.MediaTypeImageTensor)
if err != nil {
t.Fatalf("failed to create tensor layer: %v", err)
}
layer.Name = name
return layer
}
configData, err := json.Marshal(map[string]any{
"model_format": "safetensors",
})
if err != nil {
t.Fatalf("failed to marshal config: %v", err)
}
configLayer, err := manifest.NewLayer(bytes.NewReader(configData), "application/vnd.docker.container.image.v1+json")
if err != nil {
t.Fatalf("failed to create config layer: %v", err)
}
unquantized := writeSafetensorsLayer(t, map[string]any{
"model.embed_tokens.weight": map[string]any{
"dtype": "BF16",
"shape": []int64{16, 8},
"data_offsets": []int64{0, 256},
},
}, "model.embed_tokens.weight")
quantized := writeSafetensorsLayer(t, map[string]any{
"__metadata__": map[string]string{
"quant_type": "mxfp8",
"group_size": "32",
},
"model.layers.0.mlp.down_proj.weight": map[string]any{
"dtype": "U32",
"shape": []int64{16, 4},
"data_offsets": []int64{0, 256},
},
"model.layers.0.mlp.down_proj.weight.scale": map[string]any{
"dtype": "BF16",
"shape": []int64{16, 1},
"data_offsets": []int64{256, 288},
},
}, "model.layers.0.mlp.down_proj.weight")
name := model.ParseName("mixed-fp8-safetensors")
if err := manifest.WriteManifest(name, configLayer, []manifest.Layer{unquantized, quantized}); err != nil {
t.Fatalf("failed to write manifest: %v", err)
}
got, err := GetSafetensorsDtype(name)
if err != nil {
t.Fatalf("GetSafetensorsDtype() error = %v", err)
}
if got != "mxfp8" {
t.Fatalf("GetSafetensorsDtype() = %q, want mxfp8", got)
}
}
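// GetSafetensorsDtype, as exercised above, evidently walks the manifest's
// tensor layers in order and returns the first quant_type it finds in a
// blob's __metadata__, skipping blobs (like the unquantized embedding) that
// carry none. A rough equivalent of the scan (hypothetical helper, for
// illustration only):
//
//	for _, layer := range mf.Layers {
//		if qt := headerQuantType(layer); qt != "" {
//			return qt, nil
//		}
//	}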