mirror of
https://github.com/ollama/ollama.git
synced 2026-05-08 00:51:34 -05:00
* mlx: add laguna model support * convert: support fp8 safetensors import Decode HF F8_E4M3 safetensors with block scale companions into GGUF-supported tensor types, and record which output tensors came from FP8 source weights. Use that source-precision metadata during create quantization: default FP8-sourced GGUFs to Q8_0, keep non-FP8 tensors at their original precision for Q8_0, and promote non-FP8 quantizable tensors to Q8_0 for Q4_K requests. * ggml: add laguna model support * server: preserve generate logprobs with builtin parsers Generate requests were dropping logprob-only chunks whenever a builtin parser buffered visible content. Chat already handled this case, but generate only forwarded chunks with visible response, thinking, or tool-call output. Keep generate chunks that carry logprobs even when the builtin parser has not flushed visible content yet, and add a regression test that exercises the behavior with a generic thinking parser. * review comments - perf improvements * ggml: implement nemotron 3 nano omni * add poolside integration * update poolside doc * adapt to new cache setup * fix test * fix test --------- Co-authored-by: Eva Ho <hoyyeva@gmail.com>
102 lines
2.7 KiB
Go
102 lines
2.7 KiB
Go
package convert
|
|
|
|
import (
|
|
"errors"
|
|
"io"
|
|
"io/fs"
|
|
"strings"
|
|
)
|
|
|
|
// Tensor is a model weight tensor read from a source checkpoint that can
// be renamed, repacked, and written out during conversion.
type Tensor interface {
	// Name returns the tensor's name.
	Name() string
	// Shape returns the tensor's dimensions.
	Shape() []uint64
	// Kind returns the numeric tensor type the tensor should be written as.
	Kind() uint32
	// SetRepacker installs a function that transforms the tensor's
	// float32 data before it is written.
	SetRepacker(Repacker)
	// WriteTo writes the tensor's data to w, returning the byte count.
	WriteTo(io.Writer) (int64, error)
	// Clone returns a copy of the tensor.
	Clone() Tensor
}
|
|
|
|
// tensorBase holds the fields shared by all Tensor implementations:
// the tensor name, its shape, and an optional repack transform.
type tensorBase struct {
	name     string
	shape    []uint64
	repacker Repacker
}
|
|
|
|
// Name returns the tensor's name.
func (t tensorBase) Name() string {
	return t.name
}
|
|
|
|
// Shape returns the tensor's dimensions.
func (t tensorBase) Shape() []uint64 {
	return t.shape
}
|
|
|
|
// Tensor type identifiers used by Kind. The numeric values appear to
// mirror ggml's type enum (F32=0, F16=1, BF16=30, MXFP4=39) —
// NOTE(review): confirm against the ggml type definitions before changing.
const (
	tensorKindFP32 uint32 = iota // 0
	tensorKindFP16               // 1
	tensorKindBF16  = 30
	tensorKindMXFP4 = 39
)
|
|
|
|
func (t tensorBase) Kind() uint32 {
|
|
if strings.HasSuffix(t.name, ".ffn_gate_inp.weight") ||
|
|
strings.HasSuffix(t.name, ".bias") ||
|
|
strings.HasSuffix(t.name, ".shortconv.conv.weight") ||
|
|
strings.HasSuffix(t.name, ".ssm_conv1d.weight") || // SSM conv kernel must be F32 for Metal
|
|
strings.HasPrefix(t.name, "a.feature_extractor.") || // audio feature-extractor constants are read with BackendGet and must be real F32 values
|
|
strings.HasPrefix(t.name, "a.conv1d.") || // audio SSCP conv weights are kept F32 for im2col; this likely slows audio and should be revisited
|
|
strings.HasPrefix(t.name, "a.subsampling.") || // audio Parakeet subsampling weights are kept F32 for conv/linear stability; this likely slows audio and should be revisited
|
|
strings.Contains(t.name, ".conv_dw.") || // audio depthwise conv weights are kept F32; this likely slows audio and should be revisited
|
|
t.name == "token_types.weight" ||
|
|
t.name == "v.positional_embedding_vlm" ||
|
|
t.name == "v.position_embd.weight" ||
|
|
t.name == "v.tile_position_embd.weight" ||
|
|
t.name == "v.pre_tile_position_embd.weight" ||
|
|
t.name == "v.post_tile_position_embd.weight" ||
|
|
t.name == "s.position_embd" ||
|
|
strings.HasSuffix(t.name, "rel_pos_h") ||
|
|
strings.HasSuffix(t.name, "rel_pos_w") {
|
|
// these tensors are always F32
|
|
return tensorKindFP32
|
|
}
|
|
|
|
switch len(t.shape) {
|
|
case 0:
|
|
panic("invalid tensor shape")
|
|
case 1:
|
|
return tensorKindFP32
|
|
default:
|
|
return tensorKindFP16
|
|
}
|
|
}
|
|
|
|
// SetRepacker installs fn as the transform applied to the tensor's
// float32 data before it is written.
func (t *tensorBase) SetRepacker(fn Repacker) {
	t.repacker = fn
}
|
|
|
|
type Repacker func(string, []float32, []uint64) ([]float32, error)
|
|
|
|
func parseTensors(fsys fs.FS, replacer *strings.Replacer) ([]Tensor, error) {
|
|
patterns := []struct {
|
|
Pattern string
|
|
Func func(fs.FS, *strings.Replacer, ...string) ([]Tensor, error)
|
|
}{
|
|
{"*.safetensors", parseSafetensors},
|
|
{"pytorch_model-*-of-*.bin", parseTorch},
|
|
{"pytorch_model.bin", parseTorch},
|
|
{"consolidated.*.pth", parseTorch},
|
|
}
|
|
|
|
for _, pattern := range patterns {
|
|
matches, err := fs.Glob(fsys, pattern.Pattern)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(matches) > 0 {
|
|
return pattern.Func(fsys, replacer, matches...)
|
|
}
|
|
}
|
|
|
|
return nil, errors.New("unknown tensor format")
|
|
}
|