Added batch-size fix: for nomic-bert models, set NumBatch equal to NumCtx

This commit is contained in:
nicole pardal
2025-11-21 16:29:56 -08:00
parent 47e272c35a
commit cee4922649

View File

@@ -170,6 +170,11 @@ func NewLlamaServer(systemInfo ml.SystemInfo, gpus []ml.DeviceInfo, modelPath st
opts.NumBatch = min(opts.NumBatch, opts.NumCtx)
if f.KV().Architecture() == "nomic-bert" {
opts.NumBatch = opts.NumCtx
slog.Debug("nomic-bert model detected, setting batch size equal to context length", "num_batch", opts.NumBatch, "num_ctx", opts.NumCtx)
}
loadRequest := LoadRequest{LoraPath: adapters, KvSize: opts.NumCtx * numParallel, BatchSize: opts.NumBatch, Parallel: numParallel, MultiUserCache: envconfig.MultiUserCache()}
defaultThreads := systemInfo.ThreadCount