From 9191dfaf05eaccda149cff937fb180bdb87ee5d1 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Thu, 4 Dec 2025 12:16:52 -0800 Subject: [PATCH] llm: Enable flash attention for mistral3 by default --- fs/ggml/ggml.go | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go index 6ce9724f2..4004bbfd9 100644 --- a/fs/ggml/ggml.go +++ b/fs/ggml/ggml.go @@ -831,6 +831,7 @@ func (f GGML) FlashAttention() bool { return slices.Contains([]string{ "gemma3", "gptoss", "gpt-oss", + "mistral3", "qwen3", "qwen3moe", "qwen3vl", "qwen3vlmoe", }, f.KV().String("general.architecture"))