ml/backend/ggml: create tensor on specific backend

Some tensors should be created on specific backends to reduce the number of copies and improve performance.
2026-03-11 17:34:04 -05:00 · 2025-02-25 16:06:32 -08:00
parent 764e199d67
commit 7bae7fa5ce
6 changed files with 129 additions and 60 deletions
--- a/kvcache/encoder.go
+++ b/kvcache/encoder.go
@@ -106,7 +106,7 @@ func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
 	}

 	if _, ok := c.ctxs[c.curLayer]; !ok {
-		c.ctxs[c.curLayer] = c.backend.NewContext()
+		c.ctxs[c.curLayer] = c.backend.NewContextSize(2).Layer(c.curLayer)
 	}

 	if _, ok := c.keys[c.curLayer]; !ok {