mirror of
https://github.com/ollama/ollama.git
synced 2026-03-11 17:34:04 -05:00
ml/backend/ggml: create tensor on specific backend
some tensors should be created on specific backends to reduce number of copies and improve performance
This commit is contained in:
@@ -106,7 +106,7 @@ func (c *EncoderCache) Put(ctx ml.Context, key, value ml.Tensor) {
|
||||
}
|
||||
|
||||
if _, ok := c.ctxs[c.curLayer]; !ok {
|
||||
c.ctxs[c.curLayer] = c.backend.NewContext()
|
||||
c.ctxs[c.curLayer] = c.backend.NewContextSize(2).Layer(c.curLayer)
|
||||
}
|
||||
|
||||
if _, ok := c.keys[c.curLayer]; !ok {
|
||||
|
||||
Reference in New Issue
Block a user