mirror of
https://github.com/ollama/ollama.git
synced 2026-03-09 03:12:11 -05:00
bugfix: fix crash bug in token cache logic
This change fixes a problem in the token cache logic to avoid panics caused by empty token arrays, by ensuring at least one token remains on full cache hits in the relevant function. This happens when there is an exact match in the cache on subsequent generations.
This commit is contained in:
@@ -78,6 +78,11 @@ func (c *kvCache) findRemaining(tokens []int32) []int32 {
		prefix++
	}

	if prefix == len(tokens) && prefix > 0 {
		// Leave one token to run through the model so we can sample a response.
		prefix--
	}

	if prefix < len(c.tokens) {
		trim := len(c.tokens) - prefix
		for _, kv := range c.caches {
||||
Reference in New Issue
Block a user