mirror of
https://github.com/ollama/ollama.git
synced 2026-03-11 17:34:04 -05:00
45 lines
1.2 KiB
Go
package lfm2
|
|
|
|
import (
|
|
"github.com/ollama/ollama/kvcache"
|
|
"github.com/ollama/ollama/ml"
|
|
)
|
|
|
|
// Compile-time assertions that *HybridCache satisfies the cache
// interfaces consumed by the runner; a build failure here means the
// embedded kvcache.Recurrent no longer covers the required methods.
var (
	_ kvcache.Cache           = (*HybridCache)(nil)
	_ kvcache.CheckpointCache = (*HybridCache)(nil)
)
|
|
|
|
// HybridCache adapts the shared recurrent cache for LFM2:
// - KV attention cache is handled by the embedded causal cache
// - shortconv recurrent state uses conv slots [dConv, hiddenSize]
//
// This reuses shared checkpoint/restore logic for prefix mismatch recovery.
type HybridCache struct {
	// Recurrent supplies the full cache implementation (attention KV plus
	// recurrent conv state, including checkpointing); HybridCache adds no
	// state of its own and only configures it for LFM2 in NewHybridCache.
	*kvcache.Recurrent
}
|
|
|
|
func NewHybridCache(shift func(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error), hiddenSize, dConv int) *HybridCache {
|
|
base := kvcache.NewRecurrentCache(kvcache.RecurrentConfig{
|
|
Shift: shift,
|
|
ConvDim: dConv,
|
|
ConvChannels: hiddenSize,
|
|
RecurrentStateSize: 1, // LFM2 uses only conv state; keep a minimal recurrent buffer size.
|
|
CheckpointLogPrefix: "lfm2",
|
|
})
|
|
|
|
return &HybridCache{Recurrent: base}
|
|
}
|
|
|
|
// slotsTensor returns the conv-state slots tensor from the embedded
// recurrent cache. Unexported delegate so package-internal model code
// can use the lowercase accessor naming convention.
func (c *HybridCache) slotsTensor() ml.Tensor {
	return c.SlotsTensor()
}
|
|
|
|
// seqTokens returns the per-sequence token count of the current batch,
// delegating to the embedded recurrent cache.
func (c *HybridCache) seqTokens() int {
	return c.SeqTokens()
}
|
|
|
|
// numSeqs returns the number of sequences in the current batch,
// delegating to the embedded recurrent cache.
func (c *HybridCache) numSeqs() int {
	return c.NumSeqs()
}
|