Files
ollama/model/models/lfm2/model.go
2026-02-23 14:38:10 -08:00

749 lines
22 KiB
Go

package lfm2
import (
"bytes"
"cmp"
"errors"
"fmt"
"image"
"math"
"github.com/ollama/ollama/fs"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/ml/nn/rope"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
"github.com/ollama/ollama/tokenizer"
)
// Options carries the text-decoder hyperparameters that New reads from the
// model metadata, along with the mixture-of-experts routing settings.
type Options struct {
	// hiddenSize is read from "embedding_length".
	hiddenSize int

	// headDim is read from "attention.key_length"; zero means it is derived
	// from hiddenSize and a layer's head count.
	headDim int
	// ropeDim is read from "rope.dimension_count"; zero defers to the head
	// dimension when rotary embeddings are applied.
	ropeDim int

	// eps is the epsilon handed to every RMS norm.
	eps float32
	// ropeBase and ropeScale are the rotary frequency base and scaling factor.
	ropeBase  float32
	ropeScale float32

	// ropeType selects the scaling scheme; only "yarn" receives extra options.
	ropeType string
	// originalContextLength is forwarded to RoPE for "yarn" scaling.
	originalContextLength int

	// Per-layer head counts (LFM2 alternates attention and recurrent layers).
	// New treats a layer with zero KV heads as a short-convolution layer.
	numHeadsByLayer   []int
	numKVHeadsByLayer []int

	// Mixture-of-experts configuration.
	numExperts         int
	numExpertsUsed     int
	normTopKProb       bool
	expertWeightsScale float32
	expertGatingFunc   uint32
}
// Router activation identifiers compared against Options.expertGatingFunc.
// Softmax is the default that New passes when "expert_gating_func" is absent.
const (
	expertGatingFuncSoftmax uint32 = 0
	expertGatingFuncSigmoid uint32 = 2
)
// headDimValue returns the per-head width. An explicit headDim wins; otherwise
// the width is hiddenSize divided by the head count of the first layer that has
// any heads, or hiddenSize itself when no layer has heads.
func (o Options) headDimValue() int {
	derived := o.hiddenSize
	for _, heads := range o.numHeadsByLayer {
		if heads > 0 {
			// Head dim is shared across layers, so the first attention layer decides.
			derived = o.hiddenSize / heads
			break
		}
	}
	return cmp.Or(o.headDim, derived)
}
// applyRotaryPositionEmbeddings applies NeoX-style RoPE to states at the given
// positions. With "yarn" scaling it also passes the original context length, an
// extrapolation factor of 1 and an attention factor of 1/(1+0.1*ln(ropeScale)).
// The rotated dimension is ropeDim, else headDim, else hiddenSize divided by
// the head count of the first layer that has heads (1 if none does).
func (o Options) applyRotaryPositionEmbeddings(ctx ml.Context, states, positions ml.Tensor) ml.Tensor {
	ropeOpts := []func(*rope.Options){rope.WithTypeNeoX()}
	if o.ropeType == "yarn" {
		logScale := math.Log(float64(o.ropeScale))
		ropeOpts = append(ropeOpts,
			rope.WithOriginalContextLength(o.originalContextLength),
			rope.WithExtrapolationFactor(1.),
			rope.WithAttentionFactor(float32(1.0/(1.0+0.1*logScale))),
		)
	}

	heads := 1
	for i := range o.numHeadsByLayer {
		if n := o.numHeadsByLayer[i]; n > 0 {
			heads = n
			break
		}
	}
	dim := cmp.Or(o.ropeDim, o.headDim, o.hiddenSize/heads)

	return nn.RoPE(ctx, states, positions, dim, o.ropeBase, 1./o.ropeScale, ropeOpts...)
}
// Model is the LFM2 model: a text decoder plus an optional vision encoder and
// projector. Exported tensor fields are bound by their gguf tags; the
// unexported fields are filled in by New.
type Model struct {
	model.Base
	tokenizer.Tokenizer

	// Text decoder tensors.
	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
	Layers         []Layer       `gguf:"blk"`
	OutputNorm     *nn.RMSNorm   `gguf:"output_norm,alt:token_embd_norm"`
	Output         *nn.Linear    `gguf:"output,alt:token_embd"`

	// Vision components. New sets both pointers to nil when
	// "vision.block_count" is zero.
	VisionModel     *VisionModel     `gguf:"v"`
	VisionProjector *VisionProjector `gguf:"mm"`
	ImageProcessor  ImageProcessor

	// Token ids used by PostTokenize to lay out image inputs. A value of zero
	// means "no such token"; New zeroes the start/end/thumbnail ids and empties
	// imageRowColIDs when useSpecialTokens is false.
	imageTokenID     int32
	imageStartToken  int32
	imageEndToken    int32
	imageThumbnailID int32
	// imageRowColIDs maps a 1-based (row, col) tile position to its marker token.
	imageRowColIDs   map[imageGridPos]int32
	useSpecialTokens bool

	// projectorOptions is passed to VisionProjector.Forward.
	projectorOptions VisionProjectorOptions

	Options
}
// Compile-time check that *Model implements model.MultimodalProcessor.
var _ model.MultimodalProcessor = (*Model)(nil)
// imageGridPos identifies one tile of a tiled image by row and column. It is
// the key type of Model.imageRowColIDs, which New fills with 1-based positions.
type imageGridPos struct {
	row, col int
}
// visionEmbeddingLayout describes how one image was split: the tile grid size
// and whether a thumbnail chunk accompanies the tiles.
type visionEmbeddingLayout struct {
	rows, cols   int
	hasThumbnail bool
}
// visionChunkData is the per-chunk metadata stored in input.Multimodal.Data for
// each encoded image piece.
type visionChunkData struct {
	// tokens is the number of embedding positions the chunk occupies.
	tokens int
	// row and col locate the tile in the grid; zero means unassigned.
	row, col int
	// thumbnail marks the chunk as the image thumbnail rather than a tile.
	thumbnail bool
	// layout is set by EncodeMultimodal on the first chunk of an image only.
	layout *visionEmbeddingLayout
}
// Validate reports the first missing or unsupported component of the model.
//
// It checks the top-level text tensors, then for every layer its two norms,
// its feed-forward block (dense or sparse) and its operator (attention or
// short convolution). When a vision model is present it also checks the vision
// embeddings, every vision layer, and the multimodal projector. It returns nil
// when nothing is missing.
func (m *Model) Validate() error {
	if m.TokenEmbedding == nil {
		return errors.New("lfm2: missing token_embd tensor")
	}
	if m.OutputNorm == nil {
		return errors.New("lfm2: missing output_norm tensor")
	}
	if m.Output == nil {
		return errors.New("lfm2: missing output tensor")
	}

	for i, layer := range m.Layers {
		if layer.AttentionNorm == nil {
			return fmt.Errorf("lfm2: missing blk.%d.attn_norm tensor", i)
		}
		if layer.MLPNorm == nil {
			return fmt.Errorf("lfm2: missing blk.%d.ffn_norm tensor", i)
		}

		switch ff := layer.MLP.(type) {
		case nil:
			return fmt.Errorf("lfm2: missing blk.%d feed-forward tensors", i)
		case *denseMLP:
			// A typed nil pointer stored in the interface does not match
			// `case nil`, so guard ff itself before touching its fields.
			if ff == nil || ff.Up == nil || ff.Down == nil || ff.Gate == nil {
				return fmt.Errorf("lfm2: missing blk.%d dense feed-forward tensors", i)
			}
		case *sparseMLP:
			if ff == nil || ff.Router == nil || ff.Gate == nil || ff.Up == nil || ff.Down == nil {
				return fmt.Errorf("lfm2: missing blk.%d sparse feed-forward tensors", i)
			}
		default:
			return fmt.Errorf("lfm2: unsupported feed-forward type at blk.%d", i)
		}

		// A nil Operator interface falls through to default and is reported
		// as unsupported.
		switch op := layer.Operator.(type) {
		case *Attention:
			if op == nil || op.Query == nil || op.Key == nil || op.Value == nil || op.Output == nil || op.QueryNorm == nil || op.KeyNorm == nil {
				return fmt.Errorf("lfm2: missing blk.%d attention tensors", i)
			}
		case *ShortConv:
			if op == nil || op.Conv == nil || op.Conv.Weight == nil || op.InProj == nil || op.OutProj == nil {
				return fmt.Errorf("lfm2: missing blk.%d shortconv tensors", i)
			}
		default:
			return fmt.Errorf("lfm2: unsupported operator at blk.%d", i)
		}
	}

	if m.VisionModel != nil {
		if m.VisionModel.PatchEmbedding == nil {
			return errors.New("lfm2: missing vision patch embedding tensors")
		}
		if m.VisionModel.PositionEmbedding == nil {
			return errors.New("lfm2: missing vision position embedding tensors")
		}
		if m.VisionModel.PostLayerNorm == nil {
			return errors.New("lfm2: missing vision post layer norm tensors")
		}
		if len(m.VisionModel.Layers) == 0 {
			return errors.New("lfm2: missing vision encoder layers")
		}
		for i, layer := range m.VisionModel.Layers {
			if layer.LayerNorm1 == nil || layer.LayerNorm2 == nil || layer.SelfAttention == nil || layer.MLP == nil {
				return fmt.Errorf("lfm2: missing vision layer tensors at v.blk.%d", i)
			}
			if layer.SelfAttention.Query == nil || layer.SelfAttention.Key == nil || layer.SelfAttention.Value == nil || layer.SelfAttention.Output == nil {
				return fmt.Errorf("lfm2: missing vision attention tensors at v.blk.%d", i)
			}
			if layer.MLP.Up == nil || layer.MLP.Down == nil {
				return fmt.Errorf("lfm2: missing vision feed-forward tensors at v.blk.%d", i)
			}
		}
		// The projector is only required alongside a vision model.
		if m.VisionProjector == nil || m.VisionProjector.Linear1 == nil || m.VisionProjector.Linear2 == nil {
			return errors.New("lfm2: missing multimodal projector tensors")
		}
	}

	return nil
}
// New builds an LFM2 model from the metadata in c.
//
// It returns model.ErrUnsupportedTokenizer unless the tokenizer model is
// "gpt2", an error when the expert configuration is inconsistent,
// model.ErrUnsupportedModel when c does not expose per-layer head counts, and
// an error when those head counts cover fewer layers than "block_count".
//
// Each layer gets an attention operator when its KV head count is non-zero and
// a short-convolution operator otherwise. For MoE models, layers at or beyond
// "leading_dense_block_count" get a sparse feed-forward block; all other layers
// get a dense one.
func New(c fs.Config) (model.Model, error) {
	if c.String("tokenizer.ggml.model") != "gpt2" {
		return nil, model.ErrUnsupportedTokenizer
	}

	numExperts := int(c.Uint("expert_count"))
	numExpertsUsed := int(c.Uint("expert_used_count"))
	isMoE := numExperts > 0
	if isMoE && (numExpertsUsed <= 0 || numExpertsUsed > numExperts) {
		return nil, fmt.Errorf("lfm2: invalid expert_used_count=%d for expert_count=%d", numExpertsUsed, numExperts)
	}

	vocabulary := tokenizer.Vocabulary{
		Values: c.Strings("tokenizer.ggml.tokens"),
		Scores: c.Floats("tokenizer.ggml.scores"),
		Types:  c.Ints("tokenizer.ggml.token_type"),
		Merges: c.Strings("tokenizer.ggml.merges"),
		AddBOS: c.Bool("tokenizer.ggml.add_bos_token", true),
		BOS:    []int32{int32(c.Uint("tokenizer.ggml.bos_token_id"))},
		AddEOS: c.Bool("tokenizer.ggml.add_eos_token", false),
		EOS: append(
			[]int32{int32(c.Uint("tokenizer.ggml.eos_token_id"))},
			c.Ints("tokenizer.ggml.eos_token_ids")...,
		),
	}

	var pretokenizers []string
	switch c.String("tokenizer.ggml.pre") {
	case "default":
		// use default BPE pretokenizer
	default:
		// llama-bpe style (default for LFM2)
		pretokenizers = []string{
			`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`,
		}
	}

	m := Model{
		Tokenizer:       tokenizer.NewBytePairEncoding(&vocabulary, pretokenizers...),
		Layers:          make([]Layer, c.Uint("block_count")),
		ImageProcessor:  newImageProcessor(c),
		VisionModel:     newVisionModel(c),
		VisionProjector: &VisionProjector{},
		imageRowColIDs:  make(map[imageGridPos]int32),
		projectorOptions: VisionProjectorOptions{
			scaleFactor:  int(c.Uint("vision.projector.scale_factor", 2)),
			useLayerNorm: c.Bool("vision.projector.use_layernorm", false),
		},
		Options: Options{
			hiddenSize:            int(c.Uint("embedding_length")),
			headDim:               int(c.Uint("attention.key_length")),
			ropeDim:               int(c.Uint("rope.dimension_count")),
			eps:                   c.Float("attention.layer_norm_rms_epsilon"),
			ropeType:              c.String("rope.scaling.type"),
			ropeBase:              c.Float("rope.freq_base"),
			ropeScale:             c.Float("rope.scaling.factor", 1),
			originalContextLength: int(c.Uint("rope.scaling.original_context_length")),
			numExperts:            numExperts,
			numExpertsUsed:        numExpertsUsed,
			normTopKProb:          c.Bool("norm_top_k_prob", true),
			expertWeightsScale:    c.Float("expert_weights_scale", 1.0),
			expertGatingFunc:      c.Uint("expert_gating_func", expertGatingFuncSoftmax),
		},
	}

	// Index the vocabulary once instead of rescanning it for every lookup
	// (there are up to 4 + max_tiles² of them). The first occurrence wins, which
	// matches a front-to-back scan.
	tokenIDs := make(map[string]int32, len(vocabulary.Values))
	for i, t := range vocabulary.Values {
		if _, seen := tokenIDs[t]; !seen {
			tokenIDs[t] = int32(i)
		}
	}

	// lookupTokenID returns 0 for an unknown token; id 0 is therefore
	// indistinguishable from "not found" and treated as such by callers.
	lookupTokenID := func(token string) int32 {
		return tokenIDs[token]
	}

	// resolveTokenID prefers an explicit non-zero metadata id, then the
	// vocabulary entry for token, then fallback.
	resolveTokenID := func(explicitKey, token string, fallback uint32) int32 {
		if explicitKey != "" {
			if id := c.Uint(explicitKey); id != 0 {
				return int32(id)
			}
		}
		if tokenID := lookupTokenID(token); tokenID != 0 {
			return tokenID
		}
		return int32(fallback)
	}

	m.imageTokenID = resolveTokenID("vision.image_token_id", "<image>", 396)
	m.imageStartToken = resolveTokenID("vision.image_start_token_id", "<|image_start|>", 0)
	m.imageEndToken = resolveTokenID("vision.image_end_token_id", "<|image_end|>", 0)
	m.imageThumbnailID = resolveTokenID("vision.image_thumbnail_token_id", "<|img_thumbnail|>", 0)
	m.useSpecialTokens = c.Bool("vision.use_image_special_tokens", true)

	maxGridTokens := int(c.Uint("vision.max_tiles", 10))
	if maxGridTokens <= 0 {
		maxGridTokens = 10
	}
	for row := 1; row <= maxGridTokens; row++ {
		for col := 1; col <= maxGridTokens; col++ {
			token := fmt.Sprintf("<|img_row_%d_col_%d|>", row, col)
			if tokenID := lookupTokenID(token); tokenID > 0 {
				m.imageRowColIDs[imageGridPos{row: row, col: col}] = tokenID
			}
		}
	}

	if !m.useSpecialTokens {
		m.imageStartToken = 0
		m.imageEndToken = 0
		m.imageThumbnailID = 0
		m.imageRowColIDs = map[imageGridPos]int32{}
	}

	if c.Uint("vision.block_count") == 0 {
		m.VisionModel = nil
		m.VisionProjector = nil
	}

	type headCounts interface {
		HeadCount() []uint64
		HeadCountKV() []uint64
	}
	hc, ok := c.(headCounts)
	if !ok {
		return nil, model.ErrUnsupportedModel
	}
	headCount := hc.HeadCount()
	headCountKV := hc.HeadCountKV()
	// Both slices are indexed per layer below; reject short ones instead of
	// panicking on malformed metadata.
	if len(headCount) < len(m.Layers) || len(headCountKV) < len(m.Layers) {
		return nil, fmt.Errorf("lfm2: head counts cover %d (attention) and %d (kv) layers, need %d", len(headCount), len(headCountKV), len(m.Layers))
	}

	m.numHeadsByLayer = make([]int, len(m.Layers))
	m.numKVHeadsByLayer = make([]int, len(m.Layers))

	// Clamp to [0, number of layers].
	leadingDenseBlockCount := min(max(int(c.Uint("leading_dense_block_count")), 0), len(m.Layers))

	for i := range m.Layers {
		m.numHeadsByLayer[i] = int(headCount[i])
		m.numKVHeadsByLayer[i] = int(headCountKV[i])

		// Layers without KV heads are short-convolution layers.
		if m.numKVHeadsByLayer[i] == 0 {
			m.Layers[i].Operator = &ShortConv{}
		} else {
			m.Layers[i].Operator = &Attention{}
		}

		if isMoE && i >= leadingDenseBlockCount {
			m.Layers[i].MLP = &sparseMLP{}
		} else {
			m.Layers[i].MLP = &denseMLP{}
		}
	}

	lCache := int(c.Uint("shortconv.l_cache"))
	dConv := max(0, lCache-1)
	m.Cache = NewHybridCache(m.Shift, m.hiddenSize, dConv)

	return &m, nil
}
// Operator is the sequence-mixing step of a layer. In this file it is
// implemented by Attention; New also assigns ShortConv values to it.
type Operator interface {
	// Forward applies the operator for the layer with index layer.
	Forward(
		ctx ml.Context,
		hiddenStates ml.Tensor,
		positions ml.Tensor,
		cache *HybridCache,
		layer int,
		opts *Options,
	) ml.Tensor
}
// Attention holds the tensors of one self-attention layer: the query, key and
// value projections, RMS norms applied to the query and key, and the output
// projection.
type Attention struct {
	Query     *nn.Linear  `gguf:"attn_q"`
	QueryNorm *nn.RMSNorm `gguf:"attn_q_norm"`
	Key       *nn.Linear  `gguf:"attn_k"`
	KeyNorm   *nn.RMSNorm `gguf:"attn_k_norm"`
	Value     *nn.Linear  `gguf:"attn_v"`
	Output    *nn.Linear  `gguf:"attn_output,alt:attn_out"`
}
// Forward runs self-attention for the layer with index layer. It projects
// hiddenStates to query/key/value, splits them into heads using that layer's
// head counts, RMS-normalises and rotates the query and key, attends through
// cache with a 1/sqrt(headDim) scale, and applies the output projection.
func (sa *Attention) Forward(ctx ml.Context, hiddenStates, positions ml.Tensor, cache *HybridCache, layer int, opts *Options) ml.Tensor {
	var (
		tokens  = hiddenStates.Dim(1)
		width   = opts.headDimValue()
		qHeads  = opts.numHeadsByLayer[layer]
		kvHeads = opts.numKVHeadsByLayer[layer]
	)

	q := sa.Query.Forward(ctx, hiddenStates)
	k := sa.Key.Forward(ctx, hiddenStates)
	v := sa.Value.Forward(ctx, hiddenStates)

	// Split the projections into per-head slices.
	q = q.Reshape(ctx, width, qHeads, tokens)
	k = k.Reshape(ctx, width, kvHeads, tokens)
	v = v.Reshape(ctx, width, kvHeads, tokens)

	// Normalise, then rotate; the value is left untouched.
	q = sa.QueryNorm.Forward(ctx, q, opts.eps)
	k = sa.KeyNorm.Forward(ctx, k, opts.eps)
	q = opts.applyRotaryPositionEmbeddings(ctx, q, positions)
	k = opts.applyRotaryPositionEmbeddings(ctx, k, positions)

	scale := 1. / math.Sqrt(float64(width))
	out := nn.Attention(ctx, q, k, v, scale, cache)

	// Merge the first two dimensions back together before the output projection.
	out = out.Reshape(ctx, out.Dim(0)*out.Dim(1), tokens)
	return sa.Output.Forward(ctx, out)
}
// FeedForward is the per-layer feed-forward block, implemented by denseMLP and
// sparseMLP.
type FeedForward interface {
	// Forward applies the block to hiddenState.
	Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor
}
// denseMLP is the gated feed-forward block used by non-MoE layers: gate and up
// projections followed by a down projection.
type denseMLP struct {
	Up   *nn.Linear `gguf:"ffn_up"`
	Down *nn.Linear `gguf:"ffn_down"`
	Gate *nn.Linear `gguf:"ffn_gate"`
}
// Forward combines the gate and up projections of hiddenState through SILU and
// returns the down projection of the result. opts is unused.
func (mlp *denseMLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
	gate := mlp.Gate.Forward(ctx, hiddenState)
	up := mlp.Up.Forward(ctx, hiddenState)
	return mlp.Down.Forward(ctx, gate.SILU(ctx, up))
}
// sparseMLP is the mixture-of-experts feed-forward block: a router plus
// batched gate/up/down expert projections.
type sparseMLP struct {
	Router *nn.Linear      `gguf:"ffn_gate_inp"`
	Gate   *nn.LinearBatch `gguf:"ffn_gate_exps"`
	Up     *nn.LinearBatch `gguf:"ffn_up_exps"`
	Down   *nn.LinearBatch `gguf:"ffn_down_exps"`
	// Bias is optional; when present it is added to the router probabilities
	// for expert selection only.
	Bias ml.Tensor `gguf:"exp_probs_b.bias,alt:exp_probs_b"`
}
// Forward routes each token to opts.numExpertsUsed experts and returns the
// weighted sum of their outputs.
//
// Router probabilities come from a softmax over the router logits, or a
// sigmoid when opts.expertGatingFunc is expertGatingFuncSigmoid. Experts are
// chosen by top-k over the probabilities (plus Bias when present), but the
// routing weights are always gathered from the unbiased probabilities.
func (mlp *sparseMLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *Options) ml.Tensor {
	// hiddenState: [hidden, tokens]
	routerLogits := mlp.Router.Forward(ctx, hiddenState)
	// Softmax is the default; any gating value other than sigmoid keeps it.
	probs := routerLogits.Softmax(ctx)
	if opts.expertGatingFunc == expertGatingFuncSigmoid {
		probs = routerLogits.Sigmoid(ctx)
	}
	// The bias only influences which experts are selected, not their weights.
	selectionProbs := probs
	if mlp.Bias != nil {
		selectionProbs = selectionProbs.Add(ctx, mlp.Bias)
	}
	selectedExperts := selectionProbs.TopK(ctx, opts.numExpertsUsed)
	// Gather the probabilities of the selected experts for every token.
	routingWeights := probs.Reshape(ctx, 1, opts.numExperts, hiddenState.Dim(1)).Rows(ctx, selectedExperts)
	if opts.normTopKProb {
		// Renormalise the selected weights by their sum; the sum is clamped to
		// at least 1e-6 to avoid dividing by (near) zero.
		routingWeights = routingWeights.Reshape(ctx, opts.numExpertsUsed, hiddenState.Dim(1))
		weightsSum := routingWeights.SumRows(ctx)
		weightsSum = weightsSum.Clamp(ctx, 1e-6, float32(math.Inf(1)))
		routingWeights = routingWeights.Div(ctx, weightsSum)
		routingWeights = routingWeights.Reshape(ctx, 1, opts.numExpertsUsed, hiddenState.Dim(1))
	}
	if opts.expertWeightsScale != 1 {
		routingWeights = routingWeights.Scale(ctx, float64(opts.expertWeightsScale))
	}
	// Build routing-weights branch early to enable topk-MoE fusion.
	ctx.Forward(routingWeights)
	// Insert a singleton middle dimension so the batched expert projections
	// can be applied per selected expert.
	hiddenState3D := hiddenState.Reshape(ctx, hiddenState.Dim(0), 1, hiddenState.Dim(1))
	experts := mlp.Gate.Forward(ctx, hiddenState3D, selectedExperts).SILU(ctx, mlp.Up.Forward(ctx, hiddenState3D, selectedExperts))
	experts = mlp.Down.Forward(ctx, experts, selectedExperts)
	experts = experts.Mul(ctx, routingWeights)
	// Sum the weighted expert outputs: start from the view at offset 0 and add
	// the views at each further multiple of Stride(1).
	nextState := experts.View(ctx, 0, experts.Dim(0), experts.Stride(2), experts.Dim(2))
	for i := 1; i < opts.numExpertsUsed; i++ {
		nextState = nextState.Add(ctx, experts.View(ctx, i*experts.Stride(1), experts.Dim(0), experts.Stride(2), experts.Dim(2)))
	}
	return nextState
}
// Layer is one decoder block: a norm followed by an Operator, then a norm
// followed by a FeedForward block. Operator and MLP carry no gguf tag; New
// assigns their concrete types per layer.
type Layer struct {
	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
	Operator      Operator
	MLPNorm       *nn.RMSNorm `gguf:"ffn_norm"`
	MLP           FeedForward
}
// Forward runs one decoder block: norm -> operator -> residual add, then
// norm -> feed-forward -> residual add. When outputs is non-nil, both the
// operator result and its residual are reduced to the rows selected by outputs
// before the first add, so the feed-forward half only sees those rows.
func (l *Layer) Forward(ctx ml.Context, layer int, hiddenState, positions, outputs ml.Tensor, cache *HybridCache, opts *Options) ml.Tensor {
	skip := hiddenState

	normed := l.AttentionNorm.Forward(ctx, hiddenState, opts.eps)
	mixed := l.Operator.Forward(ctx, normed, positions, cache, layer, opts)
	if outputs != nil {
		mixed = mixed.Rows(ctx, outputs)
		skip = skip.Rows(ctx, outputs)
	}
	afterOperator := mixed.Add(ctx, skip)

	ffInput := l.MLPNorm.Forward(ctx, afterOperator, opts.eps)
	ffOutput := l.MLP.Forward(ctx, ffInput, opts)
	return ffOutput.Add(ctx, afterOperator)
}
// Shift re-applies rotary position embeddings to key using shift as the
// positions. New hands it to NewHybridCache. The layer argument is unused and
// the returned error is always nil.
func (m *Model) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error) {
	shifted := m.Options.applyRotaryPositionEmbeddings(ctx, key, shift)
	return shifted, nil
}
// multimodalTokenCount returns how many token positions mm occupies. A tensor,
// when present, decides via its second dimension; otherwise the count is read
// from mm.Data when it is an int, an int32, or a visionChunkData (by value or
// non-nil pointer). Anything else yields 0.
func multimodalTokenCount(mm input.Multimodal) int {
	if mm.Tensor != nil {
		return mm.Tensor.Dim(1)
	}

	count := 0
	switch v := mm.Data.(type) {
	case int:
		count = v
	case int32:
		count = int(v)
	case visionChunkData:
		count = v.tokens
	case *visionChunkData:
		if v != nil {
			count = v.tokens
		}
	}
	return count
}
// multimodalChunkInfo extracts the visionChunkData carried in mm.Data, whether
// stored by value or as a non-nil pointer. When there is none it returns a
// chunk with only the token count filled in from multimodalTokenCount.
func multimodalChunkInfo(mm input.Multimodal) visionChunkData {
	if byValue, ok := mm.Data.(visionChunkData); ok {
		return byValue
	}
	if byPointer, ok := mm.Data.(*visionChunkData); ok && byPointer != nil {
		return *byPointer
	}
	return visionChunkData{tokens: multimodalTokenCount(mm)}
}
// multimodalLayout returns the layout attached to the first chunk of mm. It
// falls back to a 1x1 layout without a thumbnail when mm is empty or the first
// chunk carries no layout.
func multimodalLayout(mm []input.Multimodal) visionEmbeddingLayout {
	if len(mm) > 0 {
		if head := multimodalChunkInfo(mm[0]); head.layout != nil {
			return *head.layout
		}
	}
	return visionEmbeddingLayout{rows: 1, cols: 1}
}
// imageRowColToken returns the marker token id registered for the tile at
// (row, col), or 0 when either coordinate is not positive or no token is
// registered for that position.
func (m *Model) imageRowColToken(row, col int) int32 {
	if row > 0 && col > 0 {
		if id, ok := m.imageRowColIDs[imageGridPos{row: row, col: col}]; ok {
			return id
		}
	}
	return 0
}
// appendImageChunk appends one input per token position of chunk to result,
// all carrying imageToken. Only the first input holds the chunk, the hash and
// a SameBatch of tokenCount-1; the rest are bare placeholders. It returns an
// error (and a nil slice) when the chunk has no tokens.
func (m *Model) appendImageChunk(result []*input.Input, chunk input.Multimodal, imageToken int32, hash uint64) ([]*input.Input, error) {
	n := multimodalTokenCount(chunk)
	if n <= 0 {
		return nil, errors.New("lfm2: multimodal input has no tokens")
	}

	head := &input.Input{
		Token:          imageToken,
		Multimodal:     []input.Multimodal{chunk},
		MultimodalHash: hash,
		SameBatch:      n - 1,
	}
	result = append(result, head)

	for i := 1; i < n; i++ {
		result = append(result, &input.Input{Token: imageToken})
	}
	return result, nil
}
// EncodeMultimodal decodes an image, runs it through the image processor, and
// encodes every processed image with the vision model and projector. It returns
// one input.Multimodal per processed image, each holding the projected tensor
// and a visionChunkData; only the first chunk carries the image layout.
//
// It returns model.ErrNoVisionModel when the vision model or projector is
// missing or has no layers, and an error when decoding or processing fails,
// when the patch size is not positive, or when a processed image is smaller
// than one patch in either direction.
func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input.Multimodal, error) {
	if m.VisionModel == nil || m.VisionProjector == nil || len(m.VisionModel.Layers) == 0 {
		return nil, model.ErrNoVisionModel
	}

	img, _, err := image.Decode(bytes.NewReader(multimodalData))
	if err != nil {
		return nil, err
	}

	proc := &m.ImageProcessor
	tiles, tileLayout, err := proc.ProcessImage(img)
	if err != nil {
		return nil, err
	}
	if proc.patchSize <= 0 {
		return nil, errors.New("lfm2: invalid vision patch size")
	}

	// Shared by pointer; attached to the first chunk only.
	sharedLayout := &visionEmbeddingLayout{
		rows:         tileLayout.rows,
		cols:         tileLayout.cols,
		hasThumbnail: tileLayout.hasThumbnail,
	}

	encoded := make([]input.Multimodal, 0, len(tiles))
	for idx, tile := range tiles {
		patchGrid := visionPatchGrid{
			Width:  tile.size.X / proc.patchSize,
			Height: tile.size.Y / proc.patchSize,
		}
		if patchGrid.Width == 0 || patchGrid.Height == 0 {
			return nil, errors.New("lfm2: invalid resized image dimensions")
		}

		pixels := ctx.Input().FromFloats(tile.data, tile.size.X, tile.size.Y, proc.numChannels)
		features := m.VisionModel.Forward(ctx, pixels, patchGrid)
		embedding := m.VisionProjector.Forward(ctx, features, patchGrid, m.projectorOptions)

		info := visionChunkData{
			tokens:    embedding.Dim(1),
			row:       tile.row,
			col:       tile.col,
			thumbnail: tile.thumbnail,
		}
		if idx == 0 {
			info.layout = sharedLayout
		}

		encoded = append(encoded, input.Multimodal{Tensor: embedding, Data: info})
	}
	return encoded, nil
}
// PostTokenize expands every input that carries image chunks into the token
// sequence the model expects. Inputs without multimodal data pass through
// unchanged.
//
// For an image input it emits, in order: the image start token (if any); for
// each chunk an optional marker followed by the chunk's placeholder tokens;
// and the image end token (if any). Markers are only emitted for multi-tile
// layouts: the thumbnail token for a thumbnail chunk, otherwise the row/column
// token for the tile. Tiles without an explicit position get one derived from
// their index, and the chunk directly after the last tile is treated as the
// thumbnail when the layout says one exists.
func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error) {
	// Fall back to the default image token id when none was resolved.
	placeholder := cmp.Or(m.imageTokenID, 396)
	markersEnabled := m.useSpecialTokens ||
		m.imageStartToken > 0 ||
		m.imageEndToken > 0 ||
		m.imageThumbnailID > 0 ||
		len(m.imageRowColIDs) > 0

	var out []*input.Input
	for _, in := range inputs {
		if len(in.Multimodal) == 0 {
			out = append(out, in)
			continue
		}

		layout := multimodalLayout(in.Multimodal)
		layout.rows = max(layout.rows, 1)
		layout.cols = max(layout.cols, 1)
		tileCount := layout.rows * layout.cols
		tiled := tileCount > 1

		if markersEnabled && m.imageStartToken > 0 {
			out = append(out, &input.Input{Token: m.imageStartToken})
		}

		for idx, part := range in.Multimodal {
			info := multimodalChunkInfo(part)
			if info.tokens <= 0 {
				info.tokens = multimodalTokenCount(part)
			}

			if tiled {
				// Derive a 1-based grid position for tiles that lack one.
				if !info.thumbnail && info.row == 0 && info.col == 0 && idx < tileCount {
					info.row = idx/layout.cols + 1
					info.col = idx%layout.cols + 1
				}
				// The chunk right after the tiles is the thumbnail.
				if layout.hasThumbnail && idx == tileCount {
					info.thumbnail = true
				}

				if markersEnabled {
					var marker int32
					if info.thumbnail {
						marker = m.imageThumbnailID
					} else {
						marker = m.imageRowColToken(info.row, info.col)
					}
					if marker > 0 {
						out = append(out, &input.Input{Token: marker})
					}
				}
			}

			expanded, err := m.appendImageChunk(out, input.Multimodal{
				Tensor: part.Tensor,
				Data:   info,
			}, placeholder, in.MultimodalHash)
			if err != nil {
				return nil, err
			}
			out = expanded
		}

		if markersEnabled && m.imageEndToken > 0 {
			out = append(out, &input.Input{Token: m.imageEndToken})
		}
	}
	return out, nil
}
// Forward runs the decoder over batch and returns the output projection of the
// final normalised hidden state. Vision embeddings in batch.Multimodal are
// copied over the token embeddings starting at each entry's Index, and
// batch.Outputs is passed to the last layer only. The returned error is always
// nil.
func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error) {
	positions := ctx.Input().FromInts(batch.Positions, len(batch.Positions))

	h := m.TokenEmbedding.Forward(ctx, batch.Inputs)
	if len(batch.Multimodal) > 0 {
		// We splice vision embeddings into token embeddings in-place; duplicate to
		// avoid aliasing the raw embedding output graph.
		h = h.Duplicate(ctx)
	}

	for _, entry := range batch.Multimodal {
		at := entry.Index
		for _, part := range entry.Multimodal {
			emb := part.Tensor
			if emb == nil {
				continue
			}
			// Destination: emb's element count, starting at token position `at`.
			dst := h.View(ctx, at*h.Stride(1), emb.Dim(0)*emb.Dim(1))
			ctx.Forward(emb.Copy(ctx, dst))
			at += emb.Dim(1)
		}
	}

	last := len(m.Layers) - 1
	for i := range m.Layers {
		m.Cache.SetLayer(i)

		var outputs ml.Tensor
		if i == last {
			outputs = batch.Outputs
		}
		h = m.Layers[i].Forward(ctx, i, h, positions, outputs, m.Cache.(*HybridCache), &m.Options)
	}

	h = m.OutputNorm.Forward(ctx, h, m.eps)
	return m.Output.Forward(ctx, h), nil
}
// init registers New as the constructor for both the dense ("lfm2") and the
// mixture-of-experts ("lfm2moe") architecture names.
func init() {
	for _, arch := range []string{"lfm2", "lfm2moe"} {
		model.Register(arch, New)
	}
}