qwen3next: avoid inplace sigmoid for shared gate (#14077)

This commit is contained in:
Jeffrey Morgan
2026-02-04 15:50:02 -08:00
committed by GitHub
parent c323161f24
commit d25535c3f3
3 changed files with 9 additions and 1 deletions

View File

@@ -135,7 +135,7 @@ func (mlp *sparse) Forward(ctx ml.Context, hiddenStates ml.Tensor, opts *Options
// Apply shared expert gating
if mlp.SharedGateInp != nil {
sharedGateVal := mlp.SharedGateInp.Forward(ctx, hiddenStates2D)
- sharedGateVal = sharedGateVal.Sigmoid(ctx)
+ sharedGateVal = sharedGateVal.SigmoidOut(ctx)
// Broadcast gate to match dimensions
sharedGateVal = sharedGateVal.Repeat(ctx, 0, sharedOut.Dim(0))
sharedOut = sharedOut.Mul(ctx, sharedGateVal)