// ollama/server/model_recommendations.go

package server

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math/rand/v2"
	"net/http"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/envconfig"
	"github.com/ollama/ollama/format"
)

// modelRecommendationsURL is the remote endpoint that refresh issues its GET
// request against to obtain the current recommendation list.
const modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"

var (
	// modelRecommendationsRefreshInterval is the base wait (before jitter) the
	// background loop uses between refresh attempts while there are no
	// consecutive failures.
	modelRecommendationsRefreshInterval     = 4 * time.Hour
	// modelRecommendationsFetchTimeout bounds a single remote fetch; refresh
	// applies it as a context timeout on the request.
	modelRecommendationsFetchTimeout        = 3 * time.Second
	// modelRecommendationsReadRefreshCooldown is the minimum gap enforced
	// between the end of one read-triggered refresh and the start of the next.
	modelRecommendationsReadRefreshCooldown = 5 * time.Second
	// modelRecommendationsBackoffSteps are the base waits (before jitter) used
	// by the background loop after 1, 2, 3, ... consecutive failures; the last
	// entry is reused once the failure count exceeds the table length.
	modelRecommendationsBackoffSteps        = []time.Duration{
		5 * time.Minute,
		15 * time.Minute,
		time.Hour,
		4 * time.Hour,
	}

	// errModelRecommendationsNoCloud is returned by refresh when
	// envconfig.NoCloud() reports true; callers treat it as a skip rather than
	// a failure.
	errModelRecommendationsNoCloud = errors.New("cloud disabled")
)

// modelRecommendationsCache holds the current list of model recommendations
// and coordinates refreshing it from the remote endpoint. At most one refresh
// (timer-driven or read-triggered) is in flight at a time.
type modelRecommendationsCache struct {
	// mu guards recommendations, refreshing, and nextReadRefreshAfter.
	mu                   sync.RWMutex
	// recommendations is the list handed out (as a copy) by Get.
	recommendations      []api.ModelRecommendation
	// refreshing is true while a refresh has been claimed and not yet ended.
	refreshing           bool
	// nextReadRefreshAfter is the earliest time a read-triggered refresh may
	// start again; it is advanced when a read-triggered refresh ends.
	nextReadRefreshAfter time.Time
	// once ensures Start launches the background loop a single time.
	once                 sync.Once
	// client performs the HTTP request in refresh.
	client               *http.Client
}

// newModelRecommendationsCache builds a cache pre-populated with a private
// copy of defaultModelRecommendations and wired to http.DefaultClient.
func newModelRecommendationsCache() *modelRecommendationsCache {
	cache := new(modelRecommendationsCache)
	cache.client = http.DefaultClient
	cache.recommendations = cloneModelRecommendations(defaultModelRecommendations)
	return cache
}

// Start launches the background refresh loop in its own goroutine. Only the
// first call has any effect; the loop exits once ctx is done.
func (c *modelRecommendationsCache) Start(ctx context.Context) {
	launch := func() {
		slog.Debug(
			"starting model recommendations cache",
			"default_recommendations", len(defaultModelRecommendations),
			"refresh_interval", modelRecommendationsRefreshInterval.String(),
			"fetch_timeout", modelRecommendationsFetchTimeout.String(),
		)
		go c.run(ctx)
	}
	c.once.Do(launch)
}

// Get returns a copy of the cached recommendations, so callers may modify the
// returned slice without affecting the cache.
func (c *modelRecommendationsCache) Get() []api.ModelRecommendation {
	c.mu.RLock()
	current := cloneModelRecommendations(c.recommendations)
	c.mu.RUnlock()
	return current
}

// GetSWR returns the currently cached recommendations and then asks for a
// background refresh (stale-while-revalidate). The returned slice is the
// pre-refresh copy; the refresh request may be declined if one is already
// running or the read cooldown has not elapsed.
func (c *modelRecommendationsCache) GetSWR(ctx context.Context) []api.ModelRecommendation {
	// The deferred call runs after Get has produced the return value, so the
	// snapshot is always taken before the refresh is triggered.
	defer c.triggerRefreshOnRead(ctx)
	return c.Get()
}

// set replaces the cached recommendations with a copy of recs, so later
// changes to the caller's slice do not leak into the cache.
func (c *modelRecommendationsCache) set(recs []api.ModelRecommendation) {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.recommendations = cloneModelRecommendations(recs)
}

// beginRefresh tries to claim the single refresh slot. It reports true when
// the caller now owns the slot (and must later call endRefresh), and false
// when another refresh is already in flight.
func (c *modelRecommendationsCache) beginRefresh() bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	claimed := !c.refreshing
	// Either the slot was already taken (stays true) or we just took it.
	c.refreshing = true
	return claimed
}

// beginReadRefresh tries to claim the refresh slot on behalf of a read. It
// declines (returns false) when a refresh is already running or when the
// read-refresh cooldown has not yet elapsed; otherwise it marks the cache as
// refreshing and returns true.
func (c *modelRecommendationsCache) beginReadRefresh() bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.refreshing {
		return false
	}
	if time.Now().Before(c.nextReadRefreshAfter) {
		return false
	}

	c.refreshing = true
	return true
}

// endRefresh releases the refresh slot claimed by beginRefresh.
func (c *modelRecommendationsCache) endRefresh() {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.refreshing = false
}

// endReadRefresh releases the refresh slot claimed by beginReadRefresh and
// starts the cooldown that must pass before the next read-triggered refresh.
func (c *modelRecommendationsCache) endReadRefresh() {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.nextReadRefreshAfter = time.Now().Add(modelRecommendationsReadRefreshCooldown)
	c.refreshing = false
}

// refreshIfIdle runs a refresh unless one is already in flight. The first
// result reports whether a refresh was actually started; the second is the
// refresh error, and is always nil when nothing was started.
func (c *modelRecommendationsCache) refreshIfIdle(ctx context.Context) (bool, error) {
	if claimed := c.beginRefresh(); claimed {
		defer c.endRefresh()
		return true, c.refresh(ctx)
	}
	return false, nil
}

// triggerRefreshOnRead starts an asynchronous refresh on behalf of a reader,
// provided beginReadRefresh grants the slot. The refresh runs on a context
// detached from the caller's cancellation, so it can outlive the request that
// triggered it. Failures are only logged.
func (c *modelRecommendationsCache) triggerRefreshOnRead(ctx context.Context) {
	if !c.beginReadRefresh() {
		return
	}

	base := ctx
	if base == nil {
		base = context.Background()
	}
	detached := context.WithoutCancel(base)

	slog.Debug("triggering model recommendations refresh on read")
	go func() {
		// Always release the slot and start the cooldown, whatever the outcome.
		defer c.endReadRefresh()

		err := c.refresh(detached)
		if err == nil {
			return
		}
		if errors.Is(err, errModelRecommendationsNoCloud) {
			slog.Debug("skipping model recommendations read refresh because cloud is disabled")
			return
		}
		slog.Warn("model recommendations read refresh failed", "error", err)
	}()
}

// run is the background loop started by Start. It loads the on-disk snapshot
// once, then repeatedly attempts a refresh and sleeps until the next attempt.
// The sleep is the regular refresh interval when there are no consecutive
// failures, or the matching backoff step otherwise, with jitter applied in
// both cases. The loop returns when ctx is done.
func (c *modelRecommendationsCache) run(ctx context.Context) {
	c.loadSnapshot()

	consecutiveFailures := 0
	for {
		started, err := c.refreshIfIdle(ctx)
		cloudDisabled := started && errors.Is(err, errModelRecommendationsNoCloud)

		if started && err != nil && !cloudDisabled {
			consecutiveFailures++
			slog.Warn("model recommendations refresh failed", "error", err)
		} else {
			// Success, a skipped tick, and "cloud disabled" all reset the streak.
			consecutiveFailures = 0
			switch {
			case !started:
				slog.Debug("skipping timer model recommendations refresh because refresh is already running")
			case cloudDisabled:
				slog.Debug("skipping model recommendations refresh because cloud is disabled")
			}
		}

		base := modelRecommendationsRefreshInterval
		if consecutiveFailures > 0 {
			// Clamp to the last step once the streak outgrows the table.
			step := min(consecutiveFailures, len(modelRecommendationsBackoffSteps)) - 1
			base = modelRecommendationsBackoffSteps[step]
		}
		wait := withJitter(base)
		slog.Info("model recommendations cache sleep scheduled", "wait", wait.String(), "consecutive_failures", consecutiveFailures)

		select {
		case <-time.After(wait):
		case <-ctx.Done():
			slog.Debug("stopping model recommendations cache")
			return
		}
	}
}

// refresh fetches the recommendation list from modelRecommendationsURL,
// validates it, stores it in the cache, and persists it as the on-disk
// snapshot.
//
// It returns errModelRecommendationsNoCloud, without touching the network,
// when envconfig.NoCloud() reports true. Any other non-nil error means the
// cache was left unchanged; each such error is wrapped with the step that
// failed, so errors.Is / errors.As still see the underlying cause. A failure
// to persist the snapshot is logged but not returned, because the in-memory
// cache has already been updated at that point.
//
// The request is bounded by modelRecommendationsFetchTimeout on top of ctx.
func (c *modelRecommendationsCache) refresh(ctx context.Context) error {
	// maxBodyBytes caps how much of a successful response is fed to the JSON
	// decoder, so an unexpectedly large body cannot be buffered without limit.
	// The error path below is capped separately at 2048 bytes.
	const maxBodyBytes = 1 << 20

	if envconfig.NoCloud() {
		return errModelRecommendationsNoCloud
	}
	slog.Debug("refreshing model recommendations from remote", "url", modelRecommendationsURL)

	reqCtx, cancel := context.WithTimeout(ctx, modelRecommendationsFetchTimeout)
	defer cancel()

	req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, modelRecommendationsURL, nil)
	if err != nil {
		return fmt.Errorf("building model recommendations request: %w", err)
	}
	req.Header.Set("Accept", "application/json")

	resp, err := c.client.Do(req)
	if err != nil {
		return fmt.Errorf("fetching model recommendations: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode >= http.StatusBadRequest {
		// Best effort: include a short excerpt of the error body for diagnostics.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
		return fmt.Errorf("status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
	}

	var payload api.ModelRecommendationsResponse
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxBodyBytes)).Decode(&payload); err != nil {
		return fmt.Errorf("decoding model recommendations response: %w", err)
	}

	recs, err := validateModelRecommendations(payload.Recommendations)
	if err != nil {
		return fmt.Errorf("validating model recommendations: %w", err)
	}

	c.set(recs)
	slog.Debug("model recommendations refreshed", "count", len(recs))
	if err := c.persistSnapshot(recs); err != nil {
		slog.Warn("failed to persist model recommendations snapshot", "error", err)
	}
	return nil
}

// loadSnapshot replaces the cached recommendations with the ones stored in the
// on-disk snapshot, if that file exists, parses, and passes validation. Every
// failure is logged and leaves the cache untouched; a missing file is only
// logged at debug level.
func (c *modelRecommendationsCache) loadSnapshot() {
	path, err := modelRecommendationsSnapshotPath()
	if err != nil {
		slog.Warn("failed to resolve model recommendations snapshot path", "error", err)
		return
	}

	raw, err := os.ReadFile(path)
	switch {
	case err == nil:
		// Snapshot read; continue below.
	case errors.Is(err, os.ErrNotExist):
		slog.Debug("model recommendations snapshot not found", "path", path)
		return
	default:
		slog.Warn("failed to read model recommendations snapshot", "path", path, "error", err)
		return
	}

	var stored api.ModelRecommendationsResponse
	if err = json.Unmarshal(raw, &stored); err != nil {
		slog.Warn("failed to parse model recommendations snapshot", "path", path, "error", err)
		return
	}

	recs, err := validateModelRecommendations(stored.Recommendations)
	if err != nil {
		slog.Warn("ignoring invalid model recommendations snapshot", "path", path, "error", err)
		return
	}

	c.set(recs)
	slog.Debug("loaded model recommendations snapshot", "path", path, "count", len(recs))
}

// persistSnapshot writes recs as indented JSON to the snapshot path. The data
// is first written and synced to a temporary file in the same directory and
// then renamed over the target, so the target is only ever replaced by a
// completely written file. It returns the first error encountered.
func (c *modelRecommendationsCache) persistSnapshot(recs []api.ModelRecommendation) error {
	target, err := modelRecommendationsSnapshotPath()
	if err != nil {
		return err
	}
	dir := filepath.Dir(target)
	if err = os.MkdirAll(dir, 0o755); err != nil {
		return err
	}

	encoded, err := json.MarshalIndent(api.ModelRecommendationsResponse{Recommendations: recs}, "", "  ")
	if err != nil {
		return err
	}

	staged, err := os.CreateTemp(dir, ".model-recommendations-*.tmp")
	if err != nil {
		return err
	}
	stagedPath := staged.Name()
	// Cleans up the temp file on every failure path; its result is ignored,
	// including on the success path where the file has already been renamed.
	defer os.Remove(stagedPath)

	// Write, then sync, then always close. The first error wins, and a close
	// error is only reported when the write and sync both succeeded.
	_, err = staged.Write(encoded)
	if err == nil {
		err = staged.Sync()
	}
	if closeErr := staged.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		return err
	}

	if err = os.Rename(stagedPath, target); err != nil {
		return err
	}
	slog.Debug("persisted model recommendations snapshot", "path", target, "count", len(recs))
	return nil
}

// modelRecommendationsSnapshotPath returns the location of the on-disk
// snapshot: .ollama/cache/model-recommendations.json under the current user's
// home directory. It fails only when the home directory cannot be determined.
func modelRecommendationsSnapshotPath() (string, error) {
	homeDir, err := os.UserHomeDir()
	if err != nil {
		return "", err
	}

	segments := []string{homeDir, ".ollama", "cache", "model-recommendations.json"}
	return filepath.Join(segments...), nil
}

// validateModelRecommendations normalizes and checks a recommendation list.
//
// Model, Description, and RequiredPlan are whitespace-trimmed on a copy of
// each entry; the input slice is not modified. The whole list is rejected
// with an error when it is empty, when any entry has an empty model name, or
// when a model name appears twice. Cloud entries (see isCloudRecommendation)
// without a positive ContextLength and MaxOutputTokens are dropped with a
// warning rather than failing the list. An error is also returned when
// nothing survives the filtering.
func validateModelRecommendations(recs []api.ModelRecommendation) ([]api.ModelRecommendation, error) {
	if len(recs) == 0 {
		return nil, errors.New("empty recommendations")
	}

	kept := make([]api.ModelRecommendation, 0, len(recs))
	names := make(map[string]struct{}, len(recs))
	for i := range recs {
		entry := recs[i] // work on a copy so trimming never touches the input
		entry.Model = strings.TrimSpace(entry.Model)
		entry.Description = strings.TrimSpace(entry.Description)
		entry.RequiredPlan = strings.TrimSpace(entry.RequiredPlan)

		_, dup := names[entry.Model]
		switch {
		case entry.Model == "":
			return nil, errors.New("recommendation missing model")
		case dup:
			return nil, fmt.Errorf("duplicate recommendation %q", entry.Model)
		}
		// Recorded before the limits check, so a dropped cloud entry still
		// counts towards duplicate detection.
		names[entry.Model] = struct{}{}

		missingLimits := entry.ContextLength <= 0 || entry.MaxOutputTokens <= 0
		if missingLimits && isCloudRecommendation(entry.Model) {
			slog.Warn("dropping cloud recommendation missing limits", "model", entry.Model)
			continue
		}
		kept = append(kept, entry)
	}

	if len(kept) == 0 {
		return nil, errors.New("no valid recommendations")
	}
	return kept, nil
}

// isCloudRecommendation reports whether modelName ends in one of the suffixes
// that mark a cloud model: ":cloud" or "-cloud".
func isCloudRecommendation(modelName string) bool {
	for _, suffix := range [...]string{":cloud", "-cloud"} {
		if strings.HasSuffix(modelName, suffix) {
			return true
		}
	}
	return false
}

func withJitter(d time.Duration) time.Duration {
	if d <= 0 {
		return d
	}
	// jitter in range [0.8x, 1.2x]
	factor := 0.8 + rand.Float64()*0.4
	return time.Duration(float64(d) * factor)
}

// cloneModelRecommendations returns a new slice holding the same elements as
// in. The result is never nil, even for a nil or empty input. Elements are
// copied by value (a shallow copy).
func cloneModelRecommendations(in []api.ModelRecommendation) []api.ModelRecommendation {
	dup := make([]api.ModelRecommendation, 0, len(in))
	return append(dup, in...)
}

// defaultModelRecommendations is the built-in list a new cache starts with
// (newModelRecommendationsCache clones it). It stays in effect until a valid
// on-disk snapshot or a successful remote refresh replaces it. Entries whose
// model name ends in ":cloud" carry context/output limits; the local entries
// carry a VRAM figure instead.
var defaultModelRecommendations = []api.ModelRecommendation{
	{
		Model:           "kimi-k2.6:cloud",
		Description:     "State-of-the-art coding, long-horizon execution, and multimodal agent swarm capability",
		ContextLength:   262_144,
		MaxOutputTokens: 262_144,
	},
	{
		Model:           "glm-5.1:cloud",
		Description:     "Reasoning and code generation",
		ContextLength:   202_752,
		MaxOutputTokens: 131_072,
	},
	{
		Model:           "qwen3.5:cloud",
		Description:     "Reasoning, coding, and agentic tool use with vision",
		ContextLength:   262_144,
		MaxOutputTokens: 32_768,
	},
	{
		Model:           "minimax-m2.7:cloud",
		Description:     "Fast, efficient coding and real-world productivity",
		ContextLength:   204_800,
		MaxOutputTokens: 128_000,
	},
	{
		Model:       "gemma4",
		Description: "Reasoning and code generation locally",
		VRAMBytes:   12 * format.GigaByte,
	},
	{
		Model:       "qwen3.5",
		Description: "Reasoning, coding, and visual understanding locally",
		VRAMBytes:   14 * format.GigaByte,
	},
}