mirror of
https://github.com/ollama/ollama.git
synced 2026-05-07 08:30:05 -05:00
403 lines
10 KiB
Go
403 lines
10 KiB
Go
package server
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"math/rand/v2"
|
|
"net/http"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/envconfig"
|
|
"github.com/ollama/ollama/format"
|
|
)
|
|
|
|
const modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
|
|
|
|
var (
|
|
modelRecommendationsRefreshInterval = 4 * time.Hour
|
|
modelRecommendationsFetchTimeout = 3 * time.Second
|
|
modelRecommendationsReadRefreshCooldown = 5 * time.Second
|
|
modelRecommendationsBackoffSteps = []time.Duration{
|
|
5 * time.Minute,
|
|
15 * time.Minute,
|
|
time.Hour,
|
|
4 * time.Hour,
|
|
}
|
|
|
|
errModelRecommendationsNoCloud = errors.New("cloud disabled")
|
|
)
|
|
|
|
type modelRecommendationsCache struct {
|
|
mu sync.RWMutex
|
|
recommendations []api.ModelRecommendation
|
|
refreshing bool
|
|
nextReadRefreshAfter time.Time
|
|
once sync.Once
|
|
client *http.Client
|
|
}
|
|
|
|
func newModelRecommendationsCache() *modelRecommendationsCache {
|
|
return &modelRecommendationsCache{
|
|
recommendations: cloneModelRecommendations(defaultModelRecommendations),
|
|
client: http.DefaultClient,
|
|
}
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) Start(ctx context.Context) {
|
|
c.once.Do(func() {
|
|
slog.Debug("starting model recommendations cache",
|
|
"default_recommendations", len(defaultModelRecommendations),
|
|
"refresh_interval", modelRecommendationsRefreshInterval.String(),
|
|
"fetch_timeout", modelRecommendationsFetchTimeout.String(),
|
|
)
|
|
go c.run(ctx)
|
|
})
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) Get() []api.ModelRecommendation {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
return cloneModelRecommendations(c.recommendations)
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) GetSWR(ctx context.Context) []api.ModelRecommendation {
|
|
recs := c.Get()
|
|
c.triggerRefreshOnRead(ctx)
|
|
return recs
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) set(recs []api.ModelRecommendation) {
|
|
c.mu.Lock()
|
|
c.recommendations = cloneModelRecommendations(recs)
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) beginRefresh() bool {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if c.refreshing {
|
|
return false
|
|
}
|
|
c.refreshing = true
|
|
return true
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) beginReadRefresh() bool {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
now := time.Now()
|
|
if c.refreshing || now.Before(c.nextReadRefreshAfter) {
|
|
return false
|
|
}
|
|
|
|
c.refreshing = true
|
|
return true
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) endRefresh() {
|
|
c.mu.Lock()
|
|
c.refreshing = false
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) endReadRefresh() {
|
|
c.mu.Lock()
|
|
c.refreshing = false
|
|
c.nextReadRefreshAfter = time.Now().Add(modelRecommendationsReadRefreshCooldown)
|
|
c.mu.Unlock()
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) refreshIfIdle(ctx context.Context) (bool, error) {
|
|
if !c.beginRefresh() {
|
|
return false, nil
|
|
}
|
|
defer c.endRefresh()
|
|
return true, c.refresh(ctx)
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) triggerRefreshOnRead(ctx context.Context) {
|
|
if !c.beginReadRefresh() {
|
|
return
|
|
}
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
ctx = context.WithoutCancel(ctx)
|
|
|
|
slog.Debug("triggering model recommendations refresh on read")
|
|
go func() {
|
|
defer c.endReadRefresh()
|
|
|
|
if err := c.refresh(ctx); err != nil {
|
|
switch {
|
|
case errors.Is(err, errModelRecommendationsNoCloud):
|
|
slog.Debug("skipping model recommendations read refresh because cloud is disabled")
|
|
default:
|
|
slog.Warn("model recommendations read refresh failed", "error", err)
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) run(ctx context.Context) {
|
|
c.loadSnapshot()
|
|
|
|
failures := 0
|
|
for {
|
|
started, err := c.refreshIfIdle(ctx)
|
|
switch {
|
|
case !started:
|
|
failures = 0
|
|
slog.Debug("skipping timer model recommendations refresh because refresh is already running")
|
|
case err == nil:
|
|
failures = 0
|
|
case errors.Is(err, errModelRecommendationsNoCloud):
|
|
failures = 0
|
|
slog.Debug("skipping model recommendations refresh because cloud is disabled")
|
|
default:
|
|
failures++
|
|
slog.Warn("model recommendations refresh failed", "error", err)
|
|
}
|
|
|
|
var wait time.Duration
|
|
if failures == 0 {
|
|
wait = withJitter(modelRecommendationsRefreshInterval)
|
|
} else {
|
|
wait = withJitter(modelRecommendationsBackoffSteps[min(failures-1, len(modelRecommendationsBackoffSteps)-1)])
|
|
}
|
|
slog.Info("model recommendations cache sleep scheduled", "wait", wait.String(), "consecutive_failures", failures)
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
slog.Debug("stopping model recommendations cache")
|
|
return
|
|
case <-time.After(wait):
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) refresh(ctx context.Context) error {
|
|
if envconfig.NoCloud() {
|
|
return errModelRecommendationsNoCloud
|
|
}
|
|
slog.Debug("refreshing model recommendations from remote", "url", modelRecommendationsURL)
|
|
|
|
reqCtx, cancel := context.WithTimeout(ctx, modelRecommendationsFetchTimeout)
|
|
defer cancel()
|
|
|
|
req, err := http.NewRequestWithContext(reqCtx, http.MethodGet, modelRecommendationsURL, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
resp, err := c.client.Do(req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= http.StatusBadRequest {
|
|
body, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
|
return fmt.Errorf("status %d: %s", resp.StatusCode, strings.TrimSpace(string(body)))
|
|
}
|
|
|
|
var payload api.ModelRecommendationsResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return err
|
|
}
|
|
|
|
recs, err := validateModelRecommendations(payload.Recommendations)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
c.set(recs)
|
|
slog.Debug("model recommendations refreshed", "count", len(recs))
|
|
if err := c.persistSnapshot(recs); err != nil {
|
|
slog.Warn("failed to persist model recommendations snapshot", "error", err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) loadSnapshot() {
|
|
path, err := modelRecommendationsSnapshotPath()
|
|
if err != nil {
|
|
slog.Warn("failed to resolve model recommendations snapshot path", "error", err)
|
|
return
|
|
}
|
|
|
|
data, err := os.ReadFile(path)
|
|
if err != nil {
|
|
if !errors.Is(err, os.ErrNotExist) {
|
|
slog.Warn("failed to read model recommendations snapshot", "path", path, "error", err)
|
|
} else {
|
|
slog.Debug("model recommendations snapshot not found", "path", path)
|
|
}
|
|
return
|
|
}
|
|
|
|
var snap api.ModelRecommendationsResponse
|
|
if err := json.Unmarshal(data, &snap); err != nil {
|
|
slog.Warn("failed to parse model recommendations snapshot", "path", path, "error", err)
|
|
return
|
|
}
|
|
|
|
recs, err := validateModelRecommendations(snap.Recommendations)
|
|
if err != nil {
|
|
slog.Warn("ignoring invalid model recommendations snapshot", "path", path, "error", err)
|
|
return
|
|
}
|
|
|
|
c.set(recs)
|
|
slog.Debug("loaded model recommendations snapshot", "path", path, "count", len(recs))
|
|
}
|
|
|
|
func (c *modelRecommendationsCache) persistSnapshot(recs []api.ModelRecommendation) error {
|
|
path, err := modelRecommendationsSnapshotPath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
|
|
return err
|
|
}
|
|
|
|
payload := api.ModelRecommendationsResponse{Recommendations: recs}
|
|
data, err := json.MarshalIndent(payload, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
tmp, err := os.CreateTemp(filepath.Dir(path), ".model-recommendations-*.tmp")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
tmpPath := tmp.Name()
|
|
defer os.Remove(tmpPath)
|
|
|
|
if _, err := tmp.Write(data); err != nil {
|
|
_ = tmp.Close()
|
|
return err
|
|
}
|
|
if err := tmp.Sync(); err != nil {
|
|
_ = tmp.Close()
|
|
return err
|
|
}
|
|
if err := tmp.Close(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := os.Rename(tmpPath, path); err != nil {
|
|
return err
|
|
}
|
|
slog.Debug("persisted model recommendations snapshot", "path", path, "count", len(recs))
|
|
return nil
|
|
}
|
|
|
|
func modelRecommendationsSnapshotPath() (string, error) {
|
|
home, err := os.UserHomeDir()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return filepath.Join(home, ".ollama", "cache", "model-recommendations.json"), nil
|
|
}
|
|
|
|
func validateModelRecommendations(recs []api.ModelRecommendation) ([]api.ModelRecommendation, error) {
|
|
if len(recs) == 0 {
|
|
return nil, errors.New("empty recommendations")
|
|
}
|
|
|
|
seen := make(map[string]struct{}, len(recs))
|
|
valid := make([]api.ModelRecommendation, 0, len(recs))
|
|
for _, rec := range recs {
|
|
rec.Model = strings.TrimSpace(rec.Model)
|
|
rec.Description = strings.TrimSpace(rec.Description)
|
|
rec.RequiredPlan = strings.TrimSpace(rec.RequiredPlan)
|
|
|
|
if rec.Model == "" {
|
|
return nil, errors.New("recommendation missing model")
|
|
}
|
|
if _, ok := seen[rec.Model]; ok {
|
|
return nil, fmt.Errorf("duplicate recommendation %q", rec.Model)
|
|
}
|
|
seen[rec.Model] = struct{}{}
|
|
|
|
if isCloudRecommendation(rec.Model) && (rec.ContextLength <= 0 || rec.MaxOutputTokens <= 0) {
|
|
slog.Warn("dropping cloud recommendation missing limits", "model", rec.Model)
|
|
continue
|
|
}
|
|
valid = append(valid, rec)
|
|
}
|
|
|
|
if len(valid) == 0 {
|
|
return nil, errors.New("no valid recommendations")
|
|
}
|
|
|
|
return valid, nil
|
|
}
|
|
|
|
func isCloudRecommendation(modelName string) bool {
|
|
return strings.HasSuffix(modelName, ":cloud") || strings.HasSuffix(modelName, "-cloud")
|
|
}
|
|
|
|
func withJitter(d time.Duration) time.Duration {
|
|
if d <= 0 {
|
|
return d
|
|
}
|
|
// jitter in range [0.8x, 1.2x]
|
|
factor := 0.8 + rand.Float64()*0.4
|
|
return time.Duration(float64(d) * factor)
|
|
}
|
|
|
|
func cloneModelRecommendations(in []api.ModelRecommendation) []api.ModelRecommendation {
|
|
out := make([]api.ModelRecommendation, len(in))
|
|
copy(out, in)
|
|
return out
|
|
}
|
|
|
|
var defaultModelRecommendations = []api.ModelRecommendation{
|
|
{
|
|
Model: "kimi-k2.6:cloud",
|
|
Description: "State-of-the-art coding, long-horizon execution, and multimodal agent swarm capability",
|
|
ContextLength: 262_144,
|
|
MaxOutputTokens: 262_144,
|
|
},
|
|
{
|
|
Model: "glm-5.1:cloud",
|
|
Description: "Reasoning and code generation",
|
|
ContextLength: 202_752,
|
|
MaxOutputTokens: 131_072,
|
|
},
|
|
{
|
|
Model: "qwen3.5:cloud",
|
|
Description: "Reasoning, coding, and agentic tool use with vision",
|
|
ContextLength: 262_144,
|
|
MaxOutputTokens: 32_768,
|
|
},
|
|
{
|
|
Model: "minimax-m2.7:cloud",
|
|
Description: "Fast, efficient coding and real-world productivity",
|
|
ContextLength: 204_800,
|
|
MaxOutputTokens: 128_000,
|
|
},
|
|
{
|
|
Model: "gemma4",
|
|
Description: "Reasoning and code generation locally",
|
|
VRAMBytes: 12 * format.GigaByte,
|
|
},
|
|
{
|
|
Model: "qwen3.5",
|
|
Description: "Reasoning, coding, and visual understanding locally",
|
|
VRAMBytes: 14 * format.GigaByte,
|
|
},
|
|
}
|