mirror of
https://github.com/ollama/ollama.git
synced 2026-03-08 23:04:13 -05:00
cmd/config: fix cloud model limit lookups in integrations (#14650)
This commit is contained in:
@@ -1277,7 +1277,8 @@ func TestDroidEdit_LocalModelDefaultMaxOutput(t *testing.T) {
|
||||
func TestDroidEdit_CloudModelLimitsUsed(t *testing.T) {
|
||||
// Verify that every cloud model in cloudModelLimits has a valid output
|
||||
// value that would be used for maxOutputTokens when isCloudModel returns true.
|
||||
// :cloud suffix stripping must also work since that's how users specify them.
|
||||
// Cloud suffix normalization must also work since integrations may see either
|
||||
// :cloud or -cloud model names.
|
||||
for name, expected := range cloudModelLimits {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
l, ok := lookupCloudModelLimit(name)
|
||||
@@ -1296,6 +1297,15 @@ func TestDroidEdit_CloudModelLimitsUsed(t *testing.T) {
|
||||
if l2.Output != expected.Output {
|
||||
t.Errorf(":cloud output = %d, want %d", l2.Output, expected.Output)
|
||||
}
|
||||
// Also verify -cloud suffix lookup
|
||||
dashCloudName := name + "-cloud"
|
||||
l3, ok := lookupCloudModelLimit(dashCloudName)
|
||||
if !ok {
|
||||
t.Fatalf("lookupCloudModelLimit(%q) returned false", dashCloudName)
|
||||
}
|
||||
if l3.Output != expected.Output {
|
||||
t.Errorf("-cloud output = %d, want %d", l3.Output, expected.Output)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -81,6 +81,7 @@ var cloudModelLimits = map[string]cloudModelLimit{
|
||||
"deepseek-v3.2": {Context: 163_840, Output: 65_536},
|
||||
"glm-4.6": {Context: 202_752, Output: 131_072},
|
||||
"glm-4.7": {Context: 202_752, Output: 131_072},
|
||||
"glm-5": {Context: 202_752, Output: 131_072},
|
||||
"gpt-oss:120b": {Context: 131_072, Output: 131_072},
|
||||
"gpt-oss:20b": {Context: 131_072, Output: 131_072},
|
||||
"kimi-k2:1t": {Context: 262_144, Output: 262_144},
|
||||
|
||||
@@ -26,17 +26,15 @@ type cloudModelLimit struct {
|
||||
}
|
||||
|
||||
// lookupCloudModelLimit returns the token limits for a cloud model.
|
||||
// It tries the exact name first, then strips the ":cloud" suffix.
|
||||
// It normalizes common cloud suffixes before checking the shared limit map.
|
||||
func lookupCloudModelLimit(name string) (cloudModelLimit, bool) {
|
||||
// TODO(parthsareen): migrate to using cloud check instead.
|
||||
for _, suffix := range []string{"-cloud", ":cloud"} {
|
||||
name = strings.TrimSuffix(name, suffix)
|
||||
}
|
||||
if l, ok := cloudModelLimits[name]; ok {
|
||||
return l, true
|
||||
}
|
||||
base := strings.TrimSuffix(name, ":cloud")
|
||||
if base != name {
|
||||
if l, ok := cloudModelLimits[base]; ok {
|
||||
return l, true
|
||||
}
|
||||
}
|
||||
return cloudModelLimit{}, false
|
||||
}
|
||||
|
||||
|
||||
@@ -666,6 +666,9 @@ func TestLookupCloudModelLimit(t *testing.T) {
|
||||
}{
|
||||
{"glm-4.7", true, 202_752, 131_072},
|
||||
{"glm-4.7:cloud", true, 202_752, 131_072},
|
||||
{"glm-5:cloud", true, 202_752, 131_072},
|
||||
{"gpt-oss:120b-cloud", true, 131_072, 131_072},
|
||||
{"gpt-oss:20b-cloud", true, 131_072, 131_072},
|
||||
{"kimi-k2.5", true, 262_144, 262_144},
|
||||
{"kimi-k2.5:cloud", true, 262_144, 262_144},
|
||||
{"deepseek-v3.2", true, 163_840, 65_536},
|
||||
|
||||
@@ -205,6 +205,9 @@ func createConfig(ctx context.Context, client *api.Client, modelID string) map[s
|
||||
"id": modelID,
|
||||
"_launch": true,
|
||||
}
|
||||
if l, ok := lookupCloudModelLimit(modelID); ok {
|
||||
cfg["contextWindow"] = l.Context
|
||||
}
|
||||
|
||||
resp, err := client.Show(ctx, &api.ShowRequest{Model: modelID})
|
||||
if err != nil {
|
||||
@@ -223,7 +226,8 @@ func createConfig(ctx context.Context, client *api.Client, modelID string) map[s
|
||||
cfg["reasoning"] = true
|
||||
}
|
||||
|
||||
// Extract context window from ModelInfo
|
||||
// Extract context window from ModelInfo. For known cloud models, the
|
||||
// pre-filled shared limit remains unless the server provides a positive value.
|
||||
for key, val := range resp.ModelInfo {
|
||||
if strings.HasSuffix(key, ".context_length") {
|
||||
if ctxLen, ok := val.(float64); ok && ctxLen > 0 {
|
||||
|
||||
@@ -798,6 +798,60 @@ func TestCreateConfig(t *testing.T) {
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("falls back to cloud context when show fails", func(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
fmt.Fprintf(w, `{"error":"model not found"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
u, _ := url.Parse(srv.URL)
|
||||
client := api.NewClient(u, srv.Client())
|
||||
|
||||
cfg := createConfig(context.Background(), client, "kimi-k2.5:cloud")
|
||||
|
||||
if cfg["contextWindow"] != 262_144 {
|
||||
t.Errorf("contextWindow = %v, want 262144", cfg["contextWindow"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("falls back to cloud context when model info is empty", func(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/api/show" {
|
||||
fmt.Fprintf(w, `{"capabilities":[],"model_info":{}}`)
|
||||
return
|
||||
}
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
u, _ := url.Parse(srv.URL)
|
||||
client := api.NewClient(u, srv.Client())
|
||||
|
||||
cfg := createConfig(context.Background(), client, "glm-5:cloud")
|
||||
|
||||
if cfg["contextWindow"] != 202_752 {
|
||||
t.Errorf("contextWindow = %v, want 202752", cfg["contextWindow"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("falls back to cloud context for dash cloud suffix", func(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
fmt.Fprintf(w, `{"error":"model not found"}`)
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
u, _ := url.Parse(srv.URL)
|
||||
client := api.NewClient(u, srv.Client())
|
||||
|
||||
cfg := createConfig(context.Background(), client, "gpt-oss:120b-cloud")
|
||||
|
||||
if cfg["contextWindow"] != 131_072 {
|
||||
t.Errorf("contextWindow = %v, want 131072", cfg["contextWindow"])
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("skips zero context length", func(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/api/show" {
|
||||
|
||||
Reference in New Issue
Block a user