cmd/config: fix cloud model limit lookups in integrations (#14650)

This commit is contained in:
Parth Sareen
2026-03-05 13:57:28 -08:00
committed by GitHub
parent 15732f0ea7
commit 9896e3627f
6 changed files with 79 additions and 9 deletions

View File

@@ -1277,7 +1277,8 @@ func TestDroidEdit_LocalModelDefaultMaxOutput(t *testing.T) {
func TestDroidEdit_CloudModelLimitsUsed(t *testing.T) {
// Verify that every cloud model in cloudModelLimits has a valid output
// value that would be used for maxOutputTokens when isCloudModel returns true.
// :cloud suffix stripping must also work since that's how users specify them.
// Cloud suffix normalization must also work since integrations may see either
// :cloud or -cloud model names.
for name, expected := range cloudModelLimits {
t.Run(name, func(t *testing.T) {
l, ok := lookupCloudModelLimit(name)
@@ -1296,6 +1297,15 @@ func TestDroidEdit_CloudModelLimitsUsed(t *testing.T) {
if l2.Output != expected.Output {
t.Errorf(":cloud output = %d, want %d", l2.Output, expected.Output)
}
// Also verify -cloud suffix lookup
dashCloudName := name + "-cloud"
l3, ok := lookupCloudModelLimit(dashCloudName)
if !ok {
t.Fatalf("lookupCloudModelLimit(%q) returned false", dashCloudName)
}
if l3.Output != expected.Output {
t.Errorf("-cloud output = %d, want %d", l3.Output, expected.Output)
}
})
}
}

View File

@@ -81,6 +81,7 @@ var cloudModelLimits = map[string]cloudModelLimit{
"deepseek-v3.2": {Context: 163_840, Output: 65_536},
"glm-4.6": {Context: 202_752, Output: 131_072},
"glm-4.7": {Context: 202_752, Output: 131_072},
"glm-5": {Context: 202_752, Output: 131_072},
"gpt-oss:120b": {Context: 131_072, Output: 131_072},
"gpt-oss:20b": {Context: 131_072, Output: 131_072},
"kimi-k2:1t": {Context: 262_144, Output: 262_144},

View File

@@ -26,17 +26,15 @@ type cloudModelLimit struct {
}
// lookupCloudModelLimit returns the token limits for a cloud model.
//
// The name is normalized first: a trailing "-cloud" is removed, then a
// trailing ":cloud". The normalized name is looked up in cloudModelLimits.
// If that misses and the normalized name still ends in ":cloud", the suffix
// is stripped once more and the lookup is retried. The second result is
// false (with a zero cloudModelLimit) when no entry matches.
func lookupCloudModelLimit(name string) (cloudModelLimit, bool) {
	// TODO(parthsareen): migrate to using cloud check instead.
	normalized := strings.TrimSuffix(strings.TrimSuffix(name, "-cloud"), ":cloud")

	// Candidate keys in lookup order: the normalized name, then (only when
	// a ":cloud" suffix is still present) the name with that suffix removed.
	candidates := []string{normalized}
	if strings.HasSuffix(normalized, ":cloud") {
		candidates = append(candidates, strings.TrimSuffix(normalized, ":cloud"))
	}

	for _, key := range candidates {
		if limit, found := cloudModelLimits[key]; found {
			return limit, true
		}
	}
	return cloudModelLimit{}, false
}

View File

@@ -666,6 +666,9 @@ func TestLookupCloudModelLimit(t *testing.T) {
}{
{"glm-4.7", true, 202_752, 131_072},
{"glm-4.7:cloud", true, 202_752, 131_072},
{"glm-5:cloud", true, 202_752, 131_072},
{"gpt-oss:120b-cloud", true, 131_072, 131_072},
{"gpt-oss:20b-cloud", true, 131_072, 131_072},
{"kimi-k2.5", true, 262_144, 262_144},
{"kimi-k2.5:cloud", true, 262_144, 262_144},
{"deepseek-v3.2", true, 163_840, 65_536},

View File

@@ -205,6 +205,9 @@ func createConfig(ctx context.Context, client *api.Client, modelID string) map[s
"id": modelID,
"_launch": true,
}
if l, ok := lookupCloudModelLimit(modelID); ok {
cfg["contextWindow"] = l.Context
}
resp, err := client.Show(ctx, &api.ShowRequest{Model: modelID})
if err != nil {
@@ -223,7 +226,8 @@ func createConfig(ctx context.Context, client *api.Client, modelID string) map[s
cfg["reasoning"] = true
}
// Extract context window from ModelInfo
// Extract context window from ModelInfo. For known cloud models, the
// pre-filled shared limit remains unless the server provides a positive value.
for key, val := range resp.ModelInfo {
if strings.HasSuffix(key, ".context_length") {
if ctxLen, ok := val.(float64); ok && ctxLen > 0 {

View File

@@ -798,6 +798,60 @@ func TestCreateConfig(t *testing.T) {
}
})
t.Run("falls back to cloud context when show fails", func(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
fmt.Fprintf(w, `{"error":"model not found"}`)
}))
defer srv.Close()
u, _ := url.Parse(srv.URL)
client := api.NewClient(u, srv.Client())
cfg := createConfig(context.Background(), client, "kimi-k2.5:cloud")
if cfg["contextWindow"] != 262_144 {
t.Errorf("contextWindow = %v, want 262144", cfg["contextWindow"])
}
})
t.Run("falls back to cloud context when model info is empty", func(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/show" {
fmt.Fprintf(w, `{"capabilities":[],"model_info":{}}`)
return
}
w.WriteHeader(http.StatusNotFound)
}))
defer srv.Close()
u, _ := url.Parse(srv.URL)
client := api.NewClient(u, srv.Client())
cfg := createConfig(context.Background(), client, "glm-5:cloud")
if cfg["contextWindow"] != 202_752 {
t.Errorf("contextWindow = %v, want 202752", cfg["contextWindow"])
}
})
t.Run("falls back to cloud context for dash cloud suffix", func(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
fmt.Fprintf(w, `{"error":"model not found"}`)
}))
defer srv.Close()
u, _ := url.Parse(srv.URL)
client := api.NewClient(u, srv.Client())
cfg := createConfig(context.Background(), client, "gpt-oss:120b-cloud")
if cfg["contextWindow"] != 131_072 {
t.Errorf("contextWindow = %v, want 131072", cfg["contextWindow"])
}
})
t.Run("skips zero context length", func(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.Path == "/api/show" {