From 1af850e6e3668ece1c953bb27e38e8827393adff Mon Sep 17 00:00:00 2001 From: Bruce MacDonald Date: Fri, 6 Mar 2026 14:08:34 -0800 Subject: [PATCH] parsers: repair unclosed arg_value tags in GLM tool calls (#14656) GLM models sometimes omit the closing </arg_value> tag in tool call XML, causing xml.Unmarshal to fail with an "element <arg_value> closed by ..." syntax error. This is a known issue across the GLM family. When the first parse fails, sanitize the input by inserting the missing </arg_value> closing tags (before the next <arg_key>, or at the end of the input) and retry so encoding/xml can handle it. --- model/parsers/glm46.go | 49 ++++++++++++++++++- model/parsers/glm46_test.go | 83 ++++++++++++++++++++++++++++++++ x/models/glm4_moe_lite/parser.go | 47 +++++++++++++++++- 3 files changed, 175 insertions(+), 4 deletions(-) diff --git a/model/parsers/glm46.go b/model/parsers/glm46.go index 7befc711f..fb6ea888e 100644 --- a/model/parsers/glm46.go +++ b/model/parsers/glm46.go @@ -345,6 +345,47 @@ func escapeGLM46Content(s string) string { return result.String() } +// repairUnclosedArgValues inserts missing </arg_value> closing tags. +// GLM models sometimes omit the closing tag, producing XML like: +// +// <arg_value>value +// +// instead of: +// +// <arg_value>value</arg_value> +func repairUnclosedArgValues(s string) string { + var result strings.Builder + for { + openIdx := strings.Index(s, "") + if openIdx == -1 { + result.WriteString(s) + break + } + afterOpen := openIdx + len("") + closeIdx := strings.Index(s[afterOpen:], "") + nextKeyIdx := strings.Index(s[afterOpen:], "") + // Check if properly closed before the next <arg_key> (or no next key) + if closeIdx != -1 && (nextKeyIdx == -1 || closeIdx < nextKeyIdx) { + end := afterOpen + closeIdx + len("") + result.WriteString(s[:end]) + s = s[end:] + continue + } + // Unclosed — insert </arg_value> before the next <arg_key> or at end + if nextKeyIdx != -1 { + insertAt := afterOpen + nextKeyIdx + result.WriteString(s[:insertAt]) + result.WriteString("") + s = s[insertAt:] + } else { + result.WriteString(s) + result.WriteString("") + break + } + } + return result.String() +} + func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) 
(api.ToolCall, error) { // Escape any unescaped entities in text content // We need to escape text between tags, but not the tags themselves @@ -353,10 +394,14 @@ func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCa // Wrap the content in a root element to make it valid XML xmlString := "" + escaped + "" - // Parse XML into struct + // Parse XML into struct, retrying once with repaired XML if it fails var parsed GLMToolCallXML if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil { - return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err) + parsed = GLMToolCallXML{} + repaired := "" + repairUnclosedArgValues(escaped) + "" + if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil { + return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err) + } } // Extract and trim function name diff --git a/model/parsers/glm46_test.go b/model/parsers/glm46_test.go index 341b93fbe..8cd88d196 100644 --- a/model/parsers/glm46_test.go +++ b/model/parsers/glm46_test.go @@ -846,6 +846,47 @@ line3`, }, }, }, + { + name: "unclosed arg_value at end", + tools: []api.Tool{}, + rawToolCall: `get-weather +city +Paris`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "get-weather", + Arguments: args(`{"city": "Paris"}`), + }, + }, + }, + { + name: "unclosed arg_value before next arg_key", + tools: []api.Tool{}, + rawToolCall: `get-weather +city +Parisunit +celsius`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "get-weather", + Arguments: args(`{"city": "Paris", "unit": "celsius"}`), + }, + }, + }, + { + name: "multiple unclosed arg_values", + tools: []api.Tool{}, + rawToolCall: `get-weather +city +Parisunit +celsius`, + wantToolCall: api.ToolCall{ + Function: api.ToolCallFunction{ + Name: "get-weather", + Arguments: args(`{"city": "Paris", "unit": "celsius"}`), + }, + }, + }, } for i, tc := range cases { @@ -860,3 +901,45 @@ line3`, }) } } + +func TestRepairUnclosedArgValues(t 
*testing.T) { + cases := []struct { + name string + input string + want string + }{ + { + name: "already valid", + input: `kv`, + want: `kv`, + }, + { + name: "unclosed at end", + input: `kv`, + want: `kv`, + }, + { + name: "unclosed before next arg_key", + input: `a1b2`, + want: `a1b2`, + }, + { + name: "no arg_value tags", + input: `just plain text`, + want: `just plain text`, + }, + { + name: "multiple unclosed", + input: `a1b2`, + want: `a1b2`, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := repairUnclosedArgValues(tc.input) + if got != tc.want { + t.Errorf("got %q, want %q", got, tc.want) + } + }) + } +} diff --git a/x/models/glm4_moe_lite/parser.go b/x/models/glm4_moe_lite/parser.go index c81ec5a40..de1b2cc17 100644 --- a/x/models/glm4_moe_lite/parser.go +++ b/x/models/glm4_moe_lite/parser.go @@ -369,6 +369,45 @@ func escapeContent(s string) string { return result.String() } +// repairUnclosedArgValues inserts missing </arg_value> closing tags. +// GLM models sometimes omit the closing tag, producing XML like: +// +// <arg_value>value +// +// instead of: +// +// <arg_value>value</arg_value> +func repairUnclosedArgValues(s string) string { + var result strings.Builder + for { + openIdx := strings.Index(s, "") + if openIdx == -1 { + result.WriteString(s) + break + } + afterOpen := openIdx + len("") + closeIdx := strings.Index(s[afterOpen:], "") + nextKeyIdx := strings.Index(s[afterOpen:], "") + if closeIdx != -1 && (nextKeyIdx == -1 || closeIdx < nextKeyIdx) { + end := afterOpen + closeIdx + len("") + result.WriteString(s[:end]) + s = s[end:] + continue + } + if nextKeyIdx != -1 { + insertAt := afterOpen + nextKeyIdx + result.WriteString(s[:insertAt]) + result.WriteString("") + s = s[insertAt:] + } else { + result.WriteString(s) + result.WriteString("") + break + } + } + return result.String() +} + func parseToolCall(raw eventRawToolCall, tools []api.Tool) (api.ToolCall, error) { // Escape any unescaped entities in text content escaped := escapeContent(raw.raw) @@ 
-376,10 +415,14 @@ func parseToolCall(raw eventRawToolCall, tools []api.Tool) (api.ToolCall, error) // Wrap the content in a root element to make it valid XML xmlString := "" + escaped + "" - // Parse XML into struct + // Parse XML into struct, retrying once with repaired XML if it fails var parsed ToolCallXML if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil { - return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err) + parsed = ToolCallXML{} + repaired := "" + repairUnclosedArgValues(escaped) + "" + if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil { + return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err) + } } // Extract and trim function name