parsers: repair unclosed arg_value tags in GLM tool calls (#14656)

GLM models sometimes omit </arg_value> closing tags in tool call XML, causing xml.Unmarshal to fail with "element <arg_value> closed by </tool_call>".

This is a known issue across the GLM family.

Sanitize the input by inserting the missing </arg_value> closing tags so encoding/xml can handle it.
This commit is contained in:
Bruce MacDonald
2026-03-06 14:08:34 -08:00
committed by GitHub
parent 9b0c7cc7b9
commit 1af850e6e3
3 changed files with 175 additions and 4 deletions

View File

@@ -345,6 +345,47 @@ func escapeGLM46Content(s string) string {
return result.String()
}
// repairUnclosedArgValues returns s with a </arg_value> closing tag added
// after every <arg_value> element that is missing one. GLM models
// occasionally drop the closing tag and emit
//
//	<arg_value>value</tool_call>
//
// where well-formed output would be
//
//	<arg_value>value</arg_value></tool_call>
//
// A value counts as closed only when its </arg_value> shows up before the
// next <arg_key>. Otherwise the closing tag is inserted immediately before
// that <arg_key>, or appended to the end of the string when no further key
// follows. Input without any <arg_value> tag is returned unchanged.
func repairUnclosedArgValues(s string) string {
	const (
		openTag  = "<arg_value>"
		closeTag = "</arg_value>"
		keyTag   = "<arg_key>"
	)

	var out strings.Builder
	rest := s
	for {
		start := strings.Index(rest, openTag)
		if start < 0 {
			// Nothing left to inspect; flush the remainder as-is.
			out.WriteString(rest)
			return out.String()
		}

		bodyStart := start + len(openTag)
		body := rest[bodyStart:]
		closeAt := strings.Index(body, closeTag)
		keyAt := strings.Index(body, keyTag)

		switch {
		case closeAt >= 0 && (keyAt < 0 || closeAt < keyAt):
			// Properly closed before the next key (or there is no next key):
			// copy through the closing tag and keep scanning after it.
			cut := bodyStart + closeAt + len(closeTag)
			out.WriteString(rest[:cut])
			rest = rest[cut:]
		case keyAt >= 0:
			// Unclosed, and another key follows: close the value right
			// before that key.
			cut := bodyStart + keyAt
			out.WriteString(rest[:cut])
			out.WriteString(closeTag)
			rest = rest[cut:]
		default:
			// Unclosed final value: close it at the very end.
			out.WriteString(rest)
			out.WriteString(closeTag)
			return out.String()
		}
	}
}
func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
// Escape any unescaped entities in text content
// We need to escape text between tags, but not the tags themselves
@@ -353,10 +394,14 @@ func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCa
// Wrap the content in a root element to make it valid XML
xmlString := "<tool_call>" + escaped + "</tool_call>"
// Parse XML into struct
// Parse XML into struct, retrying once with repaired XML if it fails
var parsed GLMToolCallXML
if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
parsed = GLMToolCallXML{}
repaired := "<tool_call>" + repairUnclosedArgValues(escaped) + "</tool_call>"
if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
}
}
// Extract and trim function name

View File

@@ -846,6 +846,47 @@ line3</arg_value>`,
},
},
},
{
name: "unclosed arg_value at end",
tools: []api.Tool{},
rawToolCall: `get-weather
<arg_key>city</arg_key>
<arg_value>Paris`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get-weather",
Arguments: args(`{"city": "Paris"}`),
},
},
},
{
name: "unclosed arg_value before next arg_key",
tools: []api.Tool{},
rawToolCall: `get-weather
<arg_key>city</arg_key>
<arg_value>Paris<arg_key>unit</arg_key>
<arg_value>celsius</arg_value>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get-weather",
Arguments: args(`{"city": "Paris", "unit": "celsius"}`),
},
},
},
{
name: "multiple unclosed arg_values",
tools: []api.Tool{},
rawToolCall: `get-weather
<arg_key>city</arg_key>
<arg_value>Paris<arg_key>unit</arg_key>
<arg_value>celsius`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "get-weather",
Arguments: args(`{"city": "Paris", "unit": "celsius"}`),
},
},
},
}
for i, tc := range cases {
@@ -860,3 +901,45 @@ line3</arg_value>`,
})
}
}
// TestRepairUnclosedArgValues feeds repairUnclosedArgValues a table of
// fragments (already valid, unclosed at the end, unclosed before the next
// key, no tags at all, several unclosed values) and compares each result
// with the exact expected string.
func TestRepairUnclosedArgValues(t *testing.T) {
	type repairCase struct {
		name  string
		input string
		want  string
	}

	tests := []repairCase{
		{
			name:  "already valid",
			input: `<arg_key>k</arg_key><arg_value>v</arg_value>`,
			want:  `<arg_key>k</arg_key><arg_value>v</arg_value>`,
		},
		{
			name:  "unclosed at end",
			input: `<arg_key>k</arg_key><arg_value>v`,
			want:  `<arg_key>k</arg_key><arg_value>v</arg_value>`,
		},
		{
			name:  "unclosed before next arg_key",
			input: `<arg_key>a</arg_key><arg_value>1<arg_key>b</arg_key><arg_value>2</arg_value>`,
			want:  `<arg_key>a</arg_key><arg_value>1</arg_value><arg_key>b</arg_key><arg_value>2</arg_value>`,
		},
		{
			name:  "no arg_value tags",
			input: `just plain text`,
			want:  `just plain text`,
		},
		{
			name:  "multiple unclosed",
			input: `<arg_key>a</arg_key><arg_value>1<arg_key>b</arg_key><arg_value>2`,
			want:  `<arg_key>a</arg_key><arg_value>1</arg_value><arg_key>b</arg_key><arg_value>2</arg_value>`,
		},
	}

	for i := range tests {
		tt := tests[i]
		t.Run(tt.name, func(t *testing.T) {
			if got := repairUnclosedArgValues(tt.input); got != tt.want {
				t.Errorf("got %q, want %q", got, tt.want)
			}
		})
	}
}

View File

@@ -369,6 +369,45 @@ func escapeContent(s string) string {
return result.String()
}
// repairUnclosedArgValues adds the </arg_value> closing tags that GLM models
// sometimes leave out of tool call XML. Output such as
//
//	<arg_value>value</tool_call>
//
// is meant to read
//
//	<arg_value>value</arg_value></tool_call>
//
// Each <arg_value> is checked in order. If its </arg_value> appears before
// the next <arg_key> (or there is no later <arg_key>), the element is copied
// untouched. If not, a closing tag is inserted just ahead of the next
// <arg_key>, or at the end of the string when no key follows. A string with
// no <arg_value> tag comes back unchanged.
func repairUnclosedArgValues(s string) string {
	const (
		opener  = "<arg_value>"
		closer  = "</arg_value>"
		nextKey = "<arg_key>"
	)

	var b strings.Builder
	for {
		valueAt := strings.Index(s, opener)
		if valueAt == -1 {
			break
		}

		contentAt := valueAt + len(opener)
		tail := s[contentAt:]
		closerOff := strings.Index(tail, closer)
		keyOff := strings.Index(tail, nextKey)

		if closerOff != -1 && (keyOff == -1 || closerOff < keyOff) {
			// Already well-formed: emit everything through the closing tag.
			upTo := contentAt + closerOff + len(closer)
			b.WriteString(s[:upTo])
			s = s[upTo:]
			continue
		}

		if keyOff == -1 {
			// Last value is unclosed and nothing follows it: close at the end.
			b.WriteString(s)
			b.WriteString(closer)
			return b.String()
		}

		// Unclosed value followed by another key: close it before that key
		// and resume scanning from the key.
		upTo := contentAt + keyOff
		b.WriteString(s[:upTo])
		b.WriteString(closer)
		s = s[upTo:]
	}

	// No further <arg_value> tags; the remainder passes through verbatim.
	b.WriteString(s)
	return b.String()
}
func parseToolCall(raw eventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
// Escape any unescaped entities in text content
escaped := escapeContent(raw.raw)
@@ -376,10 +415,14 @@ func parseToolCall(raw eventRawToolCall, tools []api.Tool) (api.ToolCall, error)
// Wrap the content in a root element to make it valid XML
xmlString := "<tool_call>" + escaped + "</tool_call>"
// Parse XML into struct
// Parse XML into struct, retrying once with repaired XML if it fails
var parsed ToolCallXML
if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
parsed = ToolCallXML{}
repaired := "<tool_call>" + repairUnclosedArgValues(escaped) + "</tool_call>"
if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil {
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
}
}
// Extract and trim function name