mirror of
https://github.com/ollama/ollama.git
synced 2026-03-08 23:04:13 -05:00
parsers: repair unclosed arg_value tags in GLM tool calls (#14656)
GLM models sometimes omit </arg_value> closing tags in tool call XML, causing xml.Unmarshal to fail with "element <arg_value> closed by </tool_call>". This is a known issue across the GLM family. Sanitize the input by inserting the missing </arg_value> closing tags so encoding/xml can handle it.
This commit is contained in:
@@ -345,6 +345,47 @@ func escapeGLM46Content(s string) string {
|
||||
return result.String()
|
||||
}
|
||||
|
||||
// repairUnclosedArgValues inserts missing </arg_value> closing tags.
// GLM models sometimes omit the closing tag, producing XML like:
//
//	<arg_value>value</tool_call>
//
// instead of:
//
//	<arg_value>value</arg_value></tool_call>
//
// An <arg_value> is treated as unclosed when no </arg_value> appears before
// the next <arg_key> or </tool_call>, or before the end of the input when
// neither follows. The missing tag is inserted immediately before that
// boundary, or appended to the end when there is no boundary. Input that is
// already well formed is returned unchanged.
func repairUnclosedArgValues(s string) string {
	const (
		openTag      = "<arg_value>"
		closeTag     = "</arg_value>"
		nextKeyTag   = "<arg_key>"
		callCloseTag = "</tool_call>"
	)

	var result strings.Builder
	// Reserving the input length covers the common already-valid case with a
	// single allocation; repaired input grows by one tag per unclosed value.
	result.Grow(len(s))

	for {
		openIdx := strings.Index(s, openTag)
		if openIdx == -1 {
			// No more values to inspect: keep the remainder verbatim.
			result.WriteString(s)
			break
		}

		afterOpen := openIdx + len(openTag)
		rest := s[afterOpen:]
		closeIdx := strings.Index(rest, closeTag)

		// The value has to end before whichever comes first: the next key or
		// the end of the enclosing tool call.
		boundary := strings.Index(rest, nextKeyTag)
		if callEnd := strings.Index(rest, callCloseTag); callEnd != -1 && (boundary == -1 || callEnd < boundary) {
			boundary = callEnd
		}

		// Properly closed before the boundary (or there is no boundary).
		if closeIdx != -1 && (boundary == -1 || closeIdx < boundary) {
			end := afterOpen + closeIdx + len(closeTag)
			result.WriteString(s[:end])
			s = s[end:]
			continue
		}

		// Unclosed and nothing follows: close at the end of the input.
		if boundary == -1 {
			result.WriteString(s)
			result.WriteString(closeTag)
			break
		}

		// Unclosed: close right before the boundary and resume scanning there.
		insertAt := afterOpen + boundary
		result.WriteString(s[:insertAt])
		result.WriteString(closeTag)
		s = s[insertAt:]
	}
	return result.String()
}
|
||||
|
||||
func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
||||
// Escape any unescaped entities in text content
|
||||
// We need to escape text between tags, but not the tags themselves
|
||||
@@ -353,10 +394,14 @@ func parseGLM46ToolCall(raw glm46EventRawToolCall, tools []api.Tool) (api.ToolCa
|
||||
// Wrap the content in a root element to make it valid XML
|
||||
xmlString := "<tool_call>" + escaped + "</tool_call>"
|
||||
|
||||
// Parse XML into struct
|
||||
// Parse XML into struct, retrying once with repaired XML if it fails
|
||||
var parsed GLMToolCallXML
|
||||
if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil {
|
||||
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
|
||||
parsed = GLMToolCallXML{}
|
||||
repaired := "<tool_call>" + repairUnclosedArgValues(escaped) + "</tool_call>"
|
||||
if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil {
|
||||
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Extract and trim function name
|
||||
|
||||
@@ -846,6 +846,47 @@ line3</arg_value>`,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unclosed arg_value at end",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `get-weather
|
||||
<arg_key>city</arg_key>
|
||||
<arg_value>Paris`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-weather",
|
||||
Arguments: args(`{"city": "Paris"}`),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unclosed arg_value before next arg_key",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `get-weather
|
||||
<arg_key>city</arg_key>
|
||||
<arg_value>Paris<arg_key>unit</arg_key>
|
||||
<arg_value>celsius</arg_value>`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-weather",
|
||||
Arguments: args(`{"city": "Paris", "unit": "celsius"}`),
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple unclosed arg_values",
|
||||
tools: []api.Tool{},
|
||||
rawToolCall: `get-weather
|
||||
<arg_key>city</arg_key>
|
||||
<arg_value>Paris<arg_key>unit</arg_key>
|
||||
<arg_value>celsius`,
|
||||
wantToolCall: api.ToolCall{
|
||||
Function: api.ToolCallFunction{
|
||||
Name: "get-weather",
|
||||
Arguments: args(`{"city": "Paris", "unit": "celsius"}`),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range cases {
|
||||
@@ -860,3 +901,45 @@ line3</arg_value>`,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRepairUnclosedArgValues(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
input string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "already valid",
|
||||
input: `<arg_key>k</arg_key><arg_value>v</arg_value>`,
|
||||
want: `<arg_key>k</arg_key><arg_value>v</arg_value>`,
|
||||
},
|
||||
{
|
||||
name: "unclosed at end",
|
||||
input: `<arg_key>k</arg_key><arg_value>v`,
|
||||
want: `<arg_key>k</arg_key><arg_value>v</arg_value>`,
|
||||
},
|
||||
{
|
||||
name: "unclosed before next arg_key",
|
||||
input: `<arg_key>a</arg_key><arg_value>1<arg_key>b</arg_key><arg_value>2</arg_value>`,
|
||||
want: `<arg_key>a</arg_key><arg_value>1</arg_value><arg_key>b</arg_key><arg_value>2</arg_value>`,
|
||||
},
|
||||
{
|
||||
name: "no arg_value tags",
|
||||
input: `just plain text`,
|
||||
want: `just plain text`,
|
||||
},
|
||||
{
|
||||
name: "multiple unclosed",
|
||||
input: `<arg_key>a</arg_key><arg_value>1<arg_key>b</arg_key><arg_value>2`,
|
||||
want: `<arg_key>a</arg_key><arg_value>1</arg_value><arg_key>b</arg_key><arg_value>2</arg_value>`,
|
||||
},
|
||||
}
|
||||
for _, tc := range cases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got := repairUnclosedArgValues(tc.input)
|
||||
if got != tc.want {
|
||||
t.Errorf("got %q, want %q", got, tc.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -369,6 +369,45 @@ func escapeContent(s string) string {
|
||||
return result.String()
|
||||
}
|
||||
|
||||
// repairUnclosedArgValues inserts missing </arg_value> closing tags.
// GLM models sometimes omit the closing tag, producing XML like:
//
//	<arg_value>value</tool_call>
//
// instead of:
//
//	<arg_value>value</arg_value></tool_call>
//
// An <arg_value> is treated as unclosed when no </arg_value> appears before
// the next <arg_key> or </tool_call>, or before the end of the input when
// neither follows. The missing tag is inserted immediately before that
// boundary, or appended to the end when there is no boundary. Input that is
// already well formed is returned unchanged.
func repairUnclosedArgValues(s string) string {
	const (
		openTag      = "<arg_value>"
		closeTag     = "</arg_value>"
		nextKeyTag   = "<arg_key>"
		callCloseTag = "</tool_call>"
	)

	var result strings.Builder
	// Reserving the input length covers the common already-valid case with a
	// single allocation; repaired input grows by one tag per unclosed value.
	result.Grow(len(s))

	for {
		openIdx := strings.Index(s, openTag)
		if openIdx == -1 {
			// No more values to inspect: keep the remainder verbatim.
			result.WriteString(s)
			break
		}

		afterOpen := openIdx + len(openTag)
		rest := s[afterOpen:]
		closeIdx := strings.Index(rest, closeTag)

		// The value has to end before whichever comes first: the next key or
		// the end of the enclosing tool call.
		boundary := strings.Index(rest, nextKeyTag)
		if callEnd := strings.Index(rest, callCloseTag); callEnd != -1 && (boundary == -1 || callEnd < boundary) {
			boundary = callEnd
		}

		// Properly closed before the boundary (or there is no boundary).
		if closeIdx != -1 && (boundary == -1 || closeIdx < boundary) {
			end := afterOpen + closeIdx + len(closeTag)
			result.WriteString(s[:end])
			s = s[end:]
			continue
		}

		// Unclosed and nothing follows: close at the end of the input.
		if boundary == -1 {
			result.WriteString(s)
			result.WriteString(closeTag)
			break
		}

		// Unclosed: close right before the boundary and resume scanning there.
		insertAt := afterOpen + boundary
		result.WriteString(s[:insertAt])
		result.WriteString(closeTag)
		s = s[insertAt:]
	}
	return result.String()
}
|
||||
|
||||
func parseToolCall(raw eventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
||||
// Escape any unescaped entities in text content
|
||||
escaped := escapeContent(raw.raw)
|
||||
@@ -376,10 +415,14 @@ func parseToolCall(raw eventRawToolCall, tools []api.Tool) (api.ToolCall, error)
|
||||
// Wrap the content in a root element to make it valid XML
|
||||
xmlString := "<tool_call>" + escaped + "</tool_call>"
|
||||
|
||||
// Parse XML into struct
|
||||
// Parse XML into struct, retrying once with repaired XML if it fails
|
||||
var parsed ToolCallXML
|
||||
if err := xml.Unmarshal([]byte(xmlString), &parsed); err != nil {
|
||||
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
|
||||
parsed = ToolCallXML{}
|
||||
repaired := "<tool_call>" + repairUnclosedArgValues(escaped) + "</tool_call>"
|
||||
if err2 := xml.Unmarshal([]byte(repaired), &parsed); err2 != nil {
|
||||
return api.ToolCall{}, fmt.Errorf("failed to parse XML: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Extract and trim function name
|
||||
|
||||
Reference in New Issue
Block a user