/*
 * @nanocollective/nanocoder
 * Version: (not captured)
 * A local-first CLI coding agent that brings the power of agentic coding tools like Claude Code and Gemini CLI to local models or controlled APIs like OpenRouter.
 * 248 lines • 9.98 kB
 * JavaScript
 */
import { ensureString } from '../utils/type-helpers.js';
/**
 * Parses XML-formatted tool calls from non-function-calling models.
 *
 * Expected format:
 *   <tool_name><param1>value1</param1><param2>value2</param2></tool_name>
 *
 * Every member is static; the class is used as a namespace and is never instantiated here.
 * Methods that accept `content` pass it through the imported `ensureString` helper before
 * any string processing.
 */
export class XMLToolCallParser {
  /** Matches a `<name>...</name>` pair with the same name on both ends; `s` lets the body span lines. */
  static TOOL_CALL_REGEX = /<(\w+)>(.*?)<\/\1>/gs;

  /** Same shape as TOOL_CALL_REGEX; a separate object so parameter scanning has its own `lastIndex`. */
  static PARAMETER_REGEX = /<(\w+)>(.*?)<\/\1>/gs;

  /** Lower-case HTML element names that are never accepted as tool names. */
  static HTML_TAGS = new Set([
    'div',
    'span',
    'p',
    'a',
    'ul',
    'ol',
    'li',
    'table',
    'tr',
    'td',
    'th',
    'thead',
    'tbody',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'br',
    'hr',
    'strong',
    'em',
    'code',
    'pre',
    'blockquote',
    'img',
    'section',
    'article',
    'header',
    'footer',
    'nav',
    'aside',
  ]);

  /**
   * Attribute-style tags (`<function=name>`, `<parameter=name>`) that mark a malformed call.
   * Deliberately narrower than "contains `=`": parameter values such as source code
   * routinely contain `=` and must not invalidate an otherwise well-formed call.
   */
  static ATTRIBUTE_TAG_REGEX = /<(?:function|parameter)=\w+>/;

  /**
   * Malformed-call signatures, checked in order; the first hit decides the reported error.
   * Patterns for `<function=x>...</function>` and `<parameter=x>...</parameter>` are not listed:
   * each would require a substring that an earlier entry already matches, so they could
   * never be the first hit.
   */
  static MALFORMED_PATTERNS = [
    {
      // [tool_use: name] or [Tool: name] syntax (common with some models like GLM)
      regex: /\[(?:tool_use|Tool):\s*(\w+)\]/i,
      error: 'Invalid syntax: [tool_use: name] or [Tool: name] format is not supported',
    },
    {
      // <function=name> syntax
      regex: /<function=(\w+)>/,
      error: 'Invalid syntax: <function=name> is not supported',
    },
    {
      // <parameter=name> syntax
      regex: /<parameter=(\w+)>/,
      error: 'Invalid syntax: <parameter=name> is not supported',
    },
  ];

  /**
   * Extracts tool calls from text content containing XML-formatted tool calls.
   *
   * The first markdown code fence is tried on its own first. If it yields no tool calls
   * (for example the fence is an unrelated snippet, or it sits inside a parameter value),
   * the complete content is scanned instead so those calls are not lost.
   *
   * @param {unknown} content - Model output; converted with `ensureString`.
   * @returns {Array<{toolName: string, parameters: Object}>} Validated tool calls in document order.
   */
  static parseToolCalls(content) {
    const text = ensureString(content);
    const fence = text.match(/```(?:\w+)?\s*\n?([\s\S]*?)\n?```/);
    if (fence && fence[1]) {
      const fencedCalls = this.scanToolCalls(fence[1]);
      if (fencedCalls.length > 0) {
        return fencedCalls;
      }
    }
    return this.scanToolCalls(text);
  }

  /**
   * Internal helper: finds every valid tool call in an already-stringified piece of text.
   *
   * @param {string} text - Text to scan; `<tool_call>` wrapper tags are stripped first.
   * @returns {Array<{toolName: string, parameters: Object}>} Tool calls that pass `isValidToolCall`.
   */
  static scanToolCalls(text) {
    const unwrapped = text.replace(/<\/?tool_call>/g, '').trim();
    const toolCalls = [];
    // matchAll iterates a clone of the regex but seeds it with the current lastIndex,
    // so clear any stale position first.
    this.TOOL_CALL_REGEX.lastIndex = 0;
    for (const [fullMatch, toolName, innerXml] of unwrapped.matchAll(this.TOOL_CALL_REGEX)) {
      // Skip a generic "tool_call" tag that slipped through the wrapper stripping
      if (toolName === 'tool_call') {
        continue;
      }
      if (!this.isValidToolCall(fullMatch, toolName)) {
        continue;
      }
      toolCalls.push({
        toolName,
        parameters: this.parseParameters(innerXml),
      });
    }
    return toolCalls;
  }

  /**
   * Validates that a matched string is a proper tool call.
   * Rejects HTML tag names, attribute-style tags, a missing closing tag, and bare
   * `<word>text</word>` pairs that neither nest other tags nor use a snake_case name.
   *
   * @param {string} fullMatch - Complete matched text, opening tag through closing tag.
   * @param {string} toolName - Tag name captured from the opening tag.
   * @returns {boolean} True when the match should be treated as a tool call.
   */
  static isValidToolCall(fullMatch, toolName) {
    // Reject common HTML tags that might be in model output
    if (this.HTML_TAGS.has(toolName.toLowerCase())) {
      return false;
    }
    // Reject malformed attribute-style syntax like <function=name> or <parameter=name>.
    // A plain '=' inside a parameter value is fine.
    if (this.ATTRIBUTE_TAG_REGEX.test(fullMatch)) {
      return false;
    }
    // The closing tag must be properly formed
    if (!fullMatch.endsWith(`</${toolName}>`)) {
      return false;
    }
    // Text between "<name>" (name + 2 chars) and "</name>" (name + 3 chars)
    const innerContent = fullMatch.substring(toolName.length + 2, fullMatch.length - (toolName.length + 3));
    // Parameter tags inside the call, or a snake_case tool name, are the two accepted
    // signals; this keeps standalone parameter tags from malformed XML out.
    const hasNestedTags = /<\w+>/.test(innerContent);
    const hasUnderscore = toolName.includes('_');
    return hasNestedTags || hasUnderscore;
  }

  /**
   * Parses parameters from inner XML content.
   *
   * Each `<name>value</name>` pair becomes one property. The trimmed value is run through
   * `JSON.parse`, so objects, arrays, numbers, booleans and `null` become their JSON
   * values; anything that is not valid JSON is kept as the trimmed string.
   *
   * @param {string} innerXml - Text between a tool call's opening and closing tags.
   * @returns {Object} Map of parameter name to parsed value; later duplicates overwrite earlier ones.
   */
  static parseParameters(innerXml) {
    const parameters = {};
    this.PARAMETER_REGEX.lastIndex = 0; // matchAll seeds its clone from lastIndex
    for (const [, paramName, paramValue] of String(innerXml).matchAll(this.PARAMETER_REGEX)) {
      const trimmedValue = paramValue.trim();
      try {
        parameters[paramName] = JSON.parse(trimmedValue);
      }
      catch {
        // Not JSON: deliberate fallback to the raw string (internal whitespace/newlines preserved)
        parameters[paramName] = trimmedValue;
      }
    }
    return parameters;
  }

  /**
   * Converts parsed tool calls to the standard ToolCall format.
   *
   * @param {Array<{toolName: string, parameters: Object}>} parsedCalls - Output of `parseToolCalls`.
   * @returns {Array<{id: string, function: {name: string, arguments: Object}}>} One entry per call;
   *   ids are `xml_call_<index>` and therefore unique only within this array.
   */
  static convertToToolCalls(parsedCalls) {
    return parsedCalls.map((call, index) => ({
      id: `xml_call_${index}`,
      function: {
        name: call.toolName,
        arguments: call.parameters,
      },
    }));
  }

  /**
   * Removes XML tool call blocks from content, leaving only the text.
   *
   * Removal is purely pattern-based: every `<name>...</name>` pair matched by
   * TOOL_CALL_REGEX is removed, and a fenced block is dropped whenever its body contains
   * such a pair. `isValidToolCall` is not consulted here.
   *
   * @param {unknown} content - Model output; converted with `ensureString`.
   * @returns {string} Content without tool-call markup, with leftover whitespace tidied.
   */
  static removeToolCallsFromContent(content) {
    // Drop whole fenced blocks that carry XML tool calls; keep every other block verbatim.
    const withoutFencedCalls = ensureString(content).replace(
      /```(?:\w+)?\s*\n?([\s\S]*?)\n?```/g,
      (block, blockContent) => {
        // search() always scans from index 0 and leaves the shared regex's lastIndex untouched
        const holdsToolCall = Boolean(blockContent) && blockContent.search(this.TOOL_CALL_REGEX) !== -1;
        return holdsToolCall ? '' : block;
      },
    );
    return withoutFencedCalls
      // Remove XML tool calls that aren't in code blocks
      .replace(this.TOOL_CALL_REGEX, '')
      // Remove any <tool_call> wrapper tags that may be left behind
      .replace(/<\/?tool_call>/g, '')
      // Remove trailing whitespace from each line
      .replace(/[ \t]+$/gm, '')
      // Collapse multiple spaces (but not at start of line for indentation)
      .replace(/([^ \t\n]) {2,}/g, '$1 ')
      // Remove lines that are only whitespace
      .replace(/^[ \t]+$/gm, '')
      // Collapse 2+ consecutive blank lines to a single blank line
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }

  /**
   * Checks if content contains XML-formatted tool calls.
   * Delegates to `parseToolCalls`, so only calls that pass validation count.
   *
   * @param {unknown} content - Model output.
   * @returns {boolean} True when at least one valid tool call is found.
   */
  static hasToolCalls(content) {
    return this.parseToolCalls(content).length > 0;
  }

  /**
   * Detects malformed XML tool call attempts and returns error details.
   *
   * @param {unknown} content - Model output; converted with `ensureString`.
   * @returns {{error: string, examples: string} | null} Details for the first matching
   *   malformed pattern, or null if none is detected.
   */
  static detectMalformedToolCall(content) {
    const contentStr = ensureString(content);
    const hit = this.MALFORMED_PATTERNS.find(({ regex }) => regex.test(contentStr));
    if (!hit) {
      return null;
    }
    return {
      error: hit.error,
      examples: this.getCorrectFormatExamples(),
    };
  }

  /**
   * Generates correct format examples for error messages.
   *
   * @returns {string} Fixed guidance text attached to malformed-call errors.
   */
  static getCorrectFormatExamples() {
    return `Please use the native tool calling format provided by the system. The tools are already available to you - call them directly using the function calling interface.`;
  }
}
//# sourceMappingURL=xml-parser.js.map