UNPKG

@nanocollective/nanocoder

Version:

A local-first CLI coding agent that brings the power of agentic coding tools like Claude Code and Gemini CLI to local models or controlled APIs like OpenRouter

248 lines 9.98 kB
import { ensureString } from '../utils/type-helpers.js'; /** * Parses XML-formatted tool calls from non-function-calling models * Expected format: <tool_name><param1>value1</param1><param2>value2</param2></tool_name> */ export class XMLToolCallParser { static TOOL_CALL_REGEX = /<(\w+)>(.*?)<\/\1>/gs; static PARAMETER_REGEX = /<(\w+)>(.*?)<\/\1>/gs; /** * Extracts tool calls from text content containing XML-formatted tool calls * Type-preserving: Accepts unknown type, converts to string for processing */ static parseToolCalls(content) { const toolCalls = []; let match; // Handle content that might be wrapped in markdown code blocks let processedContent = ensureString(content); const codeBlockMatch = processedContent.match(/```(?:\w+)?\s*\n?([\s\S]*?)\n?```/); if (codeBlockMatch && codeBlockMatch[1]) { processedContent = codeBlockMatch[1].trim(); } // Remove <tool_call> wrapper tags if present processedContent = processedContent.replace(/<\/?tool_call>/g, '').trim(); // Find all tool call blocks this.TOOL_CALL_REGEX.lastIndex = 0; // Reset regex state while ((match = this.TOOL_CALL_REGEX.exec(processedContent)) !== null) { const [fullMatch, toolName, innerXml] = match; // Skip if this is a generic "tool_call" tag that slipped through if (toolName === 'tool_call') { continue; } // Validate that this is a properly formed tool call if (!this.isValidToolCall(fullMatch, toolName)) { continue; } const parameters = this.parseParameters(innerXml); toolCalls.push({ toolName, parameters, }); } return toolCalls; } /** * Validates that a matched string is a proper tool call * Rejects partial matches, malformed syntax, and invalid structures */ static isValidToolCall(fullMatch, toolName) { // Reject common HTML tags that might be in model output const htmlTags = [ 'div', 'span', 'p', 'a', 'ul', 'ol', 'li', 'table', 'tr', 'td', 'th', 'thead', 'tbody', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'br', 'hr', 'strong', 'em', 'code', 'pre', 'blockquote', 'img', 'section', 'article', 'header', 'footer', 'nav', 'aside', ]; if (htmlTags.includes(toolName.toLowerCase())) { return false; } // Check for malformed attribute-style syntax like <function=name> or <parameter=name> if (fullMatch.includes('=')) { return false; } // Check if the closing tag is properly formed if (!fullMatch.endsWith(`</${toolName}>`)) { return false; } // Extract inner content between opening and closing tags const innerContent = fullMatch.substring(toolName.length + 2, fullMatch.length - (toolName.length + 3)); // Valid tool calls should contain parameter tags (other XML tags inside) // This prevents matching on standalone parameter tags from malformed XML const hasNestedTags = /<\w+>/.test(innerContent); // Tool names should contain underscores (snake_case convention for tools) // or be reasonably long compound words const hasUnderscore = toolName.includes('_'); // Allow if it has nested tags OR follows naming convention if (!hasNestedTags && !hasUnderscore) { return false; } return true; } /** * Parses parameters from inner XML content */ static parseParameters(innerXml) { const parameters = {}; let match; // Reset regex state this.PARAMETER_REGEX.lastIndex = 0; while ((match = this.PARAMETER_REGEX.exec(innerXml)) !== null) { const [, paramName, paramValue] = match; // Trim whitespace from parameter value const trimmedValue = paramValue.trim(); // Try to parse as JSON for complex objects/arrays try { parameters[paramName] = JSON.parse(trimmedValue); } catch { // If not valid JSON, use as string (preserving internal whitespace/newlines) parameters[paramName] = trimmedValue; } } return parameters; } /** * Converts parsed tool calls to the standard ToolCall format */ static convertToToolCalls(parsedCalls) { return parsedCalls.map((call, index) => ({ id: `xml_call_${index}`, function: { name: call.toolName, arguments: call.parameters, }, })); } /** * Removes XML tool call blocks from content, leaving only the text * Type-preserving: Accepts unknown type, converts to string for processing */ static removeToolCallsFromContent(content) { let cleanedContent = ensureString(content); // Remove all markdown code blocks that contain XML tool calls (using global flag) cleanedContent = cleanedContent.replace(/```(?:\w+)?\s*\n?([\s\S]*?)\n?```/g, (match, blockContent) => { if (blockContent) { // Reset regex and check if this block contains XML tool calls this.TOOL_CALL_REGEX.lastIndex = 0; const testResult = this.TOOL_CALL_REGEX.test(blockContent); this.TOOL_CALL_REGEX.lastIndex = 0; // Reset again after test if (testResult) { // This code block contains XML tool calls, remove it entirely return ''; } } // Keep blocks that don't contain XML tool calls return match; }); // Remove XML tool calls that aren't in code blocks this.TOOL_CALL_REGEX.lastIndex = 0; cleanedContent = cleanedContent.replace(this.TOOL_CALL_REGEX, ''); // Remove any <tool_call> wrapper tags that may be left behind cleanedContent = cleanedContent.replace(/<\/?tool_call>/g, ''); // Clean up whitespace artifacts left by removed tool calls cleanedContent = cleanedContent // Remove trailing whitespace from each line .replace(/[ \t]+$/gm, '') // Collapse multiple spaces (but not at start of line for indentation) .replace(/([^ \t\n]) {2,}/g, '$1 ') // Remove lines that are only whitespace .replace(/^[ \t]+$/gm, '') // Collapse 2+ consecutive blank lines to a single blank line .replace(/\n{3,}/g, '\n\n') .trim(); return cleanedContent; } /** * Checks if content contains XML-formatted tool calls * Type-preserving: Accepts unknown type, converts to string for processing */ static hasToolCalls(content) { // Use parseToolCalls with validation to ensure we only detect valid tool calls const toolCalls = this.parseToolCalls(content); return toolCalls.length > 0; } /** * Detects malformed XML tool call attempts and returns error details * Returns null if no malformed tool calls detected * Type-preserving: Accepts unknown type, converts to string for processing */ static detectMalformedToolCall(content) { // Type guard: ensure content is string for processing operations // BUT original type is preserved in memory via the ToolCall structure const contentStr = ensureString(content); // Common malformed patterns const patterns = [ { // [tool_use: name] or [Tool: name] syntax (common with some models like GLM) regex: /\[(?:tool_use|Tool):\s*(\w+)\]/i, error: 'Invalid syntax: [tool_use: name] or [Tool: name] format is not supported', }, { // <function=name> syntax regex: /<function=(\w+)>/, error: 'Invalid syntax: <function=name> is not supported', }, { // <parameter=name> syntax regex: /<parameter=(\w+)>/, error: 'Invalid syntax: <parameter=name> is not supported', }, { // Generic closing </parameter> without proper name regex: /<parameter=\w+>[\s\S]*?<\/parameter>/, error: 'Invalid parameter syntax: parameters must use named tags, not generic <parameter> wrapper', }, { // Generic closing </function> when <function=name> was used regex: /<function=\w+>[\s\S]*?<\/function>/, error: 'Invalid function syntax: use simple named tags, not <function=name> wrapper', }, ]; for (const pattern of patterns) { const match = contentStr.match(pattern.regex); if (match) { return { error: pattern.error, examples: this.getCorrectFormatExamples(), }; } } return null; } /** * Generates correct format examples for error messages */ static getCorrectFormatExamples() { return `Please use the native tool calling format provided by the system. The tools are already available to you - call them directly using the function calling interface.`; } } //# sourceMappingURL=xml-parser.js.map