UNPKG

@probelabs/probe

Version:

Node.js wrapper for the probe code search tool

190 lines (166 loc) 7.57 kB
/**
 * Shared XML parsing utilities used by both CLI/SDK and MCP modes
 * This module contains the core logic for thinking tag removal and attempt_complete recovery
 */

const THINKING_OPEN_TAG = '<thinking>';
const COMPLETION_OPEN_TAG = '<attempt_completion>';
const COMPLETION_CLOSE_TAG = '</attempt_completion>';

// Sentinel result telling the caller to reuse its previous response as the completion text.
const PREVIOUS_RESPONSE_MARKER = '__PREVIOUS_RESPONSE__';

// Tool tags that must survive even when they appear after an unclosed <thinking> tag.
const TOOL_TAG_PATTERN =
  /<(search|query|extract|listFiles|searchFiles|implement|attempt_completion|attempt_complete)>/;

// Tools assumed when the caller does not supply its own list.
const DEFAULT_TOOLS = [
  'search',
  'query',
  'extract',
  'listFiles',
  'searchFiles',
  'implement',
  'attempt_completion',
];

// Both spellings of the completion tool; neither counts as an "other" tool.
const COMPLETION_TOOL_NAMES = new Set(['attempt_completion', 'attempt_complete']);

// Shorthand forms of <attempt_complete>, all anchored at the start of the string.
const ATTEMPT_COMPLETE_PATTERNS = [
  // Standard shorthand with optional whitespace
  /^<attempt_complete>\s*$/,
  // Empty with proper closing tag
  /^<attempt_complete>\s*<\/attempt_complete>\s*$/,
  // Self-closing variant
  /^<attempt_complete\s*\/>\s*$/,
  // Incomplete opening tag (missing closing bracket)
  /^<attempt_complete\s*$/,
  // With trailing content (the trailing content is ignored)
  /^<attempt_complete>(.*)$/s,
  // Self-closing with trailing content
  /^<attempt_complete\s*\/>(.*)$/s,
];

/**
 * Build the standardized attempt_completion tool call.
 * @param {string} result - Completion text (or the previous-response sentinel)
 * @returns {{toolName: string, params: {result: string}}} - Standardized tool call
 */
function buildCompletionResult(result) {
  return { toolName: 'attempt_completion', params: { result } };
}

/**
 * Remove thinking tags and their content from XML string
 * Handles both closed and unclosed thinking tags
 * @param {string} xmlString - The XML string to clean
 * @returns {string} - Cleaned, trimmed XML string without thinking tags
 */
export function removeThinkingTags(xmlString) {
  // Remove all properly closed thinking blocks first.
  let result = xmlString.replace(/<thinking>[\s\S]*?<\/thinking>/g, '');

  // Any <thinking> still present is unclosed.
  const thinkingIndex = result.indexOf(THINKING_OPEN_TAG);
  if (thinkingIndex !== -1) {
    const contentStart = thinkingIndex + THINKING_OPEN_TAG.length;
    const toolMatch = result.substring(contentStart).match(TOOL_TAG_PATTERN);

    // Keep a tool tag that follows the unclosed thinking tag; otherwise drop
    // everything from <thinking> to the end of the string.
    result = toolMatch
      ? result.substring(0, thinkingIndex) + result.substring(contentStart + toolMatch.index)
      : result.substring(0, thinkingIndex);
  }

  return result.trim();
}

/**
 * Extract thinking content for potential logging
 * Only the first properly closed <thinking>...</thinking> pair is considered.
 * @param {string} xmlString - The XML string to extract from
 * @returns {string|null} - Trimmed thinking content or null if not found
 */
export function extractThinkingContent(xmlString) {
  const thinkingMatch = xmlString.match(/<thinking>([\s\S]*?)<\/thinking>/);
  return thinkingMatch ? thinkingMatch[1].trim() : null;
}

/**
 * Check for attempt_complete recovery patterns and return standardized result
 * @param {string} cleanedXmlString - XML string with thinking tags already removed
 * @param {Array<string>} validTools - List of valid tool names
 * @returns {Object|null} - Standardized attempt_completion result or null
 */
export function checkAttemptCompleteRecovery(cleanedXmlString, validTools = []) {
  // Handle "<attempt_completion>content" with or without a closing tag.
  // The content runs from the FIRST opening tag to the LAST closing tag
  // (indexOf / lastIndexOf), so content that itself contains the text
  // "</attempt_completion>" (e.g. code examples) is not cut short.
  const openTagIndex = cleanedXmlString.indexOf(COMPLETION_OPEN_TAG);
  if (openTagIndex !== -1) {
    const contentStart = openTagIndex + COMPLETION_OPEN_TAG.length;
    const closeTagIndex = cleanedXmlString.lastIndexOf(COMPLETION_CLOSE_TAG);
    // A closing tag only counts when it sits at or after the end of the opening tag.
    const hasClosingTag = closeTagIndex >= contentStart;

    const content = (
      hasClosingTag
        ? cleanedXmlString.substring(contentStart, closeTagIndex)
        : cleanedXmlString.substring(contentStart)
    ).trim();

    if (content) {
      return buildCompletionResult(content);
    }

    // Empty tag:
    // - with closing tag ("<attempt_completion></attempt_completion>"): empty result
    // - without closing tag ("<attempt_completion>"): reuse the previous response
    return buildCompletionResult(hasClosingTag ? '' : PREVIOUS_RESPONSE_MARKER);
  }

  // Any recognized <attempt_complete> shorthand maps to the standard format.
  if (ATTEMPT_COMPLETE_PATTERNS.some((pattern) => pattern.test(cleanedXmlString))) {
    return buildCompletionResult(PREVIOUS_RESPONSE_MARKER);
  }

  // Malformed cases: attempt_complete appears somewhere in the string and no
  // other tool tag is present, so treat the response as a completion signal.
  if (
    cleanedXmlString.includes('<attempt_complete') &&
    !hasOtherToolTags(cleanedXmlString, validTools)
  ) {
    return buildCompletionResult(PREVIOUS_RESPONSE_MARKER);
  }

  return null;
}

/**
 * Helper function to check if the XML string contains other tool tags
 * Both completion spellings ("attempt_completion" and the "attempt_complete"
 * shorthand) are skipped, so a validTools list that includes the shorthand
 * does not make the completion marker count as an "other" tool.
 * @param {string} xmlString - The XML string to check
 * @param {string[]} validTools - List of valid tool names (defaults apply when empty)
 * @returns {boolean} - True if other tool tags are found
 */
function hasOtherToolTags(xmlString, validTools = []) {
  const toolsToCheck = validTools.length > 0 ? validTools : DEFAULT_TOOLS;
  return toolsToCheck.some(
    (tool) => !COMPLETION_TOOL_NAMES.has(tool) && xmlString.includes(`<${tool}`)
  );
}

/**
 * Apply the full thinking tag removal and attempt_complete recovery logic
 * @param {string} xmlString - The XML string to process
 * @param {Array<string>} validTools - List of valid tool names
 * @returns {Object} - Object with cleanedXmlString, thinkingContent and recoveryResult
 */
export function processXmlWithThinkingAndRecovery(xmlString, validTools = []) {
  // Capture thinking content before it is stripped (used only for debug logging).
  const thinkingContent = extractThinkingContent(xmlString);
  const cleanedXmlString = removeThinkingTags(xmlString);
  const recoveryResult = checkAttemptCompleteRecovery(cleanedXmlString, validTools);

  // Log the thinking content when debugging is enabled via DEBUG=1.
  if (process.env.DEBUG === '1' && thinkingContent) {
    console.log(`[DEBUG] AI Thinking Process:\n${thinkingContent}`);
  }

  return { cleanedXmlString, thinkingContent, recoveryResult };
}