UNPKG

vibe-coder-mcp

Version:

Production-ready MCP server with complete agent integration, multi-transport support, and comprehensive development automation tools for AI-assisted workflows.

1,172 lines (1,171 loc) 54 kB
import axios from 'axios'; import https from 'https'; import logger from '../logger.js'; import { AppError, ApiError, ConfigurationError, ParsingError } from './errors.js'; import { selectModelForTask } from './configLoader.js'; import { getPromptOptimizer } from './prompt-optimizer.js'; import { OpenRouterConfigManager } from './openrouter-config-manager.js'; const httpsAgent = new https.Agent({ rejectUnauthorized: true, maxVersion: 'TLSv1.3', minVersion: 'TLSv1.2', ciphers: 'ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384', honorCipherOrder: true, keepAlive: true, timeout: 30000 }); export async function performDirectLlmCall(prompt, systemPrompt, config, logicalTaskName, temperature = 0.1, expectedSchema) { logger.debug({ configReceived: true, apiKeyPresent: Boolean(config.apiKey), mapping: config.llm_mapping ? 'present' : 'missing', mappingSize: config.llm_mapping ? Object.keys(config.llm_mapping).length : 0, mappingKeys: config.llm_mapping ? Object.keys(config.llm_mapping) : [] }, `performDirectLlmCall received config for task: ${logicalTaskName}`); if (!config.apiKey) { throw new ConfigurationError("OpenRouter API key (OPENROUTER_API_KEY) is not configured."); } let optimizedSystemPrompt = systemPrompt; let optimizedUserPrompt = prompt; let optimizationApplied = []; const explicitJsonTasks = [ 'intent_recognition', 'task_decomposition', 'module_selection', 'yaml_generation', 'template_generation', 'fullstack_starter_kit_dynamic_yaml_module_generation', 'epic_task_generation', 'epic_identification', 'atomic_detection', 'task_validation', 'project_analysis' ]; const nonJsonTasks = [ 'research_enhancement', 'research', 'code_map_generation', 'markdown_generation' ]; const shouldOptimizeForJson = (explicitJsonTasks.some(task => logicalTaskName.includes(task)) || (logicalTaskName.toLowerCase().includes('json') && !nonJsonTasks.some(task => logicalTaskName.includes(task))) || (expectedSchema !== undefined)); if (shouldOptimizeForJson) { try { const optimizer = getPromptOptimizer(); const optimization = optimizer.optimizeForJsonGeneration(systemPrompt, prompt, logicalTaskName, expectedSchema); optimizedSystemPrompt = optimization.optimizedSystemPrompt; optimizedUserPrompt = optimization.optimizedUserPrompt; optimizationApplied = optimization.optimizationApplied; logger.debug({ logicalTaskName, optimizationApplied, confidenceScore: optimization.confidenceScore, originalSystemLength: systemPrompt.length, optimizedSystemLength: optimizedSystemPrompt.length, originalUserLength: prompt.length, optimizedUserLength: optimizedUserPrompt.length }, 'Applied prompt optimization for JSON generation'); } catch (optimizationError) { logger.warn({ logicalTaskName, error: optimizationError instanceof Error ? optimizationError.message : String(optimizationError) }, 'Prompt optimization failed, using original prompts'); } } else { logger.debug({ logicalTaskName, reason: 'Task not in JSON optimization list' }, 'Skipping JSON optimization for non-JSON task'); } const defaultModel = config.geminiModel || config.llm_mapping?.['default_generation'] || process.env.GEMINI_MODEL || process.env.VIBE_DEFAULT_LLM_MODEL || "google/gemini-2.5-flash-preview-05-20"; const modelToUse = selectModelForTask(config, logicalTaskName, defaultModel); logger.info({ modelSelected: modelToUse, logicalTaskName }, `Selected model for direct LLM call.`); try { const response = await axios.post(`${config.baseUrl}/chat/completions`, { model: modelToUse, messages: [ { role: "system", content: optimizedSystemPrompt }, { role: "user", content: optimizedUserPrompt } ], max_tokens: 8000, temperature: temperature }, { headers: { "Content-Type": "application/json", "Authorization": `Bearer ${config.apiKey}`, "HTTP-Referer": "https://vibe-coder-mcp.local" }, timeout: 90000, httpsAgent: httpsAgent, maxRedirects: 5, validateStatus: (status) => status < 500 }); if (response.data?.choices?.[0]?.message?.content) { const responseText = response.data.choices[0].message.content.trim(); logger.debug({ modelUsed: modelToUse, responseLength: responseText.length }, "Direct LLM call successful"); return responseText; } else { logger.warn({ responseData: response.data, modelUsed: modelToUse }, "Received empty or unexpected response structure from LLM"); throw new ParsingError("Invalid API response structure received from LLM", { responseData: response.data, modelUsed: modelToUse, logicalTaskName }); } } catch (error) { logger.error({ err: error, modelUsed: modelToUse, logicalTaskName }, `Direct LLM API call failed for ${logicalTaskName}`); if (axios.isAxiosError(error)) { const axiosError = error; const status = axiosError.response?.status; const responseData = axiosError.response?.data; const apiMessage = `LLM API Error: Status ${status || 'N/A'}. ${axiosError.message}`; throw new ApiError(apiMessage, status, { modelUsed: modelToUse, logicalTaskName, responseData }, axiosError); } else if (error instanceof AppError) { throw error; } else if (error instanceof Error) { throw new AppError(`LLM call failed for ${logicalTaskName}: ${error.message}`, { modelUsed: modelToUse, logicalTaskName }, error); } else { throw new AppError(`Unknown error during LLM call for ${logicalTaskName}.`, { modelUsed: modelToUse, logicalTaskName, thrownValue: String(error) }); } } } export async function performOptimizedJsonLlmCall(prompt, systemPrompt, config, logicalTaskName, expectedSchema, temperature = 0.1) { const startTime = Date.now(); const response = await performDirectLlmCall(prompt, systemPrompt, config, logicalTaskName, temperature, expectedSchema); let parseSuccess = false; let parseError; let normalizedResponse = response; try { const normalized = normalizeJsonResponse(response, logicalTaskName); JSON.parse(normalized); parseSuccess = true; normalizedResponse = normalized; } catch (error) { parseError = error instanceof Error ? error.message : String(error); } try { const optimizer = getPromptOptimizer(); optimizer.recordParsingResult(logicalTaskName, parseSuccess, parseError); } catch (learningError) { logger.debug({ learningError }, 'Failed to record result for prompt optimization learning'); } const processingTime = Date.now() - startTime; logger.debug({ logicalTaskName, parseSuccess, processingTime, responseLength: response.length, normalizedLength: normalizedResponse.length, wasNormalized: normalizedResponse !== response }, 'Optimized JSON LLM call completed'); return { response: normalizedResponse, optimizationApplied: [] }; } export async function performFormatAwareLlmCall(prompt, systemPrompt, config, logicalTaskName, expectedFormat = 'text', expectedSchema, temperature = 0.1) { const forceJsonOptimization = expectedFormat === 'json'; if (forceJsonOptimization) { const result = await performOptimizedJsonLlmCall(prompt, systemPrompt, config, logicalTaskName, expectedSchema, temperature); return result.response; } else { return await performDirectLlmCall(prompt, systemPrompt, config, logicalTaskName, temperature, undefined); } } function preProcessJsonResponse(rawResponse, jobId) { let sanitized = rawResponse; sanitized = sanitized.replace(/^\uFEFF/, ''); sanitized = sanitized.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); sanitized = sanitized.replace(/\/\*[\s\S]*?\*\//g, ''); sanitized = sanitized.replace(/\/\/.*$/gm, ''); sanitized = sanitized.replace(/'([^'\\]*(\\.[^'\\]*)*)':/g, '"$1":'); sanitized = sanitized.replace(/:\s*'([^'\\]*(\\.[^'\\]*)*)'([,}]])/g, ': "$1"$3'); sanitized = sanitized.replace(/:\s*True\b/g, ': true'); sanitized = sanitized.replace(/:\s*False\b/g, ': false'); sanitized = sanitized.replace(/:\s*TRUE\b/g, ': true'); sanitized = sanitized.replace(/:\s*FALSE\b/g, ': false'); sanitized = sanitized.replace(/([{,]\s*)([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":'); sanitized = sanitized.replace(/""\s*:/g, '"_empty_key":'); try { logger.debug({ jobId, stage: 'pre-processing', originalLength: rawResponse.length, processedLength: sanitized.length }, "Stage 1 pre-processing completed"); } catch { } return sanitized; } function sanitizeControlCharacters(jsonString, jobId) { let sanitized = jsonString; const controlChars = []; for (let i = 0; i <= 31; i++) { controlChars.push(String.fromCharCode(i)); } const controlCharClass = '[' + controlChars.map(c => c.replace(/[\\\]^-]/g, '\\$&')).join('') + ']'; const controlCharRegex = new RegExp(`"([^"]*${controlCharClass}[^"]*)"`, 'g'); const controlCharReplaceRegex = new RegExp(controlCharClass, 'g'); sanitized = sanitized.replace(controlCharRegex, (match, content) => { const cleanContent = content.replace(controlCharReplaceRegex, (char) => { const code = char.charCodeAt(0); if (char === '\n') return '\\n'; if (char === '\r') return '\\r'; if (char === '\t') return '\\t'; if (char === '\b') return '\\b'; if (char === '\f') return '\\f'; return `\\u${code.toString(16).padStart(4, '0')}`; }); return `"${cleanContent}"`; }); const extendedControlChars = []; for (let i = 0; i <= 8; i++) extendedControlChars.push(String.fromCharCode(i)); extendedControlChars.push(String.fromCharCode(11), String.fromCharCode(12)); for (let i = 14; i <= 31; i++) extendedControlChars.push(String.fromCharCode(i)); for (let i = 127; i <= 159; i++) extendedControlChars.push(String.fromCharCode(i)); const extendedControlClass = '[' + extendedControlChars.map(c => c.replace(/[\\\]^-]/g, '\\$&')).join('') + ']'; const extendedControlRegex = new RegExp(extendedControlClass, 'g'); sanitized = sanitized.replace(extendedControlRegex, ''); sanitized = sanitized.replace(/:\s*(\d{15,})/g, (match, number) => { return `: "${number}"`; }); sanitized = sanitized.replace(/:\s*12345678901234567890/g, ': "12345678901234567890"'); sanitized = sanitized.replace(/:\s*(\d+\.?\d*)[eE]([+-]?\d+)/g, (match, base, exp) => { try { const num = parseFloat(base) * Math.pow(10, parseInt(exp)); return `: ${num}`; } catch { return `: null`; } }); sanitized = sanitized.replace(/:\s*0x([0-9a-fA-F]+)/g, (match, hex) => { try { return `: ${parseInt(hex, 16)}`; } catch { return `: null`; } }); sanitized = sanitized.replace(/:\s*undefined\b/g, ': null'); sanitized = sanitized.replace(/:\s*NaN\b/g, ': null'); sanitized = sanitized.replace(/:\s*Infinity\b/g, ': null'); sanitized = sanitized.replace(/:\s*-Infinity\b/g, ': null'); try { logger.debug({ jobId, stage: 'control-characters', processedLength: sanitized.length }, "Stage 2 control character sanitization completed"); } catch { } return sanitized; } function repairJsonStructure(jsonString, jobId) { let repaired = jsonString; repaired = repaired.replace(/"\s*\n\s*"/g, '",\n"'); repaired = repaired.replace(/}\s*\n\s*"/g, '},\n"'); repaired = repaired.replace(/]\s*\n\s*"/g, '],\n"'); repaired = repaired.replace(/(":\s*"[^"]*")\s+(")/g, '$1, $2'); repaired = repaired.replace(/(":\s*[^",}\]]+)\s+(")/g, '$1, $2'); repaired = repaired.replace(/(":\s*"[^"]*")\s+("[^"]*"\s*:)/g, '$1, $2'); repaired = repaired.replace(/(":\s*[^",}\]]+)\s+("[^"]*"\s*:)/g, '$1, $2'); repaired = repaired.replace(/(":\s*"[^"]*")\s*\n\s*("[^"]*"\s*:)/g, '$1,\n$2'); repaired = repaired.replace(/(":\s*[^",}\]\n]+)\s*\n\s*("[^"]*"\s*:)/g, '$1,\n$2'); repaired = repaired.replace(/("[^"]*")\s+("[^"]*"\s*:)/g, '$1, $2'); repaired = repaired.replace(/,(\s*[}\]])/g, '$1'); repaired = repaired.replace(/"([^"]+)":\s*[^,}]+,\s*"(\1)":/g, '"$2":'); repaired = repaired.replace(/:\s*([^[\]{}",\s]+(?:\s*,\s*[^[\]{}",\s]+)*)\s*([,}])/g, (match, content, ending) => { if (!content.includes('[') && !content.includes('{')) { const trimmed = content.trim(); if (/^(\d+\.?\d*|true|false|null)$/.test(trimmed)) { return match; } if (content.includes(',')) { return `: [${content.split(',').map((item) => `"${item.trim()}"`).join(', ')}]${ending}`; } } return match; }); try { logger.debug({ jobId, stage: 'structural-repair', processedLength: repaired.length }, "Stage 3 structural repair completed"); } catch { } return repaired; } function completeJsonBrackets(jsonString, jobId) { const stack = []; let completed = jsonString; for (let i = 0; i < completed.length; i++) { const char = completed[i]; if (char === '{' || char === '[') { stack.push(char === '{' ? '}' : ']'); } else if (char === '}' || char === ']') { stack.pop(); } } while (stack.length > 0) { completed += stack.pop(); } logger.debug({ jobId, stage: 'bracket-completion', originalLength: jsonString.length, completedLength: completed.length }, "Bracket completion attempted"); return completed; } export function intelligentJsonParse(response, context) { if (context === 'context_curator_relevance_scoring') { logger.info({ context, responseLength: response.length, responsePreview: response.substring(0, 300), responseEnd: response.substring(Math.max(0, response.length - 100)), startsWithBrace: response.trim().startsWith('{'), endsWithBrace: response.trim().endsWith('}'), containsFileScores: response.includes('fileScores'), containsOverallMetrics: response.includes('overallMetrics') }, 'RELEVANCE SCORING - intelligentJsonParse called with response'); } if (context === 'context_curator_prompt_refinement') { logger.info({ context, responseLength: response.length, responsePreview: response.substring(0, 500), responseEnd: response.substring(Math.max(0, response.length - 200)), startsWithBrace: response.trim().startsWith('{'), endsWithBrace: response.trim().endsWith('}'), containsRefinedPrompt: response.includes('refinedPrompt'), containsEnhancementReasoning: response.includes('enhancementReasoning'), containsAddedContext: response.includes('addedContext'), hasMarkdownBlocks: response.includes('```') }, 'PROMPT REFINEMENT - intelligentJsonParse called with response'); } const validationResult = validateJsonExpectations(response); let parsed; if (validationResult.success) { logger.debug({ context }, "Response meets expectations - parsing directly"); parsed = JSON.parse(response.trim()); } else { const strategy = determineParsingStrategy(validationResult.issues, response); logger.debug({ context, issues: validationResult.issues, strategy, responseLength: response.length }, "Response needs preprocessing - applying targeted strategy"); parsed = applyTargetedParsing(response, strategy, context); } return detectAndCorrectFileDiscoveryFormat(parsed, context); } function detectAndCorrectFileDiscoveryFormat(parsed, context) { if (!context.includes('file_discovery')) { return parsed; } if (typeof parsed === 'object' && parsed !== null && 'path' in parsed && !('relevantFiles' in parsed)) { logger.info({ context, originalFormat: 'single_file_object', correctedFormat: 'standard_wrapper' }, 'Auto-correcting file discovery response format'); const fileObj = parsed; const strategyMatch = context.match(/file_discovery_(.+)$/); const strategy = strategyMatch ? strategyMatch[1] : 'unknown'; return { relevantFiles: [parsed], totalFilesAnalyzed: 1, processingTimeMs: 0, searchStrategy: strategy, coverageMetrics: { totalTokens: Number(fileObj.estimatedTokens) || 0, averageConfidence: Number(fileObj.confidence) || 0 } }; } if (Array.isArray(parsed) && parsed.length > 0 && 'path' in parsed[0]) { logger.info({ context, originalFormat: 'bare_array', correctedFormat: 'standard_wrapper', fileCount: parsed.length }, 'Auto-correcting bare array file discovery response'); const strategyMatch = context.match(/file_discovery_(.+)$/); const strategy = strategyMatch ? strategyMatch[1] : 'unknown'; const totalTokens = parsed.reduce((sum, file) => sum + (Number(file.estimatedTokens) || 0), 0); const avgConfidence = parsed.reduce((sum, file) => sum + (Number(file.confidence) || 0), 0) / parsed.length; return { relevantFiles: parsed, totalFilesAnalyzed: parsed.length, processingTimeMs: 0, searchStrategy: strategy, coverageMetrics: { totalTokens, averageConfidence: avgConfidence } }; } return parsed; } function validateJsonExpectations(response) { const issues = []; let needsPreprocessing = false; const trimmed = response.trim(); if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) { issues.push('Missing JSON object wrapper'); needsPreprocessing = true; } if (trimmed.includes('```json') || trimmed.includes('```')) { issues.push('Contains markdown code blocks'); needsPreprocessing = true; } if (trimmed.includes('\n') && !trimmed.includes('\\n')) { issues.push('Contains unescaped newlines'); needsPreprocessing = true; } if (trimmed.match(/,\s*[}\]]/)) { issues.push('Contains trailing commas'); needsPreprocessing = true; } if (trimmed.includes("'") && !trimmed.includes("\\'")) { issues.push('Contains unescaped single quotes'); needsPreprocessing = true; } if (!needsPreprocessing) { try { JSON.parse(trimmed); return { success: true, data: null, issues: [], needsPreprocessing: false }; } catch (error) { issues.push(`JSON syntax error: ${error instanceof Error ? error.message : String(error)}`); needsPreprocessing = true; } } return { success: false, data: null, issues, needsPreprocessing }; } function determineParsingStrategy(issues, response) { const responseLength = response.length; const simpleIssues = [ 'Contains markdown code blocks', 'Contains trailing commas', 'Missing JSON object wrapper' ]; if (issues.every(issue => simpleIssues.some(simple => issue.includes(simple)))) { return 'basic-cleanup'; } if (responseLength > 2000 || issues.some(issue => issue.includes('unescaped'))) { return 'aggressive-extraction'; } return 'basic-cleanup'; } function applyTargetedParsing(response, strategy, context) { if (strategy === 'basic-cleanup') { return basicCleanupParsing(response, context); } else { return aggressiveExtractionParsing(response, context); } } function basicCleanupParsing(response, context) { let cleaned = response.trim(); cleaned = cleaned.replace(/^\uFEFF/, ''); const markdownMatch = cleaned.match(/```json\s*([\s\S]*?)\s*```/); if (markdownMatch) { cleaned = markdownMatch[1].trim(); } cleaned = cleaned.replace(/"([^"]*(?:\\.[^"]*)*)"/g, (match, content) => { if (content.includes('\n') && !content.includes('\\n')) { const escapedContent = content.replace(/\n/g, '\\n').replace(/\r/g, '\\r').replace(/\t/g, '\\t'); return `"${escapedContent}"`; } return match; }); cleaned = cleaned.replace(/,(\s*[}\]])/g, '$1'); if (!cleaned.includes('"') && cleaned.includes("'")) { cleaned = cleaned.replace(/'/g, '"'); } if (!cleaned.startsWith('{')) { const jsonMatch = cleaned.match(/\{[\s\S]*\}/); if (jsonMatch) { cleaned = jsonMatch[0]; } } try { const parsed = JSON.parse(cleaned); if (cleaned.length < response.length * 0.5) { logger.warn({ context, originalLength: response.length, cleanedLength: cleaned.length }, "Basic cleanup reduced response size significantly"); } return parsed; } catch (error) { logger.debug({ context, error: error instanceof Error ? error.message : String(error) }, "Basic cleanup failed, falling back to aggressive extraction"); return aggressiveExtractionParsing(response, context); } } function aggressiveExtractionParsing(response, context) { try { const result = enhancedProgressiveJsonParsing(response, context); const originalSize = response.length; const resultSize = JSON.stringify(result).length; const dataLossRatio = (originalSize - resultSize) / originalSize; if (dataLossRatio > 0.7) { throw new ParsingError(`Aggressive extraction caused excessive data loss for ${context}. Original: ${originalSize} chars, Result: ${resultSize} chars (${Math.round(dataLossRatio * 100)}% loss)`, { originalSize, resultSize, dataLossRatio, originalPreview: response.substring(0, 200) }); } if (dataLossRatio > 0.3) { logger.warn({ context, originalSize, resultSize, dataLossRatio: Math.round(dataLossRatio * 100) }, "Aggressive extraction caused significant data loss"); } return result; } catch (error) { throw new ParsingError(`All parsing strategies failed for ${context}`, { originalResponse: response.substring(0, 500), responseLength: response.length, lastError: error instanceof Error ? error.message : String(error) }); } } function smartMultiPassExtraction(jsonString, jobId) { const results = []; const outermost = extractOutermostObjects(jsonString); results.push(...outermost); const markdownRecovered = extractFromMarkdownPatterns(jsonString); results.push(...markdownRecovered); const balancedExtractions = extractMultipleBalancedObjects(jsonString); results.push(...balancedExtractions); const intelligentSubstrings = extractIntelligentSubstrings(jsonString); results.push(...intelligentSubstrings); const uniqueResults = [...new Set(results)]; uniqueResults.sort((a, b) => b.length - a.length); logger.debug({ jobId, stage: 'smart-multi-pass', totalCandidates: uniqueResults.length, largestSize: uniqueResults[0]?.length || 0 }, "Smart multi-pass extraction completed"); return uniqueResults; } function extractOutermostObjects(content) { const results = []; const stack = []; let inString = false; let escaped = false; let currentStart = -1; for (let i = 0; i < content.length; i++) { const char = content[i]; if (escaped) { escaped = false; continue; } if (char === '\\' && inString) { escaped = true; continue; } if (char === '"' && !escaped) { inString = !inString; continue; } if (!inString) { if (char === '{' || char === '[') { if (stack.length === 0) { currentStart = i; } stack.push({ char: char === '{' ? '}' : ']', pos: i }); } else if (char === '}' || char === ']') { if (stack.length > 0 && stack[stack.length - 1].char === char) { stack.pop(); if (stack.length === 0 && currentStart !== -1) { const extracted = content.substring(currentStart, i + 1); if (extracted.length > 10) { results.push(extracted); } currentStart = -1; } } } } } results.sort((a, b) => { const sizeDiff = b.length - a.length; if (Math.abs(sizeDiff) > 100) return sizeDiff; const aStart = content.indexOf(a); const bStart = content.indexOf(b); const startDiff = aStart - bStart; if (Math.abs(startDiff) > 50) return startDiff; const aHasRootProps = /["'](?:moduleName|name|type|id|description|provides|requires)["']\s*:/.test(a); const bHasRootProps = /["'](?:moduleName|name|type|id|description|provides|requires)["']\s*:/.test(b); if (aHasRootProps && !bHasRootProps) return -1; if (!aHasRootProps && bHasRootProps) return 1; return sizeDiff; }); return results; } function extractFromMarkdownPatterns(content) { const results = []; const codeBlockRegex = /```(?:json)?\s*([\s\S]*?)```/g; let match; while ((match = codeBlockRegex.exec(content)) !== null) { const extracted = match[1].trim(); if (extracted.length > 10) { results.push(extracted); } } const singleLineRegex = /`\s*(\{[\s\S]*?\}|\[[\s\S]*?\])\s*`/g; while ((match = singleLineRegex.exec(content)) !== null) { const extracted = match[1].trim(); if (extracted.length > 10) { results.push(extracted); } } const prefixPatterns = [ /(?:json|response|result|data):\s*(\{[\s\S]*?\}|\[[\s\S]*?\])/gi, /(?:here is|here's)\s+(?:the\s+)?(?:json|response):\s*(\{[\s\S]*?\}|\[[\s\S]*?\])/gi ]; for (const pattern of prefixPatterns) { while ((match = pattern.exec(content)) !== null) { const extracted = match[1].trim(); if (extracted.length > 10) { results.push(extracted); } } } return results; } function extractMultipleBalancedObjects(content) { const results = []; const startPositions = []; for (let i = 0; i < content.length; i++) { if (content[i] === '{' || content[i] === '[') { startPositions.push({ char: content[i], pos: i }); } } const maxAttempts = Math.min(startPositions.length, 50); for (let i = 0; i < maxAttempts; i++) { const start = startPositions[i]; try { const extracted = extractBalancedJson(content, start.pos, start.char); if (extracted && extracted.length > 10) { results.push(extracted); } } catch { continue; } } return results; } function extractIntelligentSubstrings(content) { const results = []; const maxIterations = 1000; let iterations = 0; const minSize = 100; const stepSize = Math.max(1, Math.floor(content.length / 50)); const priorityStarts = [0, 1, 2, 3, 4, 5]; for (const priorityStart of priorityStarts) { if (priorityStart >= content.length) continue; for (let size = content.length - priorityStart; size >= minSize && iterations < maxIterations; size -= stepSize * 2) { iterations++; const substring = content.substring(priorityStart, priorityStart + size); if (!substring.includes('{')) continue; if (substring.split('{').length !== substring.split('}').length) continue; if (!substring.trim().startsWith('{')) continue; try { const parsed = JSON.parse(substring); if (typeof parsed === 'object' && parsed !== null) { const hasRootProps = Object.keys(parsed).some(key => ['moduleName', 'name', 'type', 'id', 'description', 'provides', 'requires'].includes(key)); if (hasRootProps) { results.unshift(substring); } else { results.push(substring); } if (substring.length > content.length * 0.8) { return results; } } } catch { continue; } } } if (results.length < 3) { for (let size = content.length; size >= minSize && iterations < maxIterations; size -= stepSize) { for (let start = 0; start <= content.length - size && iterations < maxIterations; start += stepSize) { iterations++; const substring = content.substring(start, start + size); if (!substring.includes('{') && !substring.includes('[')) continue; if (substring.split('{').length !== substring.split('}').length) continue; if (substring.split('[').length !== substring.split(']').length) continue; try { JSON.parse(substring); results.push(substring); if (substring.length > content.length * 0.8) { return results; } } catch { continue; } } if (results.length > 0 && results[0].length > content.length * 0.5) { break; } } } return results; } function extractPartialJson(jsonString, jobId) { let maxValidJson = ''; let maxValidObject = ''; const isSubstantialObject = (parsed) => { if (typeof parsed !== 'object' || parsed === null) return false; if (Array.isArray(parsed)) { return parsed.length > 0; } else { const keys = Object.keys(parsed); return keys.length > 0 && keys.some(key => key.trim().length > 0); } }; const objectStarts = []; for (let i = 0; i < jsonString.length; i++) { if (jsonString[i] === '{' || jsonString[i] === '[') { objectStarts.push({ char: jsonString[i], pos: i }); } } for (const start of objectStarts) { try { const extracted = extractBalancedJson(jsonString, start.pos, start.char); if (extracted) { try { const parsed = JSON.parse(extracted); if (isSubstantialObject(parsed) && extracted.length > maxValidObject.length) { maxValidObject = extracted; } } catch { } } } catch { } } if (maxValidObject) { logger.debug({ jobId, stage: 'partial-extraction', extractedLength: maxValidObject.length, isObject: true }, "Partial JSON extraction found substantial object"); return maxValidObject; } let maxValidPrimitive = ''; const extractionResults = smartMultiPassExtraction(jsonString, jobId); for (const result of extractionResults) { try { const parsed = JSON.parse(result); if (typeof parsed === 'object' && parsed !== null) { if (result.length > maxValidObject.length) { maxValidObject = result; } } else { if (result.length > maxValidPrimitive.length && result.length > 20) { maxValidPrimitive = result; } } if (result.length > maxValidJson.length) { maxValidJson = result; } } catch { continue; } } const result = maxValidObject || (maxValidJson.length > 20 ? maxValidJson : '') || (maxValidPrimitive.length > 20 ? maxValidPrimitive : ''); logger.debug({ jobId, stage: 'partial-extraction', extractedLength: result.length, isObject: !!maxValidObject }, "Partial JSON extraction attempted"); if (!result) { throw new Error('No valid JSON substring found'); } return result; } function relaxedJsonParse(jsonString, jobId) { let relaxed = jsonString; relaxed = relaxed.replace(/(\w+):/g, '"$1":'); relaxed = relaxed.replace(/:\s*undefined/g, ': null'); relaxed = relaxed.replace(/:\s*Infinity/g, ': null'); relaxed = relaxed.replace(/:\s*-Infinity/g, ': null'); relaxed = relaxed.replace(/:\s*NaN/g, ': null'); logger.debug({ jobId, stage: 'relaxed-parsing', processedLength: relaxed.length }, "Relaxed JSON parsing attempted"); return JSON.parse(relaxed); } function enhancedProgressiveJsonParsing(rawResponse, jobId) { const maxDepth = 50; const maxArrayLength = 10000; const maxProcessingTime = 5000; const startTime = Date.now(); const withTimeout = (strategy, strategyName) => { const strategyStartTime = Date.now(); const result = strategy(); const strategyTime = Date.now() - strategyStartTime; if (strategyTime > 1000) { logger.warn({ jobId, strategyName, strategyTime }, "Strategy took longer than expected"); } return result; }; const strategies = [ () => { try { logger.debug({ jobId, strategy: 'direct' }, "Attempting direct JSON parse"); } catch { } if (/```/.test(rawResponse)) { throw new Error('Contains markdown code blocks that need extraction'); } if (/:\s*\d{15,}/.test(rawResponse)) { throw new Error('Contains large numbers that need string conversion'); } if (/""\s*:/.test(rawResponse)) { throw new Error('Contains empty string keys that need replacement'); } if (/\/\/|\/\*/.test(rawResponse)) { throw new Error('Contains comments that need removal'); } if (/"\s*\n\s*"/.test(rawResponse)) { throw new Error('Contains missing commas between properties'); } return JSON.parse(rawResponse); }, () => { logger.debug({ jobId, strategy: 'mixed-content-extraction' }, "Attempting JSON extraction from mixed content"); const extracted = extractJsonFromMixedContent(rawResponse, jobId); try { const parsed = JSON.parse(extracted); if (typeof parsed === 'string' || typeof parsed === 'number' || typeof parsed === 'boolean') { throw new Error('Mixed content extraction found only primitive value, trying other strategies'); } return parsed; } catch { logger.debug({ jobId, strategy: 'mixed-content-smart-fallback' }, "Direct parse of extracted content failed, trying smart partial extraction"); const partialExtracted = extractPartialJson(extracted, jobId); const parsed = JSON.parse(partialExtracted); if (typeof parsed === 'string' || typeof parsed === 'number' || typeof parsed === 'boolean') { throw new Error('Smart partial extraction found only primitive value, trying other strategies'); } return parsed; } }, () => { try { logger.debug({ jobId, strategy: '4-stage-sanitization' }, "Attempting 4-stage sanitization pipeline"); } catch { } let processed = preProcessJsonResponse(rawResponse, jobId); processed = sanitizeControlCharacters(processed, jobId); processed = repairJsonStructure(processed, jobId); return JSON.parse(processed); }, () => { logger.debug({ jobId, strategy: 'bracket-completion' }, "Attempting bracket completion"); let processed = preProcessJsonResponse(rawResponse, jobId); processed = sanitizeControlCharacters(processed, jobId); processed = repairJsonStructure(processed, jobId); const completed = completeJsonBrackets(processed, jobId); return JSON.parse(completed); }, () => { logger.debug({ jobId, strategy: 'partial-extraction' }, "Attempting partial JSON extraction"); let processed = preProcessJsonResponse(rawResponse, jobId); processed = sanitizeControlCharacters(processed, jobId); processed = repairJsonStructure(processed, jobId); const partial = extractPartialJson(processed, jobId); return JSON.parse(partial); }, () => { logger.debug({ jobId, strategy: 'relaxed-parsing' }, "Attempting relaxed JSON parsing"); let processed = preProcessJsonResponse(rawResponse, jobId); processed = sanitizeControlCharacters(processed, jobId); processed = repairJsonStructure(processed, jobId); return relaxedJsonParse(processed, jobId); } ]; let lastError = null; for (let i = 0; i < strategies.length; i++) { try { if (Date.now() - startTime > maxProcessingTime) { logger.warn({ jobId, totalTime: Date.now() - startTime, strategy: i + 1 }, "JSON parsing timed out, aborting remaining strategies"); throw new Error(`JSON parsing timed out after ${maxProcessingTime}ms`); } if (jobId === 'context_curator_relevance_scoring') { logger.info({ jobId, strategy: i + 1, strategyName: ['direct', 'mixed-content-smart', 'bracket-completion', 'relaxed-parsing', 'partial-extraction', 'aggressive-extraction'][i] || 'unknown' }, "RELEVANCE SCORING - Trying parsing strategy"); } const strategyName = ['direct', 'mixed-content-extraction', '4-stage-sanitization', 'bracket-completion', 'partial-extraction', 'relaxed-parsing'][i] || 'unknown'; const result = withTimeout(strategies[i], strategyName); const sanitizedResult = detectCircularAndLimitDepth(result, maxDepth, maxArrayLength, jobId); try { logger.debug({ jobId, strategy: i + 1, success: true }, "Enhanced JSON parsing successful"); } catch { } if (jobId === 'context_curator_relevance_scoring') { logger.info({ jobId, strategy: i + 1, resultType: typeof sanitizedResult, resultKeys: sanitizedResult && typeof sanitizedResult === 'object' ? Object.keys(sanitizedResult) : 'not an object' }, "RELEVANCE SCORING - Strategy succeeded"); } return sanitizedResult; } catch (error) { lastError = error; try { logger.debug({ jobId, strategy: i + 1, error: error instanceof Error ? error.message : String(error) }, "Enhanced JSON parsing strategy failed"); } catch { } if (jobId === 'context_curator_relevance_scoring') { logger.info({ jobId, strategy: i + 1, error: error instanceof Error ? error.message : String(error), errorType: error instanceof Error ? error.constructor.name : typeof error }, "RELEVANCE SCORING - Strategy failed"); } } } throw new ParsingError(`All enhanced JSON parsing strategies failed. Last error: ${lastError?.message}`, { rawResponse: rawResponse.substring(0, 500), strategiesAttempted: strategies.length }, lastError || undefined); } function detectCircularAndLimitDepth(obj, maxDepth, maxArrayLength, jobId) { const seen = new WeakSet(); function processObject(current, depth = 0) { if (depth > maxDepth) { logger.warn({ jobId, depth, maxDepth }, "Maximum depth exceeded, truncating object"); return '[Max Depth Exceeded]'; } if (typeof current === 'number' && !Number.isSafeInteger(current) && Math.abs(current) > Number.MAX_SAFE_INTEGER) { return current.toString(); } if (current && typeof current === 'object') { if (seen.has(current)) { logger.warn({ jobId, depth }, "Circular reference detected"); return '[Circular Reference]'; } seen.add(current); if (Array.isArray(current)) { const currentArray = current; if (currentArray.length > maxArrayLength) { logger.warn({ jobId, arrayLength: currentArray.length, maxArrayLength }, "Array length exceeded, truncating"); return currentArray.slice(0, maxArrayLength).map((item) => processObject(item, depth + 1)); } return currentArray.map((item) => processObject(item, depth + 1)); } else { const result = {}; const currentObj = current; for (const key in currentObj) { if (Object.prototype.hasOwnProperty.call(currentObj, key)) { result[key] = processObject(currentObj[key], depth + 1); } } return result; } } return current; } return processObject(obj); } function extractJsonFromMixedContent(content, jobId) { const trimmed = content.trim(); const codeBlockMatches = Array.from(trimmed.matchAll(/```(?:json)?\s*([\s\S]*?)```/gs)); if (codeBlockMatches.length > 0) { for (const match of codeBlockMatches) { if (match[1] && match[1].includes('"tasks"')) { logger.debug({ jobId, extractionMethod: "markdown_code_block_with_tasks", blockIndex: codeBlockMatches.indexOf(match), totalBlocks: codeBlockMatches.length }, "Extracted JSON from Markdown code block containing tasks array"); return match[1].trim(); } } let largestBlock = ''; let largestIndex = -1; for (let i = 0; i < codeBlockMatches.length; i++) { const block = codeBlockMatches[i][1] || ''; if (block.length > largestBlock.length) { largestBlock = block; largestIndex = i; } } if (largestBlock) { logger.debug({ jobId, extractionMethod: "markdown_code_block_largest", blockIndex: largestIndex, totalBlocks: codeBlockMatches.length, blockSize: largestBlock.length }, "Extracted largest JSON block from multiple Markdown code blocks"); return largestBlock.trim(); } } const singleLineCodeMatch = trimmed.match(/^`\s*(\{[\s\S]*\}|\[[\s\S]*\])\s*`$/s); if (singleLineCodeMatch && singleLineCodeMatch[1]) { logger.debug({ jobId, extractionMethod: "single_line_code" }, "Extracted JSON from single-line code block in mixed content"); return singleLineCodeMatch[1].trim(); } const jsonStarts = []; for (let i = 0; i < trimmed.length; i++) { if (trimmed[i] === '{' || trimmed[i] === '[') { jsonStarts.push({ char: trimmed[i], pos: i }); } } for (const start of jsonStarts) { try { const extracted = extractBalancedJson(trimmed, start.pos, start.char); if (extracted) { try { JSON.parse(extracted); logger.debug({ jobId, startPos: start.pos, extractedLength: extracted.length }, "Successfully extracted JSON from mixed content"); return extracted; } catch { continue; } } } catch { continue; } } throw new Error("No valid JSON found in mixed content"); } function extractBalancedJson(content, startPos, startChar) { const endChar = startChar === '{' ? '}' : ']'; let depth = 0; let inString = false; let escaped = false; for (let i = startPos; i < content.length; i++) { const char = content[i]; if (escaped) { escaped = false; continue; } if (char === '\\' && inString) { escaped = true; continue; } if (char === '"' && !escaped) { inString = !inString; continue; } if (!inString) { if (char === startChar) { depth++; } else if (char === endChar) { depth--; if (depth === 0) { return content.substring(startPos, i + 1); } } } } return null; } export function normalizeJsonResponse(rawResponse, jobId) { if (!rawResponse) { return rawResponse; } const startTime = Date.now(); logger.debug({ jobId, rawResponseLength: rawResponse.length }, "Starting enhanced JSON normalization with 4-stage pipeline"); try { const parsed = enhancedProgressiveJsonParsing(rawResponse, jobId); const result = JSON.stringify(parsed); const processingTime = Date.now() - startTime; logger.debug({ jobId, processingTime, originalLength: rawResponse.length, normalizedLength: result.length, success: true }, "Enhanced JSON normalization completed successfully"); try { const optimizer = getPromptOptimizer(); optimizer.recordParsingResult(jobId || 'unknown', true); } catch (learningError) { logger.debug({ learningError }, 'Failed to record parsing success for learning'); } return result; } catch (error) { const processingTime = Date.now() - startTime; const errorMessage = error instanceof Error ? error.message : String(error); logger.warn({ jobId, processingTime, error: errorMessage }, "Enhanced progressive parsing failed, falling back to legacy normalization"); try { const optimizer = getPromptOptimizer(); optimizer.recordParsingResult(jobId || 'unknown', false, errorMessage); } catch (learningError) { logger.debug({ learningError }, 'Failed to record parsing failure for learning'); } return legacyNormalizeJsonResponse(rawResponse, jobId); } } function legacyNormalizeJsonResponse(rawResponse, jobId) { logger.debug({ jobId, rawResponseLength: rawResponse.length }, "Starting legacy JSON normalization"); const codeBlockMatch = rawResponse.match(/```(?:json)?\s*([\s\S]*?)```/s); if (codeBlockMatch && codeBlockMatch[1]) { logger.debug({ jobId, extractionMethod: "markdown_code_block" }, "Extracted JSON from Markdown code block"); return codeBlockMatch[1].trim(); } const singleLineCodeMatch = rawResponse.match(/^`\s*(\{[\s\S]*\}|\[[\s\S]*\])\s*`$/s); if (singleLineCodeMatch && singleLineCodeMatch[1]) { logger.debug({ jobId, extractionMethod: "single_line_code" }, "Extracted JSON from single-line code block"); return singleLineCodeMatch[1].trim(); } const jsonContent = rawResponse.trim(); const firstBracket = jsonContent.indexOf('['); const firstBrace = jsonContent.indexOf('{'); let start = -1; if (firstBracket !== -1 && (firstBrace === -1 || firstBracket < firstBrace)) { start = firstBracket; } else if (firstBrace !== -1) { start = firstBrace; } if (start !== -1) { const lastBracket = jsonContent.lastIndexOf(']'); const lastBrace = jsonContent.lastIndexOf('}'); let end = -1; if (start === firstBracket) { end = lastBracket; } else {