vibe-coder-mcp
Version:
Production-ready MCP server with complete agent integration, multi-transport support, and comprehensive development automation tools for AI-assisted workflows.
1,172 lines (1,171 loc) • 54 kB
JavaScript
import axios from 'axios';
import https from 'https';
import logger from '../logger.js';
import { AppError, ApiError, ConfigurationError, ParsingError } from './errors.js';
import { selectModelForTask } from './configLoader.js';
import { getPromptOptimizer } from './prompt-optimizer.js';
import { OpenRouterConfigManager } from './openrouter-config-manager.js';
/**
 * Shared HTTPS agent passed to axios for outbound LLM API requests.
 * Connections are kept alive, certificates are verified, and the TLS
 * protocol range and TLS 1.2 cipher list are pinned explicitly.
 */
const httpsAgent = new https.Agent({
  // Connection reuse / socket timeout (milliseconds).
  keepAlive: true,
  timeout: 30000,
  // Certificate verification and protocol range.
  rejectUnauthorized: true,
  minVersion: 'TLSv1.2',
  maxVersion: 'TLSv1.3',
  // Cipher preference list.
  honorCipherOrder: true,
  ciphers: 'ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-RSA-AES128-SHA256:ECDHE-RSA-AES256-SHA384'
});
/**
 * Sends one system + user prompt pair to `${config.baseUrl}/chat/completions`
 * and returns the trimmed text content of the first choice.
 *
 * Before the request the prompts are run through the prompt optimizer when the
 * task is considered JSON-oriented: the task name contains one of the
 * `explicitJsonTasks` entries, or it contains "json" (case-insensitive) and
 * none of the `nonJsonTasks` entries, or `expectedSchema` was supplied. If the
 * optimizer throws, a warning is logged and the original prompts are used.
 *
 * @param {string} prompt - User message content.
 * @param {string} systemPrompt - System message content.
 * @param {object} config - Reads `apiKey`, `baseUrl`, `llm_mapping` and `geminiModel`.
 * @param {string} logicalTaskName - Task label used for model selection, JSON detection and logging.
 * @param {number} [temperature=0.1] - Sampling temperature sent with the request.
 * @param {*} [expectedSchema] - Forwarded to the optimizer; any defined value forces JSON optimization.
 * @returns {Promise<string>} Trimmed `choices[0].message.content` from the response.
 * @throws {ConfigurationError} When `config.apiKey` is falsy.
 * @throws {ApiError} When axios reports an error (network failure, timeout, status >= 500).
 * @throws {AppError} For any other failure; an error that is already an AppError is rethrown unchanged.
 */
export async function performDirectLlmCall(prompt, systemPrompt, config, logicalTaskName, temperature = 0.1, expectedSchema) {
// Log only the shape of the config (never the key itself).
logger.debug({
configReceived: true,
apiKeyPresent: Boolean(config.apiKey),
mapping: config.llm_mapping ? 'present' : 'missing',
mappingSize: config.llm_mapping ? Object.keys(config.llm_mapping).length : 0,
mappingKeys: config.llm_mapping ? Object.keys(config.llm_mapping) : []
}, `performDirectLlmCall received config for task: ${logicalTaskName}`);
if (!config.apiKey) {
throw new ConfigurationError("OpenRouter API key (OPENROUTER_API_KEY) is not configured.");
}
// Start from the caller's prompts; these are replaced only if optimization succeeds.
let optimizedSystemPrompt = systemPrompt;
let optimizedUserPrompt = prompt;
let optimizationApplied = [];
// Task-name fragments that always trigger JSON prompt optimization (substring match).
const explicitJsonTasks = [
'intent_recognition',
'task_decomposition',
'module_selection',
'yaml_generation',
'template_generation',
'fullstack_starter_kit_dynamic_yaml_module_generation',
'epic_task_generation',
'epic_identification',
'atomic_detection',
'task_validation',
'project_analysis'
];
// Task-name fragments that veto the generic "name contains json" rule below.
const nonJsonTasks = [
'research_enhancement',
'research',
'code_map_generation',
'markdown_generation'
];
// NOTE: the nonJsonTasks veto applies only to the middle clause; an explicit task
// match or a supplied schema still enables optimization.
const shouldOptimizeForJson = (explicitJsonTasks.some(task => logicalTaskName.includes(task)) ||
(logicalTaskName.toLowerCase().includes('json') && !nonJsonTasks.some(task => logicalTaskName.includes(task))) ||
(expectedSchema !== undefined));
if (shouldOptimizeForJson) {
try {
const optimizer = getPromptOptimizer();
const optimization = optimizer.optimizeForJsonGeneration(systemPrompt, prompt, logicalTaskName, expectedSchema);
optimizedSystemPrompt = optimization.optimizedSystemPrompt;
optimizedUserPrompt = optimization.optimizedUserPrompt;
optimizationApplied = optimization.optimizationApplied;
logger.debug({
logicalTaskName,
optimizationApplied,
confidenceScore: optimization.confidenceScore,
originalSystemLength: systemPrompt.length,
optimizedSystemLength: optimizedSystemPrompt.length,
originalUserLength: prompt.length,
optimizedUserLength: optimizedUserPrompt.length
}, 'Applied prompt optimization for JSON generation');
}
catch (optimizationError) {
// Optimization is best-effort: fall back to the unmodified prompts.
logger.warn({
logicalTaskName,
error: optimizationError instanceof Error ? optimizationError.message : String(optimizationError)
}, 'Prompt optimization failed, using original prompts');
}
}
else {
logger.debug({
logicalTaskName,
reason: 'Task not in JSON optimization list'
}, 'Skipping JSON optimization for non-JSON task');
}
// Fallback model, in priority order: config, mapping default, environment, hard-coded id.
const defaultModel = config.geminiModel ||
config.llm_mapping?.['default_generation'] ||
process.env.GEMINI_MODEL ||
process.env.VIBE_DEFAULT_LLM_MODEL ||
"google/gemini-2.5-flash-preview-05-20";
const modelToUse = selectModelForTask(config, logicalTaskName, defaultModel);
logger.info({ modelSelected: modelToUse, logicalTaskName }, `Selected model for direct LLM call.`);
try {
const response = await axios.post(`${config.baseUrl}/chat/completions`, {
model: modelToUse,
messages: [
{ role: "system", content: optimizedSystemPrompt },
{ role: "user", content: optimizedUserPrompt }
],
max_tokens: 8000,
temperature: temperature
}, {
headers: {
"Content-Type": "application/json",
"Authorization": `Bearer ${config.apiKey}`,
"HTTP-Referer": "https://vibe-coder-mcp.local"
},
timeout: 90000,
httpsAgent: httpsAgent,
maxRedirects: 5,
// Only 5xx statuses reject. NOTE(review): a 4xx response therefore resolves and,
// lacking `choices`, is reported below as an invalid response structure rather
// than as an API error - confirm this is intended.
validateStatus: (status) => status < 500
});
if (response.data?.choices?.[0]?.message?.content) {
const responseText = response.data.choices[0].message.content.trim();
logger.debug({ modelUsed: modelToUse, responseLength: responseText.length }, "Direct LLM call successful");
return responseText;
}
else {
// Empty string content is falsy and also lands here.
logger.warn({ responseData: response.data, modelUsed: modelToUse }, "Received empty or unexpected response structure from LLM");
throw new ParsingError("Invalid API response structure received from LLM", { responseData: response.data, modelUsed: modelToUse, logicalTaskName });
}
}
catch (error) {
// Everything thrown inside the try (including the ParsingError above) passes through here.
logger.error({ err: error, modelUsed: modelToUse, logicalTaskName }, `Direct LLM API call failed for ${logicalTaskName}`);
if (axios.isAxiosError(error)) {
const axiosError = error;
const status = axiosError.response?.status;
const responseData = axiosError.response?.data;
const apiMessage = `LLM API Error: Status ${status || 'N/A'}. ${axiosError.message}`;
throw new ApiError(apiMessage, status, { modelUsed: modelToUse, logicalTaskName, responseData }, axiosError);
}
else if (error instanceof AppError) {
// Already a project error: rethrow unchanged.
// (presumably ParsingError extends AppError - confirm in errors.js)
throw error;
}
else if (error instanceof Error) {
throw new AppError(`LLM call failed for ${logicalTaskName}: ${error.message}`, { modelUsed: modelToUse, logicalTaskName }, error);
}
else {
// Non-Error throwables are stringified into the error context.
throw new AppError(`Unknown error during LLM call for ${logicalTaskName}.`, { modelUsed: modelToUse, logicalTaskName, thrownValue: String(error) });
}
}
}
/**
 * Runs a direct LLM call for a JSON-producing task, then normalises the text
 * and checks whether it parses. The parse outcome is reported to the prompt
 * optimizer (best-effort) so it can learn from successes and failures.
 *
 * @param {string} prompt - User prompt.
 * @param {string} systemPrompt - System prompt.
 * @param {object} config - Passed through to performDirectLlmCall.
 * @param {string} logicalTaskName - Task label.
 * @param {*} [expectedSchema] - Passed through to performDirectLlmCall.
 * @param {number} [temperature=0.1] - Sampling temperature.
 * @returns {Promise<{response: string, optimizationApplied: Array}>} The
 *   normalised text when it parses as JSON, otherwise the raw text;
 *   `optimizationApplied` is always an empty array here.
 */
export async function performOptimizedJsonLlmCall(prompt, systemPrompt, config, logicalTaskName, expectedSchema, temperature = 0.1) {
  const startedAt = Date.now();
  const rawText = await performDirectLlmCall(prompt, systemPrompt, config, logicalTaskName, temperature, expectedSchema);

  // Outcome of the normalise-and-parse attempt; defaults describe failure.
  const outcome = { parsed: false, failure: undefined, text: rawText };
  try {
    const candidate = normalizeJsonResponse(rawText, logicalTaskName);
    JSON.parse(candidate);
    outcome.parsed = true;
    outcome.text = candidate;
  }
  catch (error) {
    outcome.failure = error instanceof Error ? error.message : String(error);
  }

  // Feeding the optimizer is optional; a failure here must not fail the call.
  try {
    getPromptOptimizer().recordParsingResult(logicalTaskName, outcome.parsed, outcome.failure);
  }
  catch (learningError) {
    logger.debug({ learningError }, 'Failed to record result for prompt optimization learning');
  }

  logger.debug({
    logicalTaskName,
    parseSuccess: outcome.parsed,
    processingTime: Date.now() - startedAt,
    responseLength: rawText.length,
    normalizedLength: outcome.text.length,
    wasNormalized: outcome.text !== rawText
  }, 'Optimized JSON LLM call completed');

  return { response: outcome.text, optimizationApplied: [] };
}
/**
 * Dispatches an LLM call based on the expected output format.
 * `'json'` goes through performOptimizedJsonLlmCall and yields its `response`
 * text; any other format goes straight to performDirectLlmCall without a schema.
 *
 * @param {string} prompt - User prompt.
 * @param {string} systemPrompt - System prompt.
 * @param {object} config - LLM configuration, passed through.
 * @param {string} logicalTaskName - Task label.
 * @param {string} [expectedFormat='text'] - Only the exact value 'json' selects the JSON path.
 * @param {*} [expectedSchema] - Used on the JSON path only.
 * @param {number} [temperature=0.1] - Sampling temperature.
 * @returns {Promise<string>} The response text.
 */
export async function performFormatAwareLlmCall(prompt, systemPrompt, config, logicalTaskName, expectedFormat = 'text', expectedSchema, temperature = 0.1) {
  if (expectedFormat !== 'json') {
    return await performDirectLlmCall(prompt, systemPrompt, config, logicalTaskName, temperature, undefined);
  }
  const { response } = await performOptimizedJsonLlmCall(prompt, systemPrompt, config, logicalTaskName, expectedSchema, temperature);
  return response;
}
/**
 * Stage 1 of the sanitisation pipeline: rewrites common "almost JSON" syntax
 * into strict JSON text. It strips a BOM, normalises line endings, removes
 * comments, converts single-quoted keys/values to double quotes, lower-cases
 * Python/upper-case booleans, quotes bare identifier keys and replaces empty
 * keys with "_empty_key".
 *
 * @param {string} rawResponse - Text to clean.
 * @param {string} [jobId] - Identifier used only for the debug log line.
 * @returns {string} The rewritten text (not guaranteed to be valid JSON).
 */
function preProcessJsonResponse(rawResponse, jobId) {
  let sanitized = rawResponse;
  sanitized = sanitized.replace(/^\uFEFF/, '');
  sanitized = sanitized.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  // Remove /* block */ and // line comments, but skip over quoted strings so that
  // values such as "http://example.com" are not truncated at the "//".
  // Single-quoted strings are not allowed to span lines, which keeps a stray
  // apostrophe in surrounding prose from swallowing a later comment marker.
  sanitized = sanitized.replace(/"(?:[^"\\]|\\[\s\S])*"|'(?:[^'\\\n]|\\.)*'|\/\*[\s\S]*?\*\/|\/\/.*$/gm, (token) => (token[0] === '"' || token[0] === "'" ? token : ''));
  // 'key': -> "key":
  sanitized = sanitized.replace(/'([^'\\]*(\\.[^'\\]*)*)':/g, '"$1":');
  // : 'value' -> : "value" when followed by `,`, `}` or `]`.
  // The closing bracket must be escaped inside the character class; unescaped,
  // the class ended early and the pattern only matched values followed by `,]` or `}]`.
  sanitized = sanitized.replace(/:\s*'([^'\\]*(\\.[^'\\]*)*)'(\s*[,}\]])/g, ': "$1"$3');
  sanitized = sanitized.replace(/:\s*True\b/g, ': true');
  sanitized = sanitized.replace(/:\s*False\b/g, ': false');
  sanitized = sanitized.replace(/:\s*TRUE\b/g, ': true');
  sanitized = sanitized.replace(/:\s*FALSE\b/g, ': false');
  // Quote bare identifier keys that follow `{` or `,`.
  sanitized = sanitized.replace(/([{,]\s*)([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
  sanitized = sanitized.replace(/""\s*:/g, '"_empty_key":');
  try {
    logger.debug({ jobId, stage: 'pre-processing', originalLength: rawResponse.length, processedLength: sanitized.length }, "Stage 1 pre-processing completed");
  }
  catch {
    // Logging is best-effort; a logger failure must not abort parsing.
  }
  return sanitized;
}
/**
 * Stage 2 of the sanitisation pipeline: makes control characters and
 * non-JSON numeric literals safe for JSON.parse.
 *
 * 1. Raw control characters (U+0000-U+001F) found inside double-quoted
 *    strings are replaced by their JSON escape (\n, \r, \t, \b, \f or \uXXXX).
 * 2. Remaining control characters other than tab, LF and CR, plus
 *    U+007F-U+009F, are removed.
 * 3. Integers of 15+ digits after a colon are quoted; scientific notation and
 *    hex literals are rewritten as plain numbers; undefined / NaN / Infinity
 *    become null.
 *
 * Step 3 is regex-based and not string-aware, as in the rest of the pipeline.
 *
 * @param {string} jsonString - Text to sanitise.
 * @param {string} [jobId] - Identifier used only for the debug log line.
 * @returns {string} Sanitised text.
 */
function sanitizeControlCharacters(jsonString, jobId) {
  const SHORT_ESCAPES = { '\n': '\\n', '\r': '\\r', '\t': '\\t', '\b': '\\b', '\f': '\\f' };
  const escapeControlChar = (char) => SHORT_ESCAPES[char] || `\\u${char.charCodeAt(0).toString(16).padStart(4, '0')}`;

  // Pass 1: walk the text tracking whether we are inside a string literal.
  // A regex that pairs any two quotes cannot do this: it also pairs the closing
  // quote of one value with the opening quote of the next key and then escapes
  // the structural newline between them, which corrupts pretty-printed JSON.
  const pieces = [];
  let inString = false;
  let escaped = false;
  for (let i = 0; i < jsonString.length; i++) {
    const char = jsonString[i];
    if (inString) {
      if (char.charCodeAt(0) <= 31) {
        pieces.push(escapeControlChar(char));
        escaped = false;
        continue;
      }
      if (escaped) {
        escaped = false;
      }
      else if (char === '\\') {
        escaped = true;
      }
      else if (char === '"') {
        inString = false;
      }
    }
    else if (char === '"') {
      inString = true;
    }
    pieces.push(char);
  }
  let sanitized = pieces.join('');

  // Pass 2: drop stray control characters (tab, LF and CR are kept as whitespace).
  sanitized = sanitized.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, '');

  // Quote very long integers so they survive parsing without precision loss.
  // The lookahead keeps decimals / exponents intact instead of splitting them.
  sanitized = sanitized.replace(/:\s*(\d{15,})(?![\d.eE])/g, (match, number) => `: "${number}"`);

  // Expand scientific notation. Number() parses the literal exactly, avoiding the
  // rounding error of multiplying by a power of ten; overflow becomes null.
  sanitized = sanitized.replace(/:\s*(\d+\.?\d*)[eE]([+-]?\d+)/g, (match, base, exp) => {
    const value = Number(`${base}e${exp}`);
    return Number.isFinite(value) ? `: ${value}` : ': null';
  });

  // Hex literals -> decimal.
  sanitized = sanitized.replace(/:\s*0x([0-9a-fA-F]+)/g, (match, hex) => `: ${parseInt(hex, 16)}`);

  // JavaScript-only values have no JSON representation.
  sanitized = sanitized.replace(/:\s*undefined\b/g, ': null');
  sanitized = sanitized.replace(/:\s*NaN\b/g, ': null');
  sanitized = sanitized.replace(/:\s*Infinity\b/g, ': null');
  sanitized = sanitized.replace(/:\s*-Infinity\b/g, ': null');
  try {
    logger.debug({ jobId, stage: 'control-characters', processedLength: sanitized.length }, "Stage 2 control character sanitization completed");
  }
  catch {
    // Logging is best-effort; a logger failure must not abort parsing.
  }
  return sanitized;
}
/**
 * Stage 3 of the sanitisation pipeline: applies a fixed sequence of regex
 * rewrites that insert missing commas, drop trailing commas, collapse an
 * immediately repeated key and wrap bare comma-separated values in an array.
 * The rewrites are heuristic and not string-aware; their order matters because
 * each one operates on the output of the previous one.
 *
 * @param {string} jsonString - Text to repair.
 * @param {string} [jobId] - Identifier used only for the debug log line.
 * @returns {string} Repaired text (not guaranteed to be valid JSON).
 */
function repairJsonStructure(jsonString, jobId) {
let repaired = jsonString;
// Missing comma across a line break: `"` <newline> `"`, `}` <newline> `"`, `]` <newline> `"`.
repaired = repaired.replace(/"\s*\n\s*"/g, '",\n"');
repaired = repaired.replace(/}\s*\n\s*"/g, '},\n"');
repaired = repaired.replace(/]\s*\n\s*"/g, '],\n"');
// Missing comma on one line: a quoted value, then whitespace, then a quote.
repaired = repaired.replace(/(":\s*"[^"]*")\s+(")/g, '$1, $2');
// Same for an unquoted value (a run without `"`, `,`, `}` or `]`).
repaired = repaired.replace(/(":\s*[^",}\]]+)\s+(")/g, '$1, $2');
// Same two cases, but requiring that what follows is a complete `"key":`.
repaired = repaired.replace(/(":\s*"[^"]*")\s+("[^"]*"\s*:)/g, '$1, $2');
repaired = repaired.replace(/(":\s*[^",}\]]+)\s+("[^"]*"\s*:)/g, '$1, $2');
// Value followed by a newline and then a `"key":` on the next line.
repaired = repaired.replace(/(":\s*"[^"]*")\s*\n\s*("[^"]*"\s*:)/g, '$1,\n$2');
repaired = repaired.replace(/(":\s*[^",}\]\n]+)\s*\n\s*("[^"]*"\s*:)/g, '$1,\n$2');
// Any quoted string followed by whitespace and a `"key":`.
repaired = repaired.replace(/("[^"]*")\s+("[^"]*"\s*:)/g, '$1, $2');
// Remove trailing commas before `}` or `]`.
repaired = repaired.replace(/,(\s*[}\]])/g, '$1');
// `"k": value, "k":` (same key twice in a row) collapses to `"k":`, discarding the first value.
repaired = repaired.replace(/"([^"]+)":\s*[^,}]+,\s*"(\1)":/g, '"$2":');
// Unquoted, comma-separated tokens after a colon (e.g. `: a, b}`) become an array of strings.
repaired = repaired.replace(/:\s*([^[\]{}",\s]+(?:\s*,\s*[^[\]{}",\s]+)*)\s*([,}])/g, (match, content, ending) => {
// The pattern already excludes brackets and braces from `content`, so this guard always passes.
if (!content.includes('[') && !content.includes('{')) {
const trimmed = content.trim();
// A single number / true / false / null is a valid JSON value: leave it alone.
if (/^(\d+\.?\d*|true|false|null)$/.test(trimmed)) {
return match;
}
if (content.includes(',')) {
return `: [${content.split(',').map((item) => `"${item.trim()}"`).join(', ')}]${ending}`;
}
}
// A single bare token that is not a JSON literal is left untouched.
return match;
});
try {
logger.debug({ jobId, stage: 'structural-repair', processedLength: repaired.length }, "Stage 3 structural repair completed");
}
catch {
// Logging is best-effort; a logger failure must not abort parsing.
}
return repaired;
}
/**
 * Appends the closers needed to balance unclosed `{` / `[` in truncated JSON
 * text. Brackets that appear inside double-quoted strings are ignored, and a
 * string left open at the end of the input is closed first.
 *
 * @param {string} jsonString - Possibly truncated JSON text.
 * @param {string} [jobId] - Identifier used only for the debug log line.
 * @returns {string} The input followed by any missing `"`, `}` and `]`.
 */
function completeJsonBrackets(jsonString, jobId) {
  const pendingClosers = [];
  let inString = false;
  let escaped = false;
  for (let i = 0; i < jsonString.length; i++) {
    const char = jsonString[i];
    if (inString) {
      // Inside a string only the terminating quote matters; `{`, `[`, `}` and `]`
      // are data here and must not affect the bracket stack.
      if (escaped) {
        escaped = false;
      }
      else if (char === '\\') {
        escaped = true;
      }
      else if (char === '"') {
        inString = false;
      }
      continue;
    }
    if (char === '"') {
      inString = true;
    }
    else if (char === '{') {
      pendingClosers.push('}');
    }
    else if (char === '[') {
      pendingClosers.push(']');
    }
    else if (char === '}' || char === ']') {
      pendingClosers.pop();
    }
  }
  let completed = jsonString;
  // Truncation in the middle of a string value: close it so the brackets appended
  // below are not swallowed by the open string.
  if (inString && !escaped) {
    completed += '"';
  }
  while (pendingClosers.length > 0) {
    completed += pendingClosers.pop();
  }
  logger.debug({ jobId, stage: 'bracket-completion', originalLength: jsonString.length, completedLength: completed.length }, "Bracket completion attempted");
  return completed;
}
/**
 * Parses an LLM response as JSON, choosing the cheapest approach that works.
 * The response is first checked by validateJsonExpectations; if it passes it
 * is parsed directly, otherwise a strategy is chosen from the reported issues
 * and applied. The parsed value is finally passed through the file-discovery
 * format correction.
 *
 * Two context values additionally emit an info-level diagnostic snapshot of
 * the raw response before parsing.
 *
 * @param {string} response - Raw LLM response text.
 * @param {string} context - Task/context label used for logging and strategy hooks.
 * @returns {*} The parsed (and possibly re-wrapped) value.
 */
export function intelligentJsonParse(response, context) {
  // Fields shared by both diagnostic snapshots; only the preview sizes differ.
  const snapshot = (previewChars, tailChars) => ({
    context,
    responseLength: response.length,
    responsePreview: response.substring(0, previewChars),
    responseEnd: response.substring(Math.max(0, response.length - tailChars)),
    startsWithBrace: response.trim().startsWith('{'),
    endsWithBrace: response.trim().endsWith('}')
  });

  if (context === 'context_curator_relevance_scoring') {
    logger.info({
      ...snapshot(300, 100),
      containsFileScores: response.includes('fileScores'),
      containsOverallMetrics: response.includes('overallMetrics')
    }, 'RELEVANCE SCORING - intelligentJsonParse called with response');
  }
  if (context === 'context_curator_prompt_refinement') {
    logger.info({
      ...snapshot(500, 200),
      containsRefinedPrompt: response.includes('refinedPrompt'),
      containsEnhancementReasoning: response.includes('enhancementReasoning'),
      containsAddedContext: response.includes('addedContext'),
      hasMarkdownBlocks: response.includes('```')
    }, 'PROMPT REFINEMENT - intelligentJsonParse called with response');
  }

  const { success, issues } = validateJsonExpectations(response);
  if (success) {
    logger.debug({ context }, "Response meets expectations - parsing directly");
    return detectAndCorrectFileDiscoveryFormat(JSON.parse(response.trim()), context);
  }

  const strategy = determineParsingStrategy(issues, response);
  logger.debug({
    context,
    issues,
    strategy,
    responseLength: response.length
  }, "Response needs preprocessing - applying targeted strategy");
  return detectAndCorrectFileDiscoveryFormat(applyTargetedParsing(response, strategy, context), context);
}
/**
 * Normalises file-discovery responses to the standard wrapper shape
 * `{ relevantFiles, totalFilesAnalyzed, processingTimeMs, searchStrategy, coverageMetrics }`.
 *
 * Only applies when `context` contains "file_discovery". Two malformed shapes
 * are corrected: a single file object (has `path`, no `relevantFiles`) and a
 * bare array whose first element is an object with `path`. Anything else is
 * returned unchanged.
 *
 * @param {*} parsed - Value produced by JSON parsing.
 * @param {string} context - Context label; a `file_discovery_<strategy>` suffix supplies `searchStrategy`.
 * @returns {*} The wrapped value, or `parsed` unchanged.
 */
function detectAndCorrectFileDiscoveryFormat(parsed, context) {
  // A missing / non-string context cannot be a file-discovery context.
  if (typeof context !== 'string' || !context.includes('file_discovery')) {
    return parsed;
  }
  const isRecord = (value) => typeof value === 'object' && value !== null;
  const strategyMatch = context.match(/file_discovery_(.+)$/);
  const searchStrategy = strategyMatch ? strategyMatch[1] : 'unknown';

  if (isRecord(parsed) && 'path' in parsed && !('relevantFiles' in parsed)) {
    logger.info({
      context,
      originalFormat: 'single_file_object',
      correctedFormat: 'standard_wrapper'
    }, 'Auto-correcting file discovery response format');
    return {
      relevantFiles: [parsed],
      totalFilesAnalyzed: 1,
      processingTimeMs: 0,
      searchStrategy,
      coverageMetrics: {
        totalTokens: Number(parsed.estimatedTokens) || 0,
        averageConfidence: Number(parsed.confidence) || 0
      }
    };
  }

  // The `in` operator throws on primitives and null, so the first element must be
  // checked for object-ness before probing it (e.g. a bare array of path strings).
  if (Array.isArray(parsed) && parsed.length > 0 && isRecord(parsed[0]) && 'path' in parsed[0]) {
    logger.info({
      context,
      originalFormat: 'bare_array',
      correctedFormat: 'standard_wrapper',
      fileCount: parsed.length
    }, 'Auto-correcting bare array file discovery response');
    // Optional chaining tolerates null entries later in the array; they count as 0.
    const totalTokens = parsed.reduce((sum, file) => sum + (Number(file?.estimatedTokens) || 0), 0);
    const confidenceSum = parsed.reduce((sum, file) => sum + (Number(file?.confidence) || 0), 0);
    return {
      relevantFiles: parsed,
      totalFilesAnalyzed: parsed.length,
      processingTimeMs: 0,
      searchStrategy,
      coverageMetrics: {
        totalTokens,
        averageConfidence: confidenceSum / parsed.length
      }
    };
  }
  return parsed;
}
/**
 * Decides whether a response can be handed to JSON.parse as-is.
 *
 * The text is parsed first: if it is already a valid JSON object or array the
 * result is `success: true`, whatever it contains (newlines from pretty
 * printing, apostrophes or back-ticks inside string values, etc.). Otherwise a
 * set of textual heuristics describes what looks wrong so the caller can pick
 * a repair strategy; if none of them fires, the parser's own error message is
 * reported instead.
 *
 * @param {string} response - Raw response text.
 * @returns {{success: boolean, data: null, issues: string[], needsPreprocessing: boolean}}
 */
function validateJsonExpectations(response) {
  const trimmed = response.trim();

  // Ground truth first: the heuristics below are only meaningful for text that
  // does not parse. Primitive results ("text", 42, null) are deliberately not
  // accepted here and still go through the heuristic path.
  let syntaxError;
  try {
    const parsed = JSON.parse(trimmed);
    if (typeof parsed === 'object' && parsed !== null) {
      return { success: true, data: null, issues: [], needsPreprocessing: false };
    }
  }
  catch (error) {
    syntaxError = error instanceof Error ? error.message : String(error);
  }

  const issues = [];
  if (!trimmed.startsWith('{') || !trimmed.endsWith('}')) {
    issues.push('Missing JSON object wrapper');
  }
  if (trimmed.includes('```json') || trimmed.includes('```')) {
    issues.push('Contains markdown code blocks');
  }
  if (trimmed.includes('\n') && !trimmed.includes('\\n')) {
    issues.push('Contains unescaped newlines');
  }
  if (trimmed.match(/,\s*[}\]]/)) {
    issues.push('Contains trailing commas');
  }
  if (trimmed.includes("'") && !trimmed.includes("\\'")) {
    issues.push('Contains unescaped single quotes');
  }
  if (issues.length === 0) {
    issues.push(`JSON syntax error: ${syntaxError}`);
  }
  return {
    success: false,
    data: null,
    issues,
    needsPreprocessing: true
  };
}
/**
 * Picks a repair strategy from the issues reported by validation.
 * When every issue is one of the "simple" kinds (markdown fences, trailing
 * commas, missing wrapper) the cheap cleanup is used. Otherwise a long
 * response (> 2000 chars) or any "unescaped ..." issue selects aggressive
 * extraction; everything else falls back to the cheap cleanup.
 *
 * @param {string[]} issues - Issue descriptions.
 * @param {string} response - Raw response text (only its length is used).
 * @returns {'basic-cleanup'|'aggressive-extraction'} Strategy name.
 */
function determineParsingStrategy(issues, response) {
  const size = response.length;
  const SIMPLE_MARKERS = [
    'Contains markdown code blocks',
    'Contains trailing commas',
    'Missing JSON object wrapper'
  ];
  const isSimple = (issue) => SIMPLE_MARKERS.some((marker) => issue.includes(marker));
  const onlySimpleIssues = issues.every(isSimple);
  if (onlySimpleIssues) {
    return 'basic-cleanup';
  }
  const mentionsUnescaped = issues.some((issue) => issue.includes('unescaped'));
  return size > 2000 || mentionsUnescaped ? 'aggressive-extraction' : 'basic-cleanup';
}
/**
 * Runs the parser that corresponds to the chosen strategy.
 * `'basic-cleanup'` selects basicCleanupParsing; every other value selects
 * aggressiveExtractionParsing.
 *
 * @param {string} response - Raw response text.
 * @param {string} strategy - Strategy name from determineParsingStrategy.
 * @param {string} context - Context label passed to the parser.
 * @returns {*} Parsed value.
 */
function applyTargetedParsing(response, strategy, context) {
  const parse = strategy === 'basic-cleanup' ? basicCleanupParsing : aggressiveExtractionParsing;
  return parse(response, context);
}
/**
 * Cheap first-line repair for nearly valid JSON responses: unwraps a markdown
 * code fence, escapes raw newlines inside strings, removes trailing commas,
 * converts an all-single-quoted document to double quotes and, if needed,
 * cuts out the outermost `{ ... }` span. If the result still does not parse,
 * the original response is handed to aggressiveExtractionParsing.
 *
 * @param {string} response - Raw response text.
 * @param {string} context - Context label for logging / fallback.
 * @returns {*} Parsed value.
 */
function basicCleanupParsing(response, context) {
  let cleaned = response.trim();
  cleaned = cleaned.replace(/^\uFEFF/, '');

  // Prefer an explicit ```json fence. A bare ``` fence is unwrapped too (validation
  // reports both as the same "simple" issue), but only when its body starts like
  // JSON, so an unrelated code block earlier in the text is not mistaken for it.
  const jsonFence = cleaned.match(/```json\s*([\s\S]*?)\s*```/);
  if (jsonFence) {
    cleaned = jsonFence[1].trim();
  }
  else {
    const bareFence = cleaned.match(/```\s*([\s\S]*?)\s*```/);
    if (bareFence && /^[{\[]/.test(bareFence[1])) {
      cleaned = bareFence[1].trim();
    }
  }

  // Escape raw line breaks / tabs inside string literals that contain no escaped newline yet.
  cleaned = cleaned.replace(/"([^"]*(?:\\.[^"]*)*)"/g, (match, content) => {
    if (content.includes('\n') && !content.includes('\\n')) {
      const escapedContent = content.replace(/\n/g, '\\n').replace(/\r/g, '\\r').replace(/\t/g, '\\t');
      return `"${escapedContent}"`;
    }
    return match;
  });
  cleaned = cleaned.replace(/,(\s*[}\]])/g, '$1');
  // Only swap quote style when the document uses single quotes exclusively.
  if (!cleaned.includes('"') && cleaned.includes("'")) {
    cleaned = cleaned.replace(/'/g, '"');
  }

  // Candidate texts to parse, in order of preference. Text that already starts
  // with `[` is tried whole first: cutting `{ ... }` out of a top-level array
  // of objects would yield `{...},{...}`, which is not valid JSON.
  let candidates = [cleaned];
  if (!cleaned.startsWith('{')) {
    const objectSpan = cleaned.match(/\{[\s\S]*\}/);
    if (objectSpan) {
      candidates = cleaned.startsWith('[') ? [cleaned, objectSpan[0]] : [objectSpan[0]];
    }
  }

  let lastError;
  for (const candidate of candidates) {
    try {
      const parsed = JSON.parse(candidate);
      if (candidate.length < response.length * 0.5) {
        logger.warn({
          context,
          originalLength: response.length,
          cleanedLength: candidate.length
        }, "Basic cleanup reduced response size significantly");
      }
      return parsed;
    }
    catch (error) {
      lastError = error;
    }
  }
  logger.debug({ context, error: lastError instanceof Error ? lastError.message : String(lastError) }, "Basic cleanup failed, falling back to aggressive extraction");
  return aggressiveExtractionParsing(response, context);
}
/**
 * Runs the multi-strategy progressive parser and guards against results that
 * discard most of the input. The size of the re-serialised result is compared
 * with the size of the raw response: more than 70% shrinkage is treated as a
 * failure, more than 30% is logged as a warning.
 *
 * Every failure, including the data-loss rejection, surfaces as a
 * ParsingError with the message "All parsing strategies failed for <context>"
 * whose context carries the underlying error message.
 *
 * @param {string} response - Raw response text.
 * @param {string} context - Context label (also passed on as the job id).
 * @returns {*} Parsed value.
 * @throws {ParsingError} When parsing fails or loses too much data.
 */
function aggressiveExtractionParsing(response, context) {
  let extracted;
  try {
    extracted = enhancedProgressiveJsonParsing(response, context);
    const inputChars = response.length;
    const outputChars = JSON.stringify(extracted).length;
    const lossRatio = (inputChars - outputChars) / inputChars;
    const isExcessive = lossRatio > 0.7;
    if (isExcessive) {
      throw new ParsingError(`Aggressive extraction caused excessive data loss for ${context}. Original: ${inputChars} chars, Result: ${outputChars} chars (${Math.round(lossRatio * 100)}% loss)`, {
        originalSize: inputChars,
        resultSize: outputChars,
        dataLossRatio: lossRatio,
        originalPreview: response.substring(0, 200)
      });
    }
    const isSignificant = lossRatio > 0.3;
    if (isSignificant) {
      logger.warn({
        context,
        originalSize: inputChars,
        resultSize: outputChars,
        dataLossRatio: Math.round(lossRatio * 100)
      }, "Aggressive extraction caused significant data loss");
    }
  }
  catch (error) {
    const lastError = error instanceof Error ? error.message : String(error);
    throw new ParsingError(`All parsing strategies failed for ${context}`, {
      originalResponse: response.substring(0, 500),
      responseLength: response.length,
      lastError
    });
  }
  return extracted;
}
/**
 * Collects JSON candidate substrings from four extractors (outermost
 * objects, markdown patterns, balanced objects, intelligent substrings),
 * removes exact duplicates and orders the candidates longest first.
 *
 * @param {string} jsonString - Text to mine for JSON candidates.
 * @param {string} [jobId] - Identifier used only for the debug log line.
 * @returns {string[]} Unique candidates sorted by descending length.
 */
function smartMultiPassExtraction(jsonString, jobId) {
  // Extractors run in this fixed order; duplicates keep their first position.
  const collected = [
    ...extractOutermostObjects(jsonString),
    ...extractFromMarkdownPatterns(jsonString),
    ...extractMultipleBalancedObjects(jsonString),
    ...extractIntelligentSubstrings(jsonString)
  ];
  const candidates = Array.from(new Set(collected));
  candidates.sort((first, second) => second.length - first.length);
  const [largest] = candidates;
  logger.debug({
    jobId,
    stage: 'smart-multi-pass',
    totalCandidates: candidates.length,
    largestSize: largest?.length || 0
  }, "Smart multi-pass extraction completed");
  return candidates;
}
/**
 * Finds every top-level balanced `{...}` / `[...]` span in the text.
 * Double-quoted strings (with backslash escapes) are skipped so brackets
 * inside them are ignored; a closer that does not match the innermost open
 * bracket is ignored. Spans of 10 characters or fewer are discarded.
 *
 * Results are ordered by: size (when the difference exceeds 100 chars),
 * then position in the text (when the difference exceeds 50 chars), then
 * presence of a well-known root property name, then size.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Extracted spans in priority order.
 */
function extractOutermostObjects(content) {
  const ROOT_PROPERTY_PATTERN = /["'](?:moduleName|name|type|id|description|provides|requires)["']\s*:/;
  const CLOSER_FOR = { '{': '}', '[': ']' };
  const spans = [];
  const expectedClosers = [];
  let insideString = false;
  let skipNext = false;
  let spanStart = -1;

  for (let index = 0; index < content.length; index += 1) {
    const ch = content[index];
    if (skipNext) {
      // Character following a backslash inside a string: never significant.
      skipNext = false;
      continue;
    }
    if (insideString && ch === '\\') {
      skipNext = true;
      continue;
    }
    if (ch === '"') {
      insideString = !insideString;
      continue;
    }
    if (insideString) {
      continue;
    }
    if (ch === '{' || ch === '[') {
      if (expectedClosers.length === 0) {
        spanStart = index;
      }
      expectedClosers.push(CLOSER_FOR[ch]);
      continue;
    }
    const isCloser = ch === '}' || ch === ']';
    if (!isCloser || expectedClosers[expectedClosers.length - 1] !== ch) {
      continue;
    }
    expectedClosers.pop();
    if (expectedClosers.length > 0 || spanStart === -1) {
      continue;
    }
    const span = content.substring(spanStart, index + 1);
    if (span.length > 10) {
      spans.push(span);
    }
    spanStart = -1;
  }

  const hasRootProperty = (text) => ROOT_PROPERTY_PATTERN.test(text);
  spans.sort((left, right) => {
    const bySize = right.length - left.length;
    if (Math.abs(bySize) > 100) {
      return bySize;
    }
    const byPosition = content.indexOf(left) - content.indexOf(right);
    if (Math.abs(byPosition) > 50) {
      return byPosition;
    }
    const leftIsRoot = hasRootProperty(left);
    const rightIsRoot = hasRootProperty(right);
    if (leftIsRoot !== rightIsRoot) {
      return leftIsRoot ? -1 : 1;
    }
    return bySize;
  });
  return spans;
}
function extractFromMarkdownPatterns(content) {
const results = [];
const codeBlockRegex = /```(?:json)?\s*([\s\S]*?)```/g;
let match;
while ((match = codeBlockRegex.exec(content)) !== null) {
const extracted = match[1].trim();
if (extracted.length > 10) {
results.push(extracted);
}
}
const singleLineRegex = /`\s*(\{[\s\S]*?\}|\[[\s\S]*?\])\s*`/g;
while ((match = singleLineRegex.exec(content)) !== null) {
const extracted = match[1].trim();
if (extracted.length > 10) {
results.push(extracted);
}
}
const prefixPatterns = [
/(?:json|response|result|data):\s*(\{[\s\S]*?\}|\[[\s\S]*?\])/gi,
/(?:here is|here's)\s+(?:the\s+)?(?:json|response):\s*(\{[\s\S]*?\}|\[[\s\S]*?\])/gi
];
for (const pattern of prefixPatterns) {
while ((match = pattern.exec(content)) !== null) {
const extracted = match[1].trim();
if (extracted.length > 10) {
results.push(extracted);
}
}
}
return results;
}
/**
 * Tries to extract a balanced JSON span starting at each `{` or `[` in the
 * text, in order of appearance, giving up after the first 50 openers.
 * Extractions that throw, come back empty, or are 10 characters or shorter
 * are skipped.
 *
 * @param {string} content - Text to scan.
 * @returns {string[]} Extracted spans in order of their starting position.
 */
function extractMultipleBalancedObjects(content) {
  const MAX_ATTEMPTS = 50;
  const openers = [];
  for (let pos = 0; pos < content.length; pos += 1) {
    const char = content[pos];
    if (char === '{' || char === '[') {
      openers.push({ char, pos });
    }
  }

  const spans = [];
  for (const { char, pos } of openers.slice(0, MAX_ATTEMPTS)) {
    let span;
    try {
      span = extractBalancedJson(content, pos, char);
    }
    catch {
      // This opener did not yield a balanced span; move on to the next one.
      continue;
    }
    if (span && span.length > 10) {
      spans.push(span);
    }
  }
  return spans;
}
/**
 * Brute-force search for substrings of `content` that parse as JSON.
 *
 * Phase 1 tries substrings anchored at offsets 0-5, shrinking from the end of
 * the text; phase 2 (only when phase 1 found fewer than three results) slides
 * windows of decreasing size across the whole text. Both phases share one
 * budget of 1000 iterations and only consider substrings of at least 100
 * characters. The function returns early as soon as a parsed substring
 * covers more than 80% of the text.
 *
 * @param {string} content - Text to search.
 * @returns {string[]} Substrings that parsed successfully. In phase 1, objects
 *   with a well-known root key are placed at the front.
 */
function extractIntelligentSubstrings(content) {
const results = [];
// Hard cap on candidate substrings examined across both phases.
const maxIterations = 1000;
let iterations = 0;
const minSize = 100;
// Granularity scales with input size (about 50 steps across the text, minimum 1).
const stepSize = Math.max(1, Math.floor(content.length / 50));
// Phase 1: anchor at the first few offsets, tolerating a short leading prefix.
const priorityStarts = [0, 1, 2, 3, 4, 5];
for (const priorityStart of priorityStarts) {
if (priorityStart >= content.length)
continue;
// Shrink from the full remaining length in double steps.
for (let size = content.length - priorityStart; size >= minSize && iterations < maxIterations; size -= stepSize * 2) {
iterations++;
const substring = content.substring(priorityStart, priorityStart + size);
// Cheap rejections before paying for JSON.parse: must contain `{`, have equal
// `{` / `}` counts, and start with `{` after trimming.
if (!substring.includes('{'))
continue;
if (substring.split('{').length !== substring.split('}').length)
continue;
if (!substring.trim().startsWith('{'))
continue;
try {
const parsed = JSON.parse(substring);
if (typeof parsed === 'object' && parsed !== null) {
const hasRootProps = Object.keys(parsed).some(key => ['moduleName', 'name', 'type', 'id', 'description', 'provides', 'requires'].includes(key));
// Objects with a recognised root key go to the front of the list.
if (hasRootProps) {
results.unshift(substring);
}
else {
results.push(substring);
}
// Good enough: the match covers most of the input.
if (substring.length > content.length * 0.8) {
return results;
}
}
}
catch {
// Not valid JSON at this size; try the next one.
continue;
}
}
}
// Phase 2: sliding windows over the whole text, largest first.
if (results.length < 3) {
for (let size = content.length; size >= minSize && iterations < maxIterations; size -= stepSize) {
for (let start = 0; start <= content.length - size && iterations < maxIterations; start += stepSize) {
iterations++;
const substring = content.substring(start, start + size);
// Needs at least one opener and balanced counts of both bracket kinds.
if (!substring.includes('{') && !substring.includes('['))
continue;
if (substring.split('{').length !== substring.split('}').length)
continue;
if (substring.split('[').length !== substring.split(']').length)
continue;
try {
JSON.parse(substring);
results.push(substring);
if (substring.length > content.length * 0.8) {
return results;
}
}
catch {
// Not valid JSON for this window; keep sliding.
continue;
}
}
// Stop shrinking once the first result already covers more than half of the text.
if (results.length > 0 && results[0].length > content.length * 0.5) {
break;
}
}
}
return results;
}
/**
 * Returns the largest substring of `jsonString` that parses as JSON.
 *
 * First pass: for every `{` / `[` position, extract a balanced span with
 * extractBalancedJson and keep the longest one that parses to a non-empty
 * array or an object with at least one non-blank key. If one is found it is
 * returned immediately. Second pass: fall back to the candidates from
 * smartMultiPassExtraction and pick, in order of preference, the longest
 * parsed object/array, the longest parsed candidate overall (if > 20 chars),
 * or the longest parsed primitive (if > 20 chars).
 *
 * @param {string} jsonString - Text to search.
 * @param {string} [jobId] - Identifier used for logging and passed to the fallback extractor.
 * @returns {string} The selected JSON substring.
 * @throws {Error} 'No valid JSON substring found' when nothing qualifies.
 */
function extractPartialJson(jsonString, jobId) {
let maxValidJson = '';
let maxValidObject = '';
// "Substantial" = non-empty array, or object with at least one key that is not blank.
const isSubstantialObject = (parsed) => {
if (typeof parsed !== 'object' || parsed === null)
return false;
if (Array.isArray(parsed)) {
return parsed.length > 0;
}
else {
const keys = Object.keys(parsed);
return keys.length > 0 && keys.some(key => key.trim().length > 0);
}
};
// Collect every opener position (no string-awareness at this point).
const objectStarts = [];
for (let i = 0; i < jsonString.length; i++) {
if (jsonString[i] === '{' || jsonString[i] === '[') {
objectStarts.push({ char: jsonString[i], pos: i });
}
}
for (const start of objectStarts) {
try {
const extracted = extractBalancedJson(jsonString, start.pos, start.char);
if (extracted) {
try {
const parsed = JSON.parse(extracted);
if (isSubstantialObject(parsed) && extracted.length > maxValidObject.length) {
maxValidObject = extracted;
}
}
catch {
// Balanced but not valid JSON: ignore this span.
}
}
}
catch {
// Extraction failed for this opener: ignore and try the next one.
}
}
if (maxValidObject) {
logger.debug({ jobId, stage: 'partial-extraction', extractedLength: maxValidObject.length, isObject: true }, "Partial JSON extraction found substantial object");
return maxValidObject;
}
// Fallback: maxValidObject is still '' here, so any parsed object/array candidate can win.
let maxValidPrimitive = '';
const extractionResults = smartMultiPassExtraction(jsonString, jobId);
for (const result of extractionResults) {
try {
const parsed = JSON.parse(result);
if (typeof parsed === 'object' && parsed !== null) {
if (result.length > maxValidObject.length) {
maxValidObject = result;
}
}
else {
// Short primitives (20 chars or fewer) are not worth returning.
if (result.length > maxValidPrimitive.length && result.length > 20) {
maxValidPrimitive = result;
}
}
if (result.length > maxValidJson.length) {
maxValidJson = result;
}
}
catch {
// Candidate does not parse: skip it.
continue;
}
}
// Preference order: object/array, then any long-enough parsed candidate, then a long primitive.
const result = maxValidObject || (maxValidJson.length > 20 ? maxValidJson : '') || (maxValidPrimitive.length > 20 ? maxValidPrimitive : '');
logger.debug({ jobId, stage: 'partial-extraction', extractedLength: result.length, isObject: !!maxValidObject }, "Partial JSON extraction attempted");
if (!result) {
throw new Error('No valid JSON substring found');
}
return result;
}
/**
 * Last-resort parser for JavaScript-flavoured object literals: quotes bare
 * keys and maps `undefined`, `NaN`, `Infinity` and `-Infinity` to `null`
 * before calling JSON.parse.
 *
 * The rewrites are applied only to text outside double-quoted strings, so
 * string values that contain a colon or one of those words (URLs, times,
 * "undefined" in prose) are left intact.
 *
 * @param {string} jsonString - Text to parse.
 * @param {string} [jobId] - Identifier used only for the debug log line.
 * @returns {*} Parsed value.
 * @throws {SyntaxError} When the relaxed text is still not valid JSON.
 */
function relaxedJsonParse(jsonString, jobId) {
  const relaxSegment = (segment) => segment
    .replace(/(\w+):/g, '"$1":')
    .replace(/:\s*undefined/g, ': null')
    .replace(/:\s*Infinity/g, ': null')
    .replace(/:\s*-Infinity/g, ': null')
    .replace(/:\s*NaN/g, ': null');
  // Tokenise into complete string literals and the text between them; only the
  // latter is rewritten. A quote that never closes matches neither alternative
  // and is left in place, with the text after it treated as non-string text.
  const relaxed = jsonString.replace(/"(?:[^"\\]|\\[\s\S])*"|[^"]+/g, (segment) => (segment[0] === '"' ? segment : relaxSegment(segment)));
  logger.debug({ jobId, stage: 'relaxed-parsing', processedLength: relaxed.length }, "Relaxed JSON parsing attempted");
  return JSON.parse(relaxed);
}
/**
 * Tries a fixed sequence of increasingly lenient strategies to turn an LLM
 * response into a JSON value and returns the first success after passing it
 * through detectCircularAndLimitDepth.
 *
 * Strategy order:
 *   1. direct                   - JSON.parse, refused up front when the text shows
 *                                 markers that need preprocessing (fences, 15+ digit
 *                                 numbers, empty keys, comments, missing commas).
 *   2. mixed-content-extraction - extract JSON from surrounding prose, with a
 *                                 partial-extraction fallback; primitives are rejected.
 *   3. 4-stage-sanitization     - pre-process, sanitise, repair, parse.
 *   4. bracket-completion       - as 3, then append missing closers.
 *   5. partial-extraction       - as 3, then parse the largest valid substring.
 *   6. relaxed-parsing          - as 3, then relaxed parse.
 *
 * A 5 second overall budget is checked before each strategy; once exceeded,
 * no further strategy is started.
 *
 * @param {string} rawResponse - Raw response text.
 * @param {string} [jobId] - Identifier for logging; 'context_curator_relevance_scoring'
 *   additionally enables info-level tracing of each strategy.
 * @returns {*} Parsed and depth-limited value.
 * @throws {ParsingError} When every strategy fails or the time budget is exhausted.
 */
function enhancedProgressiveJsonParsing(rawResponse, jobId) {
  const maxDepth = 50;
  const maxArrayLength = 10000;
  const maxProcessingTime = 5000;
  const startTime = Date.now();
  const traceRelevanceScoring = jobId === 'context_curator_relevance_scoring';

  // Debug logging is best-effort everywhere in this function: a logger failure
  // must never be mistaken for a parsing failure.
  const debugLog = (meta, message) => {
    try {
      logger.debug(meta, message);
    }
    catch {
      // Intentionally ignored.
    }
  };
  const isPrimitive = (value) => typeof value === 'string' || typeof value === 'number' || typeof value === 'boolean';
  // Shared front half of strategies 3-6.
  const runSanitizationPipeline = () => {
    let processed = preProcessJsonResponse(rawResponse, jobId);
    processed = sanitizeControlCharacters(processed, jobId);
    return repairJsonStructure(processed, jobId);
  };
  // Runs a strategy and warns when it is slow. It does not interrupt the strategy.
  const withTimeout = (strategy, strategyName) => {
    const strategyStartTime = Date.now();
    const result = strategy();
    const strategyTime = Date.now() - strategyStartTime;
    if (strategyTime > 1000) {
      logger.warn({ jobId, strategyName, strategyTime }, "Strategy took longer than expected");
    }
    return result;
  };

  // Each strategy carries its own name so every log line reports the strategy
  // that is actually running (a separate name list can drift out of sync).
  const strategies = [
    {
      name: 'direct',
      run: () => {
        debugLog({ jobId, strategy: 'direct' }, "Attempting direct JSON parse");
        if (/```/.test(rawResponse)) {
          throw new Error('Contains markdown code blocks that need extraction');
        }
        if (/:\s*\d{15,}/.test(rawResponse)) {
          throw new Error('Contains large numbers that need string conversion');
        }
        if (/""\s*:/.test(rawResponse)) {
          throw new Error('Contains empty string keys that need replacement');
        }
        if (/\/\/|\/\*/.test(rawResponse)) {
          throw new Error('Contains comments that need removal');
        }
        if (/"\s*\n\s*"/.test(rawResponse)) {
          throw new Error('Contains missing commas between properties');
        }
        return JSON.parse(rawResponse);
      }
    },
    {
      name: 'mixed-content-extraction',
      run: () => {
        debugLog({ jobId, strategy: 'mixed-content-extraction' }, "Attempting JSON extraction from mixed content");
        const extracted = extractJsonFromMixedContent(rawResponse, jobId);
        try {
          const parsed = JSON.parse(extracted);
          if (isPrimitive(parsed)) {
            throw new Error('Mixed content extraction found only primitive value, trying other strategies');
          }
          return parsed;
        }
        catch {
          // Covers both a parse failure and the primitive rejection above.
          debugLog({ jobId, strategy: 'mixed-content-smart-fallback' }, "Direct parse of extracted content failed, trying smart partial extraction");
          const partialExtracted = extractPartialJson(extracted, jobId);
          const parsed = JSON.parse(partialExtracted);
          if (isPrimitive(parsed)) {
            throw new Error('Smart partial extraction found only primitive value, trying other strategies');
          }
          return parsed;
        }
      }
    },
    {
      name: '4-stage-sanitization',
      run: () => {
        debugLog({ jobId, strategy: '4-stage-sanitization' }, "Attempting 4-stage sanitization pipeline");
        return JSON.parse(runSanitizationPipeline());
      }
    },
    {
      name: 'bracket-completion',
      run: () => {
        debugLog({ jobId, strategy: 'bracket-completion' }, "Attempting bracket completion");
        return JSON.parse(completeJsonBrackets(runSanitizationPipeline(), jobId));
      }
    },
    {
      name: 'partial-extraction',
      run: () => {
        debugLog({ jobId, strategy: 'partial-extraction' }, "Attempting partial JSON extraction");
        return JSON.parse(extractPartialJson(runSanitizationPipeline(), jobId));
      }
    },
    {
      name: 'relaxed-parsing',
      run: () => {
        debugLog({ jobId, strategy: 'relaxed-parsing' }, "Attempting relaxed JSON parsing");
        return relaxedJsonParse(runSanitizationPipeline(), jobId);
      }
    }
  ];

  let lastError = null;
  const recordFailure = (strategyNumber, error) => {
    lastError = error;
    debugLog({ jobId, strategy: strategyNumber, error: error instanceof Error ? error.message : String(error) }, "Enhanced JSON parsing strategy failed");
    if (traceRelevanceScoring) {
      logger.info({ jobId, strategy: strategyNumber, error: error instanceof Error ? error.message : String(error), errorType: error instanceof Error ? error.constructor.name : typeof error }, "RELEVANCE SCORING - Strategy failed");
    }
  };

  for (let i = 0; i < strategies.length; i++) {
    const { name: strategyName, run } = strategies[i];
    const strategyNumber = i + 1;

    // Budget exhausted: report once and stop instead of visiting every remaining strategy.
    if (Date.now() - startTime > maxProcessingTime) {
      logger.warn({ jobId, totalTime: Date.now() - startTime, strategy: strategyNumber }, "JSON parsing timed out, aborting remaining strategies");
      recordFailure(strategyNumber, new Error(`JSON parsing timed out after ${maxProcessingTime}ms`));
      break;
    }

    try {
      if (traceRelevanceScoring) {
        logger.info({ jobId, strategy: strategyNumber, strategyName }, "RELEVANCE SCORING - Trying parsing strategy");
      }
      const result = withTimeout(run, strategyName);
      const sanitizedResult = detectCircularAndLimitDepth(result, maxDepth, maxArrayLength, jobId);
      debugLog({ jobId, strategy: strategyNumber, success: true }, "Enhanced JSON parsing successful");
      if (traceRelevanceScoring) {
        logger.info({ jobId, strategy: strategyNumber, resultType: typeof sanitizedResult, resultKeys: sanitizedResult && typeof sanitizedResult === 'object' ? Object.keys(sanitizedResult) : 'not an object' }, "RELEVANCE SCORING - Strategy succeeded");
      }
      return sanitizedResult;
    }
    catch (error) {
      recordFailure(strategyNumber, error);
    }
  }
  throw new ParsingError(`All enhanced JSON parsing strategies failed. Last error: ${lastError?.message}`, { rawResponse: rawResponse.substring(0, 500), strategiesAttempted: strategies.length }, lastError || undefined);
}
/**
 * Builds a sanitized deep copy of a parsed value.
 *
 * - Anything nested deeper than `maxDepth` is replaced by the string
 *   '[Max Depth Exceeded]'.
 * - Numbers whose magnitude exceeds Number.MAX_SAFE_INTEGER are converted to
 *   their string form.
 * - A container that contains itself (directly or through descendants) is
 *   replaced by '[Circular Reference]' at the point where the cycle closes.
 * - Arrays longer than `maxArrayLength` are truncated to that length.
 *
 * Only own enumerable string-keyed properties of objects are copied.
 *
 * @param {*} obj - Value to sanitize (typically the output of a JSON parser).
 * @param {number} maxDepth - Deepest nesting level that is kept (root is 0).
 * @param {number} maxArrayLength - Maximum number of elements kept per array.
 * @param {string} [jobId] - Identifier included in warning log entries.
 * @returns {*} The sanitized copy; primitives are returned as-is.
 */
function detectCircularAndLimitDepth(obj, maxDepth, maxArrayLength, jobId) {
    // Holds only the containers on the path from the root to the value being
    // visited. Entries are removed once a container has been fully processed,
    // so an object that is merely referenced twice (a shared reference, not a
    // cycle) is copied twice instead of being reported as circular.
    const ancestors = new WeakSet();
    function processObject(current, depth = 0) {
        if (depth > maxDepth) {
            logger.warn({ jobId, depth, maxDepth }, "Maximum depth exceeded, truncating object");
            return '[Max Depth Exceeded]';
        }
        // Anything beyond MAX_SAFE_INTEGER in magnitude is never a safe integer,
        // so the magnitude check alone is sufficient.
        if (typeof current === 'number' && Math.abs(current) > Number.MAX_SAFE_INTEGER) {
            return current.toString();
        }
        if (current === null || typeof current !== 'object') {
            return current;
        }
        if (ancestors.has(current)) {
            logger.warn({ jobId, depth }, "Circular reference detected");
            return '[Circular Reference]';
        }
        ancestors.add(current);
        try {
            if (Array.isArray(current)) {
                let items = current;
                if (current.length > maxArrayLength) {
                    logger.warn({ jobId, arrayLength: current.length, maxArrayLength }, "Array length exceeded, truncating");
                    items = current.slice(0, maxArrayLength);
                }
                return items.map((item) => processObject(item, depth + 1));
            }
            // Object.fromEntries defines every key as an own data property. A plain
            // `result[key] = value` would invoke the prototype setter for a
            // "__proto__" key (which JSON.parse happily produces), dropping the key
            // and installing untrusted data as the copy's prototype.
            return Object.fromEntries(Object.entries(current).map(([key, value]) => [key, processObject(value, depth + 1)]));
        }
        finally {
            ancestors.delete(current);
        }
    }
    return processObject(obj);
}
/**
 * Pulls a JSON candidate out of text that mixes JSON with prose or markdown.
 *
 * Attempts, in order:
 *   1. the first fenced code block whose body contains the text `"tasks"`;
 *   2. otherwise the longest non-empty fenced code block;
 *   3. the whole (trimmed) input when it is a single-backtick wrapped object/array;
 *   4. the first balanced `{...}` / `[...]` span that JSON.parse accepts.
 *
 * Results from steps 1-3 are returned trimmed but are NOT validated as JSON.
 *
 * @param {string} content - Raw text to search.
 * @param {string} [jobId] - Identifier included in debug log entries.
 * @returns {string} The extracted JSON text.
 * @throws {Error} When none of the attempts yields a candidate.
 */
function extractJsonFromMixedContent(content, jobId) {
    const text = content.trim();
    const fencedBlocks = [...text.matchAll(/```(?:json)?\s*([\s\S]*?)```/gs)];
    if (fencedBlocks.length > 0) {
        // A block mentioning "tasks" wins outright, regardless of its size.
        const tasksIndex = fencedBlocks.findIndex((block) => block[1] && block[1].includes('"tasks"'));
        if (tasksIndex !== -1) {
            logger.debug({
                jobId,
                extractionMethod: "markdown_code_block_with_tasks",
                blockIndex: tasksIndex,
                totalBlocks: fencedBlocks.length
            }, "Extracted JSON from Markdown code block containing tasks array");
            return fencedBlocks[tasksIndex][1].trim();
        }
        // Otherwise keep the longest body; on ties the earliest block is kept.
        let biggestBody = '';
        let biggestIndex = -1;
        fencedBlocks.forEach((block, index) => {
            const body = block[1] || '';
            if (body.length > biggestBody.length) {
                biggestBody = body;
                biggestIndex = index;
            }
        });
        if (biggestBody) {
            logger.debug({
                jobId,
                extractionMethod: "markdown_code_block_largest",
                blockIndex: biggestIndex,
                totalBlocks: fencedBlocks.length,
                blockSize: biggestBody.length
            }, "Extracted largest JSON block from multiple Markdown code blocks");
            return biggestBody.trim();
        }
    }
    const inlineCode = text.match(/^`\s*(\{[\s\S]*\}|\[[\s\S]*\])\s*`$/s);
    if (inlineCode?.[1]) {
        logger.debug({ jobId, extractionMethod: "single_line_code" }, "Extracted JSON from single-line code block in mixed content");
        return inlineCode[1].trim();
    }
    // Try every opening brace/bracket from left to right; the first balanced
    // span that parses as JSON is returned.
    for (let pos = 0; pos < text.length; pos++) {
        const opener = text[pos];
        if (opener !== '{' && opener !== '[') {
            continue;
        }
        try {
            const candidate = extractBalancedJson(text, pos, opener);
            if (!candidate) {
                continue;
            }
            JSON.parse(candidate);
            logger.debug({ jobId, startPos: pos, extractedLength: candidate.length }, "Successfully extracted JSON from mixed content");
            return candidate;
        }
        catch {
            // This start position did not work out; move on to the next one.
        }
    }
    throw new Error("No valid JSON found in mixed content");
}
/**
 * Scans `content` from `startPos` and returns the span that ends where the
 * nesting of `startChar` delimiters returns to zero.
 *
 * Delimiters inside double-quoted strings are ignored, and a backslash inside
 * a string causes the following character to be skipped. Only the delimiter
 * pair implied by `startChar` is counted ('{' pairs with '}', anything else
 * with ']'); the other bracket type is not tracked.
 *
 * @param {string} content - Text to scan.
 * @param {number} startPos - Index at which scanning (and the result) starts.
 * @param {string} startChar - Opening delimiter to balance.
 * @returns {string|null} The balanced substring, or null if the end of
 *   `content` is reached before the nesting closes.
 */
function extractBalancedJson(content, startPos, startChar) {
    const closer = startChar === '{' ? '}' : ']';
    let nesting = 0;
    let insideString = false;
    let skipNext = false;
    for (let cursor = startPos; cursor < content.length; cursor += 1) {
        const ch = content[cursor];
        if (skipNext) {
            // Character following a backslash inside a string: consume it blindly.
            skipNext = false;
        }
        else if (insideString) {
            if (ch === '\\') {
                skipNext = true;
            }
            else if (ch === '"') {
                insideString = false;
            }
        }
        else if (ch === '"') {
            insideString = true;
        }
        else if (ch === startChar) {
            nesting += 1;
        }
        else if (ch === closer) {
            nesting -= 1;
            if (nesting === 0) {
                return content.substring(startPos, cursor + 1);
            }
        }
    }
    return null;
}
/**
 * Normalizes a raw LLM response into a JSON string.
 *
 * The response is parsed with `enhancedProgressiveJsonParsing` and re-serialized
 * with JSON.stringify. If anything in that path throws, the failure is logged
 * and the result of `legacyNormalizeJsonResponse` is returned instead. In both
 * cases the outcome is reported to the prompt optimizer on a best-effort basis.
 *
 * @param {string} rawResponse - Raw response text; falsy values are returned unchanged.
 * @param {string} [jobId] - Identifier used in logs; 'unknown' is reported to
 *   the optimizer when it is falsy.
 * @returns {string} The normalized JSON text (or the falsy input as-is).
 */
export function normalizeJsonResponse(rawResponse, jobId) {
    if (!rawResponse) {
        return rawResponse;
    }
    const startedAt = Date.now();
    // Reports the outcome to the prompt optimizer. A failure here must never
    // affect normalization, so it is only logged at debug level.
    const reportOutcome = (failureLogMessage, ...outcome) => {
        try {
            getPromptOptimizer().recordParsingResult(jobId || 'unknown', ...outcome);
        }
        catch (learningError) {
            logger.debug({ learningError }, failureLogMessage);
        }
    };
    logger.debug({ jobId, rawResponseLength: rawResponse.length }, "Starting enhanced JSON normalization with 4-stage pipeline");
    try {
        const normalized = JSON.stringify(enhancedProgressiveJsonParsing(rawResponse, jobId));
        const elapsed = Date.now() - startedAt;
        logger.debug({
            jobId,
            processingTime: elapsed,
            originalLength: rawResponse.length,
            normalizedLength: normalized.length,
            success: true
        }, "Enhanced JSON normalization completed successfully");
        reportOutcome('Failed to record parsing success for learning', true);
        return normalized;
    }
    catch (error) {
        const elapsed = Date.now() - startedAt;
        const reason = error instanceof Error ? error.message : String(error);
        logger.warn({
            jobId,
            processingTime: elapsed,
            error: reason
        }, "Enhanced progressive parsing failed, falling back to legacy normalization");
        reportOutcome('Failed to record parsing failure for learning', false, reason);
        return legacyNormalizeJsonResponse(rawResponse, jobId);
    }
}
function legacyNormalizeJsonResponse(rawResponse, jobId) {
logger.debug({ jobId, rawResponseLength: rawResponse.length }, "Starting legacy JSON normalization");
const codeBlockMatch = rawResponse.match(/```(?:json)?\s*([\s\S]*?)```/s);
if (codeBlockMatch && codeBlockMatch[1]) {
logger.debug({ jobId, extractionMethod: "markdown_code_block" }, "Extracted JSON from Markdown code block");
return codeBlockMatch[1].trim();
}
const singleLineCodeMatch = rawResponse.match(/^`\s*(\{[\s\S]*\}|\[[\s\S]*\])\s*`$/s);
if (singleLineCodeMatch && singleLineCodeMatch[1]) {
logger.debug({ jobId, extractionMethod: "single_line_code" }, "Extracted JSON from single-line code block");
return singleLineCodeMatch[1].trim();
}
const jsonContent = rawResponse.trim();
const firstBracket = jsonContent.indexOf('[');
const firstBrace = jsonContent.indexOf('{');
let start = -1;
if (firstBracket !== -1 && (firstBrace === -1 || firstBracket < firstBrace)) {
start = firstBracket;
}
else if (firstBrace !== -1) {
start = firstBrace;
}
if (start !== -1) {
const lastBracket = jsonContent.lastIndexOf(']');
const lastBrace = jsonContent.lastIndexOf('}');
let end = -1;
if (start === firstBracket) {
end = lastBracket;
}
else {