// @probelabs/probe
// Version:
// Node.js wrapper for the probe code search tool
// 1,344 lines (1,147 loc) • 65.3 kB
// JavaScript
/**
* Utility functions for cleaning and validating schema responses from AI models
* Supports JSON and Mermaid diagram validation
*/
import { createMessagePreview } from '../tools/common.js';
import { validate, fixText, extractMermaidBlocks } from '@probelabs/maid';
import Ajv from 'ajv';
/**
 * Return a deep copy of a JSON schema in which every object-typed subschema
 * that does not declare `additionalProperties` gets `additionalProperties: false`.
 *
 * Subschemas are reached through: `properties`, `patternProperties`,
 * schema-valued `additionalProperties`, `items` (single schema or tuple),
 * `prefixItems`, `oneOf` / `anyOf` / `allOf`, `definitions` and `$defs`.
 * A subschema counts as object-typed when `type` is `'object'` or an array
 * of types that includes `'object'`. An explicit `additionalProperties`
 * value (true, false or a schema) is never overwritten.
 *
 * @param {Object} schema - JSON schema object
 * @returns {Object} - Cloned schema with additionalProperties enforced;
 *   non-object input is returned unchanged
 */
function enforceNoAdditionalProperties(schema) {
  if (!schema || typeof schema !== 'object') {
    return schema;
  }

  // Deep clone so the caller's schema is never mutated
  const cloned = JSON.parse(JSON.stringify(schema));

  // Keywords whose value is a map of name -> subschema
  const schemaMapKeywords = ['properties', 'patternProperties', 'definitions', '$defs'];
  // Keywords whose value is a list of subschemas
  const schemaListKeywords = ['prefixItems', 'oneOf', 'anyOf', 'allOf'];

  const isObjectType = (type) =>
    type === 'object' || (Array.isArray(type) && type.includes('object'));

  function applyRecursively(node) {
    if (!node || typeof node !== 'object') {
      return;
    }

    if (isObjectType(node.type) && node.additionalProperties === undefined) {
      node.additionalProperties = false;
    } else if (node.additionalProperties && typeof node.additionalProperties === 'object') {
      // additionalProperties may itself be a schema describing the extra values
      applyRecursively(node.additionalProperties);
    }

    for (const keyword of schemaMapKeywords) {
      const group = node[keyword];
      if (group && typeof group === 'object') {
        Object.values(group).forEach(applyRecursively);
      }
    }

    // items is either one schema or (draft-07 tuple form) an array of schemas
    if (node.items) {
      if (Array.isArray(node.items)) {
        node.items.forEach(applyRecursively);
      } else {
        applyRecursively(node.items);
      }
    }

    for (const keyword of schemaListKeywords) {
      if (Array.isArray(node[keyword])) {
        node[keyword].forEach(applyRecursively);
      }
    }
  }

  applyRecursively(cloned);
  return cloned;
}
/**
 * HTML entity decoder map for common entities that might appear in mermaid diagrams.
 * Keys are the encoded entities, values the characters they stand for.
 */
const HTML_ENTITY_MAP = {
  '&lt;': '<',
  '&gt;': '>',
  '&amp;': '&',
  '&quot;': '"',
  '&#39;': "'",
  '&apos;': "'", // Also handle XML/HTML5 apostrophe entity
  '&nbsp;': ' '
};

/**
 * Decode HTML entities in text without requiring external dependencies.
 *
 * Only the entities listed in HTML_ENTITY_MAP are decoded. Decoding is done in
 * a single pass over the input, so the output of one replacement is never
 * decoded again (`&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} text - Text that may contain HTML entities
 * @returns {string} - Text with HTML entities decoded; empty or non-string
 *   input is returned unchanged
 */
export function decodeHtmlEntities(text) {
  if (!text || typeof text !== 'string') {
    return text;
  }

  // The entity keys contain no regex metacharacters, so they can be joined as-is
  const entityPattern = new RegExp(Object.keys(HTML_ENTITY_MAP).join('|'), 'g');
  return text.replace(entityPattern, (entity) => HTML_ENTITY_MAP[entity]);
}
/**
 * Clean an AI response by extracting its JSON content when it contains any.
 *
 * Extraction strategies are tried in order; the first that succeeds wins:
 *   1. a fenced ```json block,
 *   2. any fenced block whose content starts with { or [ and ends with } or ],
 *   3. looser legacy variants of the above, plus inline `...` code,
 *   4. an opening fence followed by JSON with no usable closing fence
 *      (the end is located by balancing brackets),
 *   5. stripping a leading/trailing fence and checking the outer characters.
 * When nothing matches, the original response is returned untouched.
 *
 * @param {string} response - Raw AI response
 * @returns {string} - Extracted JSON text, or the original response when no
 *   JSON could be extracted; empty or non-string input is returned unchanged
 */
export function cleanSchemaResponse(response) {
  if (!response || typeof response !== 'string') {
    return response;
  }

  const trimmed = response.trim();

  // Fenced block with an explicit json language specifier
  const jsonBlockMatch = trimmed.match(/```json\s*\n([\s\S]*?)\n```/);
  if (jsonBlockMatch) {
    return jsonBlockMatch[1].trim();
  }

  // Any fenced block whose content looks like a JSON object or array
  const anyBlockMatch = trimmed.match(/```\s*\n([{\[][\s\S]*?[}\]])\s*```/);
  if (anyBlockMatch) {
    return anyBlockMatch[1].trim();
  }

  // Legacy patterns for more specific matching
  const codeBlockPatterns = [
    /```json\s*\n?([{\[][\s\S]*?[}\]])\s*\n?```/,
    /```\s*\n?([{\[][\s\S]*?[}\]])\s*\n?```/,
    /`([{\[][\s\S]*?[}\]])`/
  ];
  for (const pattern of codeBlockPatterns) {
    const match = trimmed.match(pattern);
    if (match) {
      return match[1].trim();
    }
  }

  // Opening fence followed immediately by JSON: find the matching closing bracket
  const codeBlockMatch = trimmed.match(/```(?:json)?\s*\n?\s*([{\[])/);
  if (codeBlockMatch) {
    const startIndex = codeBlockMatch.index + codeBlockMatch[0].length - 1; // Position of the bracket
    const openChar = codeBlockMatch[1];
    const closeChar = openChar === '{' ? '}' : ']';

    // Returns the index just past the balancing close bracket, or -1 if unbalanced.
    // With respectStrings, brackets inside double-quoted strings are not counted.
    const findEnd = (respectStrings) => {
      let depth = 1;
      let inString = false;
      let escaped = false;
      let index = startIndex + 1;
      while (index < trimmed.length && depth > 0) {
        const char = trimmed[index];
        if (inString) {
          if (escaped) {
            escaped = false;
          } else if (char === '\\') {
            escaped = true;
          } else if (char === '"') {
            inString = false;
          }
        } else if (respectStrings && char === '"') {
          inString = true;
        } else if (char === openChar) {
          depth++;
        } else if (char === closeChar) {
          depth--;
        }
        index++;
      }
      return depth === 0 ? index : -1;
    };

    // Prefer the string-aware scan; fall back to plain bracket counting so
    // text with unbalanced quotes is still handled as it was before
    let endIndex = findEnd(true);
    if (endIndex === -1) {
      endIndex = findEnd(false);
    }
    if (endIndex !== -1) {
      return trimmed.substring(startIndex, endIndex);
    }
  }

  // Fallback: strip fence markers from top and bottom, then check the outer characters
  let cleaned = trimmed;
  cleaned = cleaned.replace(/^```(?:json)?\s*\n?/i, '');
  cleaned = cleaned.replace(/\n?```\s*$/, '');
  cleaned = cleaned.trim();

  const firstChar = cleaned[0];
  const lastChar = cleaned[cleaned.length - 1];
  const isJsonObject = firstChar === '{' && lastChar === '}';
  const isJsonArray = firstChar === '[' && lastChar === ']';
  if (isJsonObject || isJsonArray) {
    return cleaned;
  }

  return response; // Return original if no extractable JSON found
}
/**
 * Parse a response as JSON and optionally validate the parsed value against a
 * JSON schema.
 *
 * @param {string} response - Cleaned response text to parse
 * @param {Object} [options] - Options for validation
 * @param {boolean} [options.debug=false] - Enable debug logging
 * @param {string|Object|null} [options.schema=null] - JSON schema (object or
 *   JSON string) to validate against; no schema validation when falsy
 * @param {boolean} [options.strictSchema=true] - When true, the schema is passed
 *   through enforceNoAdditionalProperties before it is compiled
 * @returns {Object} One of:
 *   - `{isValid: true, parsed}` on success
 *   - `{isValid: false, error, enhancedError, errorContext}` when parsing fails
 *     (errorContext is null when no error position could be determined)
 *   - `{isValid: false, error, schemaError}` when the schema is unusable
 *   - `{isValid: false, error, schemaErrors, formattedErrors, errorSummary, parsed}`
 *     when the parsed value does not match the schema
 */
export function validateJsonResponse(response, options = {}) {
  const { debug = false, schema = null, strictSchema = true } = options;
  // String-only operations (length, indexOf, substring) are skipped for other input
  const isText = typeof response === 'string';

  if (debug) {
    console.log(`[DEBUG] JSON validation: Starting validation for response (${isText ? response.length : 0} chars)`);
    const preview = isText ? createMessagePreview(response) : String(response);
    console.log(`[DEBUG] JSON validation: Preview: ${preview}`);
    if (schema) {
      console.log(`[DEBUG] JSON validation: Schema validation enabled`);
    }
  }

  // typeof reports 'object' for null and arrays; name them the way JSON schema does
  const describeType = (value) => {
    if (value === null) {
      return 'null';
    }
    return Array.isArray(value) ? 'array' : typeof value;
  };

  // Turn one Ajv error object into a short "at 'path': message → suggestion" line
  const formatSchemaError = (err) => {
    // Convert JSON Pointer path to dot notation for readability
    // /user/profile/age -> user.profile.age
    const path = err.instancePath
      ? err.instancePath.substring(1).replace(/\//g, '.')
      : '<root>';
    let message = '';
    let suggestion = '';

    switch (err.keyword) {
      case 'additionalProperties': {
        const extraField = err.params.additionalProperty;
        message = `Extra field '${extraField}' is not allowed`;
        suggestion = `Remove '${extraField}' or add it to the schema`;
        break;
      }
      case 'required': {
        const missingField = err.params.missingProperty;
        message = `Missing required field '${missingField}'`;
        suggestion = `Add '${missingField}' to this object`;
        break;
      }
      case 'type': {
        const expected = err.params.type;
        const actual = describeType(err.data);
        const value = JSON.stringify(err.data);
        message = `Wrong type: expected ${expected}, got ${actual} (value: ${value})`;
        suggestion = `Change value to ${expected} type`;
        break;
      }
      case 'enum': {
        const allowed = err.params.allowedValues;
        const value = JSON.stringify(err.data);
        message = `Invalid value ${value}. Allowed: ${allowed.map(v => JSON.stringify(v)).join(', ')}`;
        suggestion = `Use one of the allowed values`;
        break;
      }
      case 'minimum':
      case 'maximum': {
        const limit = err.params.limit;
        const comparison = err.params.comparison;
        message = `Value ${err.data} ${err.message} (${comparison} ${limit})`;
        suggestion = `Adjust value to meet constraint`;
        break;
      }
      case 'minLength':
      case 'maxLength': {
        const limit = err.params.limit;
        message = `String length ${err.message} (current: ${err.data.length}, ${err.keyword}: ${limit})`;
        suggestion = `Adjust string length`;
        break;
      }
      case 'pattern':
        message = `Value doesn't match required pattern: ${err.params.pattern}`;
        suggestion = `Update value to match the pattern`;
        break;
      default:
        // Fallback for other error types
        message = err.message;
    }

    // path is never empty (root errors use '<root>'), so it is always quoted
    const location = `at '${path}'`;
    return suggestion
      ? `${location}: ${message} → ${suggestion}`
      : `${location}: ${message}`;
  };

  try {
    const parseStart = Date.now();
    const parsed = JSON.parse(response);
    const parseTime = Date.now() - parseStart;
    if (debug) {
      console.log(`[DEBUG] JSON validation: Successfully parsed in ${parseTime}ms`);
      console.log(`[DEBUG] JSON validation: Object type: ${typeof parsed}, keys: ${Object.keys(parsed || {}).length}`);
    }

    // If schema provided, validate against it
    if (schema) {
      const schemaStart = Date.now();
      let schemaObj;
      try {
        // Parse schema if it's a string
        schemaObj = typeof schema === 'string' ? JSON.parse(schema) : schema;
        // Apply strict mode: enforce additionalProperties: false on all nested objects
        // unless explicitly disabled via strictSchema: false option
        if (strictSchema) {
          schemaObj = enforceNoAdditionalProperties(schemaObj);
          if (debug) {
            console.log(`[DEBUG] JSON validation: Applied strict mode - additionalProperties: false enforced on all levels`);
          }
        }
      } catch (schemaParseError) {
        if (debug) {
          console.log(`[DEBUG] JSON validation: Failed to parse schema: ${schemaParseError.message}`);
        }
        return {
          isValid: false,
          error: 'Invalid schema provided',
          schemaError: schemaParseError.message
        };
      }

      // Create the Ajv validator (Ajv's own strict mode is off; strictness
      // comes from the additionalProperties enforcement above)
      const ajv = new Ajv({
        strict: false, // Don't fail on unknown schema keywords
        allErrors: true, // Return all errors, not just the first
        verbose: true // Include schema and data in errors
      });

      // Named to avoid shadowing the module-level `validate` import
      let validateAgainstSchema;
      try {
        validateAgainstSchema = ajv.compile(schemaObj);
      } catch (compileError) {
        if (debug) {
          console.log(`[DEBUG] JSON validation: Schema compilation failed: ${compileError.message}`);
        }
        return {
          isValid: false,
          error: 'Schema compilation failed',
          schemaError: compileError.message
        };
      }

      const valid = validateAgainstSchema(parsed);
      const schemaTime = Date.now() - schemaStart;
      if (debug) {
        console.log(`[DEBUG] JSON validation: Schema validation completed in ${schemaTime}ms, valid: ${valid}`);
      }

      if (!valid) {
        // Format schema errors for better readability and actionability
        const formattedErrors = validateAgainstSchema.errors.map(formatSchemaError);
        const errorSummary = formattedErrors.join('\n ');
        if (debug) {
          console.log(`[DEBUG] JSON validation: Schema validation errors:\n ${errorSummary}`);
        }
        return {
          isValid: false,
          error: 'Schema validation failed',
          schemaErrors: validateAgainstSchema.errors,
          formattedErrors: formattedErrors,
          errorSummary: `Schema validation failed:\n ${errorSummary}`,
          parsed: parsed // Still return parsed data for debugging
        };
      }

      if (debug) {
        console.log(`[DEBUG] JSON validation: Schema validation passed`);
      }
    }

    return { isValid: true, parsed };
  } catch (error) {
    // Extract error position from error message if available
    // Old format: "Unexpected token < in JSON at position 0"
    // New format: "Unexpected token '<', \"...\" is not valid JSON"
    const positionMatch = error.message.match(/position (\d+)/);
    let errorPosition = positionMatch ? parseInt(positionMatch[1], 10) : null;

    // New format carries no position: approximate it with the first
    // occurrence of the reported token (only possible for string input)
    if (errorPosition === null && isText) {
      const tokenMatch = error.message.match(/Unexpected token '(.)', /);
      if (tokenMatch && tokenMatch[1]) {
        errorPosition = response.indexOf(tokenMatch[1]);
      }
    }

    // Create enhanced error message with context snippet
    let enhancedError = error.message;
    let errorContext = null;
    if (isText && errorPosition !== null && errorPosition >= 0 && response.length > 0) {
      // Context window: 50 chars before and after the error position
      const contextRadius = 50;
      const startPos = Math.max(0, errorPosition - contextRadius);
      const endPos = Math.min(response.length, errorPosition + contextRadius);

      const beforeError = response.substring(startPos, errorPosition);
      const atError = response[errorPosition] || '';
      const afterError = response.substring(errorPosition + 1, endPos);

      // Build error context with a visual pointer under the offending character
      const snippet = beforeError + atError + afterError;
      const pointer = ' '.repeat(beforeError.length) + '^';
      errorContext = {
        position: errorPosition,
        snippet: snippet,
        pointer: pointer,
        beforeError: beforeError,
        atError: atError,
        afterError: afterError
      };

      // Human-readable error context for display
      enhancedError = [
        `${error.message}`,
        `Error location (position ${errorPosition}):`,
        `${snippet}`,
        `${pointer} here`
      ].join('\n');
    }

    if (debug) {
      console.log(`[DEBUG] JSON validation: Parse failed with error: ${error.message}`);
      console.log(`[DEBUG] JSON validation: Error at position: ${errorPosition !== null ? errorPosition : 'unknown'}`);
      if (errorContext) {
        console.log(`[DEBUG] JSON validation: Error context:\n${errorContext.snippet}\n${errorContext.pointer}`);
      }
      // Try to identify common JSON issues
      if (error.message.includes('Unexpected token')) {
        console.log(`[DEBUG] JSON validation: Likely syntax error - unexpected character`);
      } else if (error.message.includes('Unexpected end')) {
        console.log(`[DEBUG] JSON validation: Likely incomplete JSON - missing closing brackets`);
      } else if (error.message.includes('property name')) {
        console.log(`[DEBUG] JSON validation: Likely unquoted property names`);
      }
    }

    return {
      isValid: false,
      error: error.message,
      enhancedError: enhancedError,
      errorContext: errorContext
    };
  }
}
/**
 * Lightweight check that a response contains XML-like markup.
 *
 * This only verifies that at least one tag-shaped token (`<name ...>` or
 * `</name>`) is present. It does NOT check that tags are balanced or that the
 * document is well-formed.
 *
 * @param {string} response - Cleaned response
 * @returns {Object} - {isValid: boolean, error?: string}; non-string input is
 *   reported as containing no XML tags
 */
export function validateXmlResponse(response) {
  if (typeof response !== 'string') {
    return { isValid: false, error: 'No XML tags found' };
  }

  // Opening or closing tag made of word characters, whitespace and simple attribute syntax
  const xmlPattern = /<\/?[\w\s="'.-]+>/;
  if (!xmlPattern.test(response)) {
    return { isValid: false, error: 'No XML tags found' };
  }

  // A tag match already implies both '<' and '>' are present
  return { isValid: true };
}
/**
 * Clean a raw AI response and, on request, run JSON and/or XML validation on
 * the cleaned text.
 *
 * @param {string} response - Raw AI response
 * @param {string} schema - Original schema; only consulted for debug logging
 * @param {Object} options - Processing options
 * @param {boolean} [options.validateJson=false] - Attach `jsonValidation` to the result
 * @param {boolean} [options.validateXml=false] - Attach `xmlValidation` to the result
 * @param {boolean} [options.debug=false] - Log progress and attach a `debug` summary
 * @returns {Object} - {cleaned: string, debug?: Object, jsonValidation?: Object, xmlValidation?: Object}
 */
export function processSchemaResponse(response, schema, options = {}) {
  const { validateJson = false, validateXml = false, debug = false } = options;

  if (debug) {
    console.log(`[DEBUG] Schema processing: Starting with response length ${response.length}`);
    console.log(`[DEBUG] Schema processing: Schema type detection...`);
    console.log(
      isJsonSchema(schema)
        ? `[DEBUG] Schema processing: Detected JSON schema`
        : `[DEBUG] Schema processing: Non-JSON schema detected`
    );
  }

  // Time the cleaning step so the debug summary can report it
  const startedAt = Date.now();
  const cleaned = cleanSchemaResponse(response);
  const elapsedMs = Date.now() - startedAt;

  const result = { cleaned };

  if (debug) {
    console.log(`[DEBUG] Schema processing: Cleaning completed in ${elapsedMs}ms`);

    const changed = response !== cleaned;
    const head = (text) => text.substring(0, 100) + (text.length > 100 ? '...' : '');

    result.debug = {
      originalLength: response.length,
      cleanedLength: cleaned.length,
      wasModified: changed,
      cleaningTimeMs: elapsedMs,
      removedContent: changed ? { before: head(response), after: head(cleaned) } : null
    };

    if (changed) {
      console.log(`[DEBUG] Schema processing: Response was modified during cleaning`);
      console.log(`[DEBUG] Schema processing: Original length: ${response.length}, cleaned length: ${cleaned.length}`);
    } else {
      console.log(`[DEBUG] Schema processing: Response unchanged during cleaning`);
    }
  }

  // Optional validation passes, each stored under its own key
  if (validateJson) {
    if (debug) {
      console.log(`[DEBUG] Schema processing: Running JSON validation...`);
    }
    result.jsonValidation = validateJsonResponse(cleaned, { debug });
  }

  if (validateXml) {
    if (debug) {
      console.log(`[DEBUG] Schema processing: Running XML validation...`);
    }
    result.xmlValidation = validateXmlResponse(cleaned);
  }

  return result;
}
/**
 * Heuristically decide whether a schema description asks for JSON output.
 *
 * The check is case-insensitive and succeeds as soon as any one hint is found:
 * the text is shaped like an object or array literal, mentions `"type"`
 * together with `object`, mentions `"properties"`, or contains the word `json`.
 *
 * @param {string} schema - The schema string
 * @returns {boolean} - True if schema appears to be JSON-based
 */
export function isJsonSchema(schema) {
  if (typeof schema !== 'string' || !schema) {
    return false;
  }

  const text = schema.trim().toLowerCase();

  // Shaped like a JSON object or array literal
  if (text.startsWith('{') && text.includes('}')) {
    return true;
  }
  if (text.startsWith('[') && text.includes(']')) {
    return true;
  }

  // Mentions JSON-schema vocabulary
  if (text.includes('"type"') && text.includes('object')) {
    return true;
  }
  if (text.includes('"properties"')) {
    return true;
  }

  // Mentions JSON by name or media type
  return text.includes('json') || text.includes('application/json');
}
/**
 * Decide whether a JSON string looks like a JSON schema definition rather than
 * data conforming to one.
 *
 * The string is parsed and inspected for schema hallmarks ($schema, $id,
 * title with description, typed properties/items, required list, definitions,
 * additionalProperties, patternProperties, composition keywords). Two or more
 * hallmarks classify it as a schema definition. Input that is not a non-empty
 * string, or that fails to parse or inspect, yields false.
 *
 * @param {string} jsonString - The JSON string to check
 * @param {Object} options - Options
 * @param {boolean} [options.debug=false] - Enable debug logging
 * @returns {boolean} - True if this appears to be a schema definition
 */
export function isJsonSchemaDefinition(jsonString, options = {}) {
  const { debug = false } = options;

  if (typeof jsonString !== 'string' || !jsonString) {
    if (debug) {
      console.log(`[DEBUG] Schema definition check: Invalid input (${typeof jsonString})`);
    }
    return false;
  }

  try {
    const candidate = JSON.parse(jsonString);
    if (debug) {
      console.log(`[DEBUG] Schema definition check: JSON parsed successfully, checking indicators...`);
    }

    // Each entry pairs a display label with the (truthy when present) evidence
    const hallmarks = [
      ['$schema', candidate.$schema],
      ['$id', candidate.$id],
      ['title+description', candidate.title && candidate.description],
      ['object+properties', candidate.type === 'object' && candidate.properties],
      ['array+items', candidate.type === 'array' && candidate.items],
      ['required', candidate.required && Array.isArray(candidate.required)],
      ['definitions', candidate.definitions],
      ['additionalProperties', candidate.additionalProperties !== undefined],
      ['patternProperties', candidate.patternProperties],
      ['anyOf/oneOf/allOf', candidate.anyOf || candidate.oneOf || candidate.allOf]
    ];

    const presentLabels = [];
    for (const [label, evidence] of hallmarks) {
      if (evidence) {
        presentLabels.push(label);
      }
    }

    const indicatorCount = presentLabels.length;
    const isSchemaDefinition = indicatorCount >= 2;

    if (debug) {
      console.log(`[DEBUG] Schema definition check: Found ${indicatorCount} schema indicators`);
      console.log(`[DEBUG] Schema definition check: Indicators found: ${presentLabels.join(', ')}`);
      console.log(`[DEBUG] Schema definition check: Is schema definition: ${isSchemaDefinition}`);
    }

    return isSchemaDefinition;
  } catch (error) {
    if (debug) {
      console.log(`[DEBUG] Schema definition check: JSON parse failed: ${error.message}`);
    }
    return false;
  }
}
/**
 * Build the follow-up prompt sent to the AI after it returned invalid JSON.
 *
 * The wording escalates with the retry attempt (three levels; later attempts
 * reuse the last one). At most the first 500 characters of the invalid
 * response are echoed back.
 *
 * @param {string} invalidResponse - The invalid JSON response
 * @param {string} schema - The original schema
 * @param {string|Object} errorOrValidation - The JSON parsing error string, or a
 *   validation result object whose `enhancedError` (else `error`) is reported
 * @param {number} [retryCount=0] - The current retry attempt (0-based)
 * @returns {string} - Correction prompt for the AI
 */
export function createJsonCorrectionPrompt(invalidResponse, schema, errorOrValidation, retryCount = 0) {
  // A validation result object carries a richer message; a plain string is used as-is
  const isValidationResult = errorOrValidation !== null && typeof errorOrValidation === 'object';
  const reportedError = isValidationResult
    ? (errorOrValidation.enhancedError || errorOrValidation.error)
    : errorOrValidation;

  // Escalating wording, indexed by retry attempt
  const tones = [
    {
      prefix: 'CRITICAL JSON ERROR:',
      instruction: 'You MUST fix this and return ONLY valid JSON.',
      emphasis: 'Return ONLY the corrected JSON, with no additional text or markdown formatting.'
    },
    {
      prefix: 'URGENT - JSON PARSING FAILED:',
      instruction: 'This is your second chance. Return ONLY valid JSON that can be parsed by JSON.parse().',
      emphasis: 'ABSOLUTELY NO explanatory text, greetings, or formatting. ONLY JSON.'
    },
    {
      prefix: 'FINAL ATTEMPT - CRITICAL JSON ERROR:',
      instruction: 'This is the final retry. You MUST return ONLY raw JSON without any other content.',
      emphasis: 'EXAMPLE: {"key": "value"} NOT: ```json{"key": "value"}``` NOT: Here is the JSON: {"key": "value"}'
    }
  ];
  const { prefix, instruction, emphasis } = tones[Math.min(retryCount, tones.length - 1)];

  const ellipsis = invalidResponse.length > 500 ? '...' : '';

  return [
    `${prefix} Your previous response is not valid JSON and cannot be parsed. Here's what you returned:`,
    `${invalidResponse.substring(0, 500)}${ellipsis}`,
    `Error: ${reportedError}`,
    `${instruction}`,
    `Schema to match:`,
    `${schema}`,
    `${emphasis}`
  ].join('\n');
}
/**
 * Build the follow-up prompt for the case where the AI echoed back a JSON
 * schema definition instead of data conforming to the schema.
 *
 * The wording escalates with the retry attempt (three levels; later attempts
 * reuse the last one). At most the first 300 characters of the returned
 * definition are echoed back.
 *
 * @param {string} schemaDefinition - The JSON schema definition that was incorrectly returned
 * @param {string} originalSchema - The original schema that should be followed
 * @param {number} [retryCount=0] - The current retry attempt (0-based)
 * @returns {string} - Correction prompt for the AI
 */
export function createSchemaDefinitionCorrectionPrompt(schemaDefinition, originalSchema, retryCount = 0) {
  // Escalating wording, indexed by retry attempt
  const tones = [
    {
      prefix: 'CRITICAL MISUNDERSTANDING:',
      instruction: 'You returned a JSON schema definition instead of data. You must return ACTUAL DATA that follows the schema.',
      example: 'Instead of: {"type": "object", "properties": {...}}\nReturn: {"actualData": "value", "realField": 123}'
    },
    {
      prefix: 'URGENT - WRONG RESPONSE TYPE:',
      instruction: 'You are returning the SCHEMA DEFINITION itself. I need DATA that MATCHES the schema, not the schema structure.',
      example: 'Schema defines structure - you provide content that fits that structure!'
    },
    {
      prefix: 'FINAL ATTEMPT - SCHEMA VS DATA CONFUSION:',
      instruction: 'STOP returning schema definitions! Return REAL DATA that conforms to the schema structure.',
      example: `If schema has 'properties.name', return {"name": "actual_value"} NOT {"properties": {"name": {...}}}`
    }
  ];
  const { prefix, instruction, example } = tones[Math.min(retryCount, tones.length - 1)];

  const ellipsis = schemaDefinition.length > 300 ? '...' : '';

  return [
    `${prefix} You returned a JSON schema definition when I asked for data that matches a schema.`,
    `What you returned (WRONG - this is a schema definition):`,
    `${schemaDefinition.substring(0, 300)}${ellipsis}`,
    `What I need: ACTUAL DATA that conforms to this schema structure:`,
    `${originalSchema}`,
    `${instruction}`,
    `${example}`,
    `Return ONLY the JSON data object/array that follows the schema structure. NO schema definitions, NO explanations, NO markdown formatting.`
  ].join('\n');
}
/**
 * Heuristically decide whether a schema description asks for Mermaid diagrams.
 *
 * The check is a case-insensitive substring search for Mermaid-related
 * keywords (the word "mermaid", "diagram", and several diagram type names).
 *
 * @param {string} schema - The schema string
 * @returns {boolean} - True if schema appears to expect Mermaid diagrams
 */
export function isMermaidSchema(schema) {
  if (typeof schema !== 'string' || !schema) {
    return false;
  }

  const keywords = [
    'mermaid',
    'diagram',
    'flowchart',
    'sequence',
    'gantt',
    'pie chart',
    'state diagram',
    'class diagram',
    'entity relationship',
    'user journey',
    'git graph',
    'requirement diagram',
    'c4 context'
  ];

  const haystack = schema.trim().toLowerCase();
  return keywords.some((keyword) => haystack.includes(keyword));
}
/**
 * Collect every ```mermaid fenced block in a markdown response together with
 * its position in the text.
 *
 * Anything following `mermaid` on the fence line is captured (trimmed) as
 * `attributes`. Diagram content is deliberately left untrimmed: maid 0.0.6
 * requires trailing newlines for sequence diagrams.
 *
 * @param {string} response - Response that may contain markdown with mermaid blocks
 * @returns {Object} - {diagrams: Array<{content: string, fullMatch: string,
 *   startIndex: number, endIndex: number, attributes: string}>, cleanedResponse: string}
 *   where cleanedResponse is currently the response unchanged
 */
export function extractMermaidFromMarkdown(response) {
  if (typeof response !== 'string' || !response) {
    return { diagrams: [], cleanedResponse: response };
  }

  // Group 1: rest of the fence line (attributes); group 2: everything up to the closing fence
  const fencedMermaid = /```mermaid([^\n]*)\n([\s\S]*?)```/gi;

  const diagrams = Array.from(response.matchAll(fencedMermaid), (found) => {
    const [block, fenceTail, body] = found;
    return {
      content: body,
      fullMatch: block,
      startIndex: found.index,
      endIndex: found.index + block.length,
      attributes: (fenceTail || '').trim()
    };
  });

  return { diagrams, cleanedResponse: response };
}
/**
 * Replace mermaid code blocks in the original markdown with corrected versions.
 *
 * Each diagram object supplies the span to replace (`startIndex`/`endIndex`
 * into originalResponse), the new `content`, and optional `attributes` to
 * restore on the fence line. Replacements are applied from the end of the
 * text backwards so earlier indices stay valid.
 *
 * The closing fence is always placed on its own line: a newline is added
 * after the content only when the content does not already end with one, so
 * re-inserting untrimmed extracted content reproduces the original block.
 *
 * @param {string} originalResponse - Original response with markdown
 * @param {Array} correctedDiagrams - Array of corrected diagram objects
 * @returns {string} - Response with corrected diagrams in original format
 */
export function replaceMermaidDiagramsInMarkdown(originalResponse, correctedDiagrams) {
  if (!originalResponse || typeof originalResponse !== 'string') {
    return originalResponse;
  }
  if (!correctedDiagrams || correctedDiagrams.length === 0) {
    return originalResponse;
  }

  let modifiedResponse = originalResponse;

  // Sort diagrams by start index in reverse order to preserve indices during replacement
  const sortedDiagrams = [...correctedDiagrams].sort((a, b) => b.startIndex - a.startIndex);

  for (const diagram of sortedDiagrams) {
    // Reconstruct the code block with original attributes if they existed
    const attributesStr = diagram.attributes ? ` ${diagram.attributes}` : '';
    const content = String(diagram.content);
    // Avoid a blank line before the closing fence when content keeps its trailing newline
    const closingNewline = content.endsWith('\n') ? '' : '\n';
    const newCodeBlock = `\`\`\`mermaid${attributesStr}\n${content}${closingNewline}\`\`\``;

    // Replace the original code block
    modifiedResponse = modifiedResponse.slice(0, diagram.startIndex) +
      newCodeBlock +
      modifiedResponse.slice(diagram.endIndex);
  }

  return modifiedResponse;
}
/**
 * Validate a single Mermaid diagram with maid's `validate`.
 *
 * Only maid findings with severity 'error' make a diagram invalid; other
 * findings are ignored. The diagram text is passed through untrimmed because
 * maid 0.0.6 requires trailing newlines for sequence diagrams.
 *
 * @param {string} diagram - Mermaid diagram code (without markdown fences)
 * @returns {Promise<Object>} - {isValid: boolean, diagramType?: string,
 *   error?: string, detailedError?: string, errors?: Array}
 */
export async function validateMermaidDiagram(diagram) {
  if (typeof diagram !== 'string' || !diagram) {
    return { isValid: false, error: 'Empty or invalid diagram input' };
  }

  try {
    // Fence markers mean the caller passed markdown rather than extracted diagram text
    if (diagram.includes('```')) {
      return {
        isValid: false,
        error: 'Diagram contains markdown code block markers',
        detailedError: 'Mermaid diagram should not contain ``` markers when extracted from markdown'
      };
    }

    // maid returns { type: string, errors: array }
    const outcome = validate(diagram);
    const blocking = (outcome.errors || []).filter((finding) => finding.severity === 'error');
    const diagramType = outcome.type || 'unknown';

    if (blocking.length === 0) {
      return { isValid: true, diagramType };
    }

    // Render each error as "line L:C - message", omitting the parts maid did not supply
    const messages = blocking.map((finding) => {
      let where = '';
      if (finding.line) {
        where = `line ${finding.line}`;
        if (finding.column) {
          where += `:${finding.column}`;
        }
      }
      return where ? `${where} - ${finding.message}` : finding.message;
    });

    return {
      isValid: false,
      diagramType,
      error: messages[0] || 'Validation failed',
      detailedError: messages.join('\n'),
      errors: blocking // Include only actual errors for AI fixing
    };
  } catch (error) {
    return {
      isValid: false,
      error: error.message || 'Unknown mermaid parsing error',
      detailedError: error.stack || error.toString()
    };
  }
}
/**
 * Validate every mermaid code block found in a response.
 *
 * Blocks are extracted with extractMermaidFromMarkdown and checked one after
 * another with validateMermaidDiagram. A response with no mermaid blocks is
 * reported as invalid.
 *
 * @param {string} response - Response that may contain mermaid diagrams
 * @returns {Promise<Object>} - {isValid: boolean, diagrams: Array, errors?: Array}
 *   where each diagrams entry merges the extracted block with its validation
 *   result, and errors is undefined when every diagram is valid
 */
export async function validateMermaidResponse(response) {
  const { diagrams } = extractMermaidFromMarkdown(response);

  if (diagrams.length === 0) {
    return { isValid: false, diagrams: [], errors: ['No mermaid diagrams found in response'] };
  }

  const checked = [];
  const problems = [];

  for (const [position, block] of diagrams.entries()) {
    const verdict = await validateMermaidDiagram(block.content);
    checked.push({ ...block, ...verdict });

    if (!verdict.isValid) {
      // Diagrams are numbered from 1 in user-facing messages
      problems.push(`Diagram ${position + 1}: ${verdict.error}`);
    }
  }

  return {
    isValid: checked.every((entry) => entry.isValid),
    diagrams: checked,
    errors: problems.length > 0 ? problems : undefined
  };
}
/**
 * Build the follow-up prompt sent to the AI after it returned invalid Mermaid
 * diagrams.
 *
 * The prompt echoes the full invalid response, lists the validation errors
 * (numbered from 1), adds a detail section for each invalid diagram (first 100
 * characters of content, its error, and the detailed error when it differs),
 * and ends with the schema and formatting instructions.
 *
 * @param {string} invalidResponse - The response with invalid Mermaid
 * @param {string} schema - The original schema
 * @param {Array} errors - Array of validation errors
 * @param {Array} diagrams - Array of diagram validation results
 * @returns {string} - Correction prompt for the AI
 */
export function createMermaidCorrectionPrompt(invalidResponse, schema, errors, diagrams) {
  const parts = [
    `Your previous response contains invalid Mermaid diagrams that cannot be parsed. Here's what you returned:\n${invalidResponse}\nValidation Errors:`
  ];

  errors.forEach((error, position) => {
    parts.push(`\n${position + 1}. ${error}`);
  });

  if (diagrams && diagrams.length > 0) {
    parts.push(`\n\nDiagram Details:`);

    diagrams.forEach((entry, position) => {
      if (entry.isValid) {
        return;
      }

      // Numbering follows the position in the full list, valid diagrams included
      const body = entry.content || entry.diagram || '';
      const ellipsis = body.length > 100 ? '...' : '';

      parts.push(`\n\nDiagram ${position + 1}:`);
      parts.push(`\n- Content: ${body.substring(0, 100)}${ellipsis}`);
      parts.push(`\n- Error: ${entry.error}`);
      if (entry.detailedError && entry.detailedError !== entry.error) {
        parts.push(`\n- Details: ${entry.detailedError}`);
      }
    });
  }

  parts.push(
    `\n\nPlease correct your response to include valid Mermaid diagrams that match this schema:\n${schema}\nEnsure all Mermaid diagrams are properly formatted within \`\`\`mermaid code blocks and follow correct Mermaid syntax.`
  );

  return parts.join('');
}
// Module-level counter appended to generated session IDs so that two IDs
// created in the same millisecond (same Date.now() value) still differ.
// It is post-incremented each time a default session ID is generated.
let sessionIdCounter = 0;
/**
 * Specialized JSON fixing agent.
 *
 * Wraps a separate ProbeAgent instance, configured with a JSON-repair system
 * prompt, and asks it to turn malformed or schema-violating JSON into valid
 * JSON. The ProbeAgent class itself is imported lazily (see initializeAgent)
 * to avoid a circular dependency between the two modules.
 */
export class JsonFixingAgent {
  /**
   * @param {Object} [options] - Agent configuration
   * @param {string} [options.sessionId] - Session ID; auto-generated as `json-fixer-<timestamp>-<counter>` when omitted
   * @param {string} [options.path] - Working path handed to the agent; defaults to process.cwd()
   * @param {*} [options.provider] - Forwarded unchanged to ProbeAgent
   * @param {*} [options.model] - Forwarded unchanged to ProbeAgent
   * @param {boolean} [options.debug] - Enables [DEBUG] console output
   * @param {*} [options.tracer] - Forwarded unchanged to ProbeAgent
   */
  constructor(options = {}) {
    // Import ProbeAgent dynamically to avoid circular dependencies
    this.ProbeAgent = null;
    // Underlying agent instance; created on first use by initializeAgent()
    this.agent = null;
    this.options = {
      sessionId: options.sessionId || `json-fixer-${Date.now()}-${sessionIdCounter++}`,
      path: options.path || process.cwd(),
      provider: options.provider,
      model: options.model,
      debug: options.debug,
      tracer: options.tracer,
      // Set to false since we're only fixing JSON syntax, not implementing code
      allowEdit: false
    };
  }

  /**
   * Get the specialized prompt for JSON fixing
   * @returns {string} - System prompt used as the nested agent's customPrompt
   */
  getJsonFixingPrompt() {
    return `You are a world-class JSON syntax correction specialist. Your expertise lies in analyzing and fixing JSON syntax errors while preserving the original data structure and intent.
CORE RESPONSIBILITIES:
- Analyze JSON for syntax errors and structural issues
- Fix syntax errors while maintaining the original data's semantic meaning
- Ensure JSON follows proper RFC 8259 specification
- Handle all JSON structures: objects, arrays, primitives, nested structures
JSON SYNTAX RULES:
1. **Property names**: Must be enclosed in double quotes
2. **String values**: Must use double quotes (not single quotes)
3. **Numbers**: Can be integers or decimals, no quotes needed
4. **Booleans**: true or false (lowercase, no quotes)
5. **Null**: null (lowercase, no quotes)
6. **Arrays**: Comma-separated values in square brackets [...]
7. **Objects**: Comma-separated key-value pairs in curly braces {...}
8. **No trailing commas**: Last item in array/object must not have a trailing comma
9. **Escape sequences**: Special characters must be escaped (\\n, \\t, \\", \\\\, etc.)
COMMON ERRORS TO FIX:
1. **Unquoted property names**: {name: "value"} → {"name": "value"}
2. **Single quotes**: {'key': 'value'} → {"key": "value"}
3. **Trailing commas**: {"a": 1,} → {"a": 1}
4. **Unquoted strings**: {key: value} → {"key": "value"}
5. **Missing commas**: {"a": 1 "b": 2} → {"a": 1, "b": 2}
6. **Extra commas**: {"a": 1,, "b": 2} → {"a": 1, "b": 2}
7. **Unclosed brackets/braces**: {"key": "value" → {"key": "value"}
8. **Invalid escape sequences**: Fix or remove
9. **Comments**: Remove // or /* */ comments (not allowed in JSON)
10. **Undefined values**: Replace undefined with null
FIXING METHODOLOGY:
1. **Identify the error location** from the error message
2. **Analyze the context** around the error
3. **Apply the appropriate fix** based on JSON syntax rules
4. **Preserve data intent** - never change the meaning of the data
5. **Validate the result** - ensure it's parseable JSON
CRITICAL RULES:
- ALWAYS output only the corrected JSON
- NEVER add explanations, comments, or additional text
- NEVER wrap in markdown code blocks (no \`\`\`json)
- PRESERVE the original data structure and values
- FIX only syntax errors, don't modify the data itself
- ENSURE the output is valid, parseable JSON
When presented with broken JSON, analyze it thoroughly and provide the corrected version that maintains the original intent while fixing all syntax issues.`;
  }

  /**
   * Initialize the ProbeAgent if not already done
   * @returns {Promise<Object>} - The (cached) nested agent instance
   */
  async initializeAgent() {
    if (!this.ProbeAgent) {
      // Dynamic import to avoid circular dependency
      const { ProbeAgent } = await import('./ProbeAgent.js');
      this.ProbeAgent = ProbeAgent;
    }
    if (!this.agent) {
      this.agent = new this.ProbeAgent({
        sessionId: this.options.sessionId,
        customPrompt: this.getJsonFixingPrompt(),
        path: this.options.path,
        provider: this.options.provider,
        model: this.options.model,
        debug: this.options.debug,
        tracer: this.options.tracer,
        allowEdit: this.options.allowEdit,
        maxIterations: 5, // Allow multiple iterations for JSON fixing
        disableJsonValidation: true // CRITICAL: Disable JSON validation in nested agent to prevent infinite recursion
      });
    }
    return this.agent;
  }

  /**
   * Fix invalid JSON using the specialized agent
   * @param {string} invalidJson - The broken JSON string
   * @param {string} schema - The original schema for context
   * @param {Object} validationResult - Validation result with error details
   *   (reads `error`, `enhancedError`, `errorSummary` and `schemaErrors`)
   * @param {number} attemptNumber - Current attempt number (for logging)
   * @returns {Promise<string>} - The corrected JSON
   * @throws {Error} "Failed to fix JSON: <reason>" when the nested agent call or
   *   response cleaning fails; the underlying error is attached as `cause`
   */
  async fixJson(invalidJson, schema, validationResult, attemptNumber = 1) {
    await this.initializeAgent();

    // Tolerate a missing validation result so a usable prompt is still produced
    const details = validationResult ?? {};

    // Build error context: prefer the enhanced error over the raw message and
    // never let the literal text "undefined" leak into the prompt
    const errorContext = details.enhancedError || details.error || 'Unknown validation error';

    // Add schema validation errors if present (a pre-formatted summary wins
    // over the raw list of schema errors)
    let schemaErrorDetails = '';
    if (details.errorSummary) {
      schemaErrorDetails = `\n\nSchema Validation Errors:\n${details.errorSummary}`;
    } else if (details.schemaErrors && details.schemaErrors.length > 0) {
      const errors = details.schemaErrors.map(err => {
        const path = err.instancePath || '(root)';
        return ` ${path}: ${err.message}`;
      }).join('\n');
      schemaErrorDetails = `\n\nSchema Validation Errors:\n${errors}`;
    }

    // Extra guidance is only relevant when the schema itself was violated
    const schemaGuidance = schemaErrorDetails
      ? 'CRITICAL: Pay special attention to the schema validation errors above. The JSON may be syntactically valid but does not conform to the required schema. Make sure to:\n- Include all required fields\n- Use correct data types\n- Remove any additional properties not defined in the schema (if additionalProperties is false)\n- Ensure all values match their schema constraints\n\n'
      : '';

    const prompt = `Fix the following invalid JSON.
Error: ${errorContext}${schemaErrorDetails}
Invalid JSON:
${invalidJson}
Expected schema structure:
${schema}
${schemaGuidance}Provide only the corrected JSON without any markdown formatting or explanations.`;

    try {
      if (this.options.debug) {
        console.log(`[DEBUG] JSON fixing: Attempt ${attemptNumber} to fix JSON with separate agent`);
      }
      // Call the specialized JSON fixing agent
      const result = await this.agent.answer(prompt, []);
      // Clean the result (in case AI added markdown despite instructions)
      const cleaned = cleanSchemaResponse(result);
      if (this.options.debug) {
        console.log(`[DEBUG] JSON fixing: Agent returned ${cleaned.length} chars`);
      }
      return cleaned;
    } catch (error) {
      if (this.options.debug) {
        console.error(`[DEBUG] JSON fixing failed: ${error.message}`);
      }
      // Keep the original error (and its stack) reachable through `cause`
      throw new Error(`Failed to fix JSON: ${error.message}`, { cause: error });
    }
  }

  /**
   * Get token usage information from the specialized agent
   * @returns {Object} - Token usage statistics, or null if the agent was never initialized
   */
  getTokenUsage() {
    return this.agent ? this.agent.getTokenUsage() : null;
  }

  /**
   * Cancel any ongoing operations (no-op when the agent was never initialized)
   */
  cancel() {
    if (this.agent) {
      this.agent.cancel();
    }
  }
}
/**
 * Run maid's automatic fixer over a Mermaid diagram and re-validate the output.
 *
 * The diagram is passed through fixText at the 'all' level and the fixed text
 * is then checked with validate. Any exception raised along the way is caught
 * and reported through the returned `errors` array instead of being thrown.
 *
 * @param {string} diagramContent - The diagram content to fix
 * @param {Object} options - Fix options
 * @param {boolean} [options.debug=false] - Log progress with a [DEBUG] prefix
 * @returns {Object} - {fixed: string, wasFixed: boolean, errors: Array, fixLevel: string|null}
 *   `fixed` is maid's output (or the untouched input on failure), `wasFixed`
 *   tells whether that output differs from the input, `errors` holds whatever
 *   validation errors remain, and `fixLevel` is 'all' or null on failure.
 */
export async function tryMaidAutoFix(diagramContent, options = {}) {
  const { debug: verbose = false } = options;

  try {
    // Only the most aggressive fix level ('all') is ever attempted
    if (verbose) {
      console.log(`[DEBUG] Mermaid maid: Trying 'all' level auto-fixes...`);
    }

    const outcome = fixText(diagramContent, { level: 'all' });
    const report = validate(outcome.fixed);

    // maid reports { type, errors }; an empty errors array means the diagram is valid
    const isClean = Boolean(report.errors) && report.errors.length === 0;

    let remainingErrors;
    if (isClean) {
      if (verbose) {
        console.log(`[DEBUG] Mermaid maid: 'All' level fixes succeeded`);
      }
      remainingErrors = [];
    } else {
      // Best effort only: hand maid's structured errors back so an AI pass can continue
      if (verbose) {
        console.log(`[DEBUG] Mermaid maid: Auto-fixes couldn't resolve all issues, ${report.errors?.length || 0} errors remain`);
      }
      remainingErrors = report.errors || [];
    }

    return {
      fixed: outcome.fixed,
      wasFixed: outcome.fixed !== diagramContent,
      errors: remainingErrors,
      fixLevel: 'all'
    };
  } catch (failure) {
    if (verbose) {
      console.error(`[DEBUG] Mermaid maid: Auto-fix error: ${failure.message}`);
    }
    // Fall back to the untouched input and surface the failure as a single error entry
    return {
      fixed: diagramContent,
      wasFixed: false,
      errors: [{ message: failure.message }],
      fixLevel: null
    };
  }
}
/**
* Specialized Mermaid diagram fixing agent
* Uses a separate ProbeAgent instance optimized for Mermaid syntax correction
*/
export class MermaidFixingAgent {
constructor(options = {}) {
// Import ProbeAgent dynamically to avoid circular dependencies
this.ProbeAgent = null;
this.options = {
sessionId: options.sessionId || `mermaid-fixer-${Date.now()}-${sessionIdCounter++}`,
path: options.path || process.cwd(),
provider: options.provider,
model: options.model,
debug: options.debug,
tracer: options.tracer,
// Set to false since we're only fixing syntax, not implementing code
allowEdit: false
};
}
/**
* Get the specialized prompt for mermaid diagram fixing
*/
getMermaidFixingPrompt() {
return `You are a world-class Mermaid diagram syntax correction specialist. Your expertise lies in analyzing and fixing Mermaid diagram syntax errors while preserving the original intent, structure, and semantic meaning.
CORE RESPONSIBILITIES:
- Analyze Mermaid diagrams for syntax errors and structural issues
- Fix syntax errors while maintaining the original diagram's logical flow
- Ensure diagrams follow proper Mermaid syntax rules and best practices
- Handle all diagram types: flowchart, sequence, gantt, pie, state, class, er, journey, gitgraph, requirement, c4
MERMAID DIAGRAM TYPES & SYNTAX RULES:
1. **Flowchart/Graph**: Start with 'graph' or 'flowchart', use proper node definitions and arrows
2. **Sequence**: Start with 'sequenceDiagram', use proper participant and message syntax
3. **Gantt**: Start with 'gantt', use proper date formats and task definitions
4. **State**: Start with 'stateDiagram-v2', use proper state transitions
5. **Class**: Start with 'classDiagram', use proper class and relationship syntax
6. **Entity-Relationship**: Start with 'erDiagram', use proper entity and relationship syntax
FIXING METHODOLOGY:
1. **Identify diagram type** from the first line or content analysis
2. **Validate syntax** against Mermaid specification for that diagram type
3. **Fix errors systematically**:
- Unclosed brackets, parentheses, or quotes
- Missing or incorrect arrows and connectors
- Invalid node IDs or labels
- Incorrect formatting for diagram-specific elements
- **Parentheses in node labels or subgraph names**: Wrap text containing parentheses in double quotes to prevent GitHub parsing errors
- Single quotes in node labels (GitHub's parser expects double quotes)
- **Edge/Arrow labels with spaces**: MUST use pipe syntax like "A --|Label Text|--> B" or "A -- |Label Text| --> B". NEVER use double quotes like "A -- \\"Label\\" --> B" which is INVALID
4. **Preserve semantic meaning** - never change the intended flow or relationships
5. **Use proper escaping** for special characters and spaces
6. **Ensure consistency** in naming conventions and formatting
CRITICAL RULES:
- ALWAYS output only the corrected Mermaid code within a \`\`\`mermaid code block
- NEVER add explanations, comments, or additional text outside the code block
- PRESERVE the original diagram's intended meaning and flow
- FIX syntax errors without changing the logical structure
- ENSURE the output is valid, parseable Mermaid syntax
- WRAP text containing parentheses in double quotes for GitHub compatibility
When presented with a broken Mermaid diagram, analyze it thoroughly and provide the corrected version that maintains the original intent while fixing all syntax issues.`;
}
/**
* Initialize the ProbeAgent if not already done
*/
async initializeAgent() {
if (!this.ProbeAgent) {
// Dynamic import to avoid circular dependency
const { ProbeAgent } = await import('./ProbeAgent.js');
this.ProbeAgent = ProbeAgent;
}
if (!this.agent) {
this.agent = new this.ProbeAgent({
sessionId: this.options.sessionId,
customPrompt: this.getMermaidFixingPrompt(),
path: this.options.path,
provider: this.options.provider,
model: this.options.model,
debug: this.options.debug,
tracer: this.options.tracer,
allowEdit: this.options.allowEdit,
maxIterations: 10, // Allow more iterations for mermaid fixing to handle complex diagrams
disableMermaidValidation: true // CRITICAL: Disable mermaid validation in nested agent to prevent infinite recursion
});
}
return this.agent;
}
/**
* Fix a single Mermaid diagram using the specialized agent
* @param {string} diagramContent - The broken Mermaid diagram content
* @param {Array} originalErrors - Array of errors detected in the original diagram
* @param {Object} diagramInfo - Additional context about the diagram (type, position, etc.)
* @returns {Promise<string>} - The corrected Mermaid diagram
*/
async fixMermaidDiagram(diagramContent, originalErrors = [], diagramInfo = {}) {
// First, try auto-fixing HTML entities without AI
const decodedContent = decodeHtmlEntities(diagramContent);
// If HTML entity decoding changed the content, validate it first
if (decodedContent !== diagramContent) {
try {
const quickValidation = await validateMermaidDiagram(decodedContent);
if (quickValidation.isValid) {
// HTML entity decoding fixed the issue, no need for AI
if (this.options.debug) {
console.error('[DEBUG] Fixed Mermaid diagram with HTML entity decoding only');
}
return decodedContent;
}
} catch (error) {
// If validation fails, continue with AI fixing using decoded content
if (this.options.debug) {
console.error('[DEBUG] HTML entity decoding didn\'t fully fix diagram, continuing with AI fixing');
}
}
}
await this.initializeAgent();
// Format error context - handle both simple strings and maid's structured errors
let errorContext = '';
if (originalErrors.length > 0) {
const formattedErrors = originalErrors.map(err => {
// Check if this is a maid structured error object
if (typeof err === 'object' && err.message) {
const location = err.line ? `line ${err.line}${err.column ? `:${err.column}` : ''}` : '';
const hint = err.hint ? `\n Hint: ${err.hint}` : '';
return location ? `- ${location}: ${err.message}${hint}` : `- ${err.message}${hint}`;
}
// Handle simple string errors
return `- ${err}`;