/*
 * llmverify
 * Version: (not captured)
 * AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
 * 214 lines • 20.1 kB
 * JavaScript
 */
;
/**
* JSON Detection and Repair Module
*
* Detects JSON in text and attempts to repair malformed JSON.
*
* @module engines/classification/json-repair
* @author Haiec
* @license MIT
*/
// Flag this CommonJS module with `__esModule: true` (interop marker set on the exports object).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API: only detectAndRepairJson is exported. The assignment appears before the
// function's definition, which works because function declarations are hoisted.
exports.detectAndRepairJson = detectAndRepairJson;
/**
 * Extracts the most likely JSON candidate from free-form text.
 *
 * Lookup order:
 *  1. The first Markdown code fence (``` or ```json) whose trimmed content
 *     starts with `{` or `[` is returned as-is.
 *  2. Otherwise the greedy span from the first `{` to the last `}` (object)
 *     and the greedy span from the first `[` to the last `]` (array) are
 *     considered. The object span is preferred, except when the array span
 *     strictly encloses it: in that case the text holds a top-level array of
 *     objects (e.g. `[{"a":1},{"b":2}]`) and returning only the inner
 *     `{...},{...}` slice would yield something that can never parse.
 *
 * @param text - The text to search
 * @returns The candidate substring, or null when no brace/bracket pair is found
 */
function extractJsonCandidate(text) {
    // Try to find JSON in code blocks first
    const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
    if (codeBlockMatch) {
        const content = codeBlockMatch[1].trim();
        if (content.startsWith('{') || content.startsWith('[')) {
            return content;
        }
    }
    // Look for JSON-like patterns (both are greedy: first opener to last closer)
    const objectMatch = text.match(/(\{[\s\S]*\})/);
    const arrayMatch = text.match(/(\[[\s\S]*\])/);
    if (objectMatch && arrayMatch) {
        const objectStart = objectMatch.index ?? 0;
        const objectEnd = objectStart + objectMatch[1].length;
        const arrayStart = arrayMatch.index ?? 0;
        const arrayEnd = arrayStart + arrayMatch[1].length;
        // The array wraps the object, so the array is the real top-level value.
        if (arrayStart < objectStart && arrayEnd > objectEnd) {
            return arrayMatch[1];
        }
        return objectMatch[1];
    }
    if (objectMatch) {
        return objectMatch[1];
    }
    if (arrayMatch) {
        return arrayMatch[1];
    }
    return null;
}
/**
 * Parses a string with JSON.parse without ever throwing.
 *
 * Note that the JSON literal `null` also produces null, so a null result
 * does not distinguish "invalid input" from "valid JSON null".
 *
 * @param json - The string to parse
 * @returns The parsed value, or null when JSON.parse throws
 */
function tryParse(json) {
    let value = null;
    try {
        value = JSON.parse(json);
    }
    catch {
        // Malformed input: fall through and return the null sentinel.
    }
    return value;
}
/**
 * Repair step: drop any comma that is followed (after optional whitespace)
 * by a closing `}` or `]`. The whitespace and the closer are kept.
 *
 * The match is purely textual, so the same sequence inside a string literal
 * is rewritten as well.
 *
 * @param json - The JSON-like text to repair
 * @returns The text with dangling commas removed
 */
function removeTrailingCommas(json) {
    const danglingComma = /,(\s*[}\]])/g;
    return json.replace(danglingComma, (_match, closer) => closer);
}
/**
 * Repair step: wrap bare identifier keys in double quotes.
 *
 * A key is recognised as an identifier ([a-zA-Z_][a-zA-Z0-9_]*) that follows
 * `{` or `,` (plus optional whitespace) and precedes a `:`. Surrounding
 * whitespace is preserved. The match is purely textual and does not check
 * whether it occurs inside a string literal.
 *
 * @param json - The JSON-like text to repair
 * @returns The text with bare keys quoted
 */
function quoteUnquotedKeys(json) {
    const bareKey = /([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*:)/g;
    return json.replace(bareKey, (_match, lead, key, colon) => `${lead}"${key}"${colon}`);
}
/**
 * Repair step: convert single-quoted strings to double-quoted strings.
 *
 * The text is scanned once while tracking which quote character opened the
 * current string:
 *  - every string delimiter (single or double) is emitted as `"`;
 *  - a bare `"` inside a single-quoted string is escaped as `\"`;
 *  - backslash escapes are consumed as a pair, so an escaped backslash right
 *    before the closing quote (`"x\\"`) no longer hides that quote, and an
 *    already-escaped `\"` is not escaped a second time;
 *  - `\'` is emitted as a plain `'`, because `\'` is not a valid JSON escape.
 *
 * @param json - The JSON-like text to repair
 * @returns The text with all string literals delimited by double quotes
 */
function replaceSingleQuotes(json) {
    let result = '';
    // Quote character that opened the current string; '' while outside strings.
    let stringChar = '';
    for (let i = 0; i < json.length; i++) {
        const char = json[i];
        if (stringChar === '') {
            if (char === '"' || char === "'") {
                stringChar = char;
                result += '"';
            }
            else {
                result += char;
            }
            continue;
        }
        if (char === '\\' && i + 1 < json.length) {
            // Consume the escape pair as a unit so the escaped character can
            // never be mistaken for a string delimiter.
            const next = json[i + 1];
            result += next === "'" ? "'" : char + next;
            i++;
            continue;
        }
        if (char === stringChar) {
            stringChar = '';
            result += '"';
        }
        else if (char === '"') {
            // Only reachable inside a single-quoted string.
            result += '\\"';
        }
        else {
            result += char;
        }
    }
    return result;
}
/**
 * Repair step: escape raw line breaks that occur inside string literals.
 *
 * Only characters inside double-quoted strings are touched: a raw line feed
 * becomes the two characters `\n` and a raw carriage return becomes `\r`.
 * Line breaks between tokens are structural whitespace and are left alone;
 * escaping them would turn otherwise valid multi-line JSON into invalid text.
 *
 * Backslash escape pairs are copied verbatim, so `\"` does not end a string
 * and a line break directly preceded by a backslash is not modified.
 * Single-quoted strings are not recognised here.
 *
 * @param json - The JSON-like text to repair
 * @returns The text with in-string line breaks escaped
 */
function escapeNewlines(json) {
    let result = '';
    let inString = false;
    for (let i = 0; i < json.length; i++) {
        const char = json[i];
        if (!inString) {
            if (char === '"') {
                inString = true;
            }
            result += char;
            continue;
        }
        if (char === '\\' && i + 1 < json.length) {
            // Copy the escape pair untouched so an escaped quote keeps the string open.
            result += char + json[i + 1];
            i++;
            continue;
        }
        if (char === '\n') {
            result += '\\n';
        }
        else if (char === '\r') {
            result += '\\r';
        }
        else {
            if (char === '"') {
                inString = false;
            }
            result += char;
        }
    }
    return result;
}
/**
 * Repair step: append the closers for any `{` or `[` left open.
 *
 * Openers are tracked on a stack so the missing closers are appended
 * innermost-first (`[{"a": 1` becomes `[{"a": 1}]`). Characters inside
 * double-quoted strings, including escaped quotes, are ignored so braces
 * and brackets in string values do not affect the result. A closer that
 * does not match the most recent open opener is ignored.
 *
 * @param json - The JSON-like text to repair
 * @returns The text with missing closing braces/brackets appended
 */
function closeBrackets(json) {
    // Closers still owed, innermost last.
    const pending = [];
    let inString = false;
    let escaped = false;
    for (const char of json) {
        if (inString) {
            if (escaped) {
                escaped = false;
            }
            else if (char === '\\') {
                escaped = true;
            }
            else if (char === '"') {
                inString = false;
            }
            continue;
        }
        if (char === '"') {
            inString = true;
        }
        else if (char === '{') {
            pending.push('}');
        }
        else if (char === '[') {
            pending.push(']');
        }
        else if ((char === '}' || char === ']') && pending[pending.length - 1] === char) {
            pending.pop();
        }
    }
    let result = json;
    while (pending.length > 0) {
        result += pending.pop();
    }
    return result;
}
/**
 * Repair step: strip JavaScript-style comments.
 *
 * Removes line comments (two slashes up to, but not including, the line
 * feed) and terminated block comments. The text is scanned with string
 * awareness: comment markers inside single- or double-quoted strings are
 * kept, so values such as "http://example.com" survive intact. A block
 * comment opener with no matching terminator is left in place.
 *
 * @param json - The JSON-like text to repair
 * @returns The text with comments removed
 */
function removeComments(json) {
    let result = '';
    // Quote character that opened the current string; '' while outside strings.
    let quote = '';
    let i = 0;
    while (i < json.length) {
        const char = json[i];
        if (quote !== '') {
            result += char;
            if (char === '\\' && i + 1 < json.length) {
                // Keep escape pairs together so an escaped quote does not end the string.
                result += json[i + 1];
                i += 2;
                continue;
            }
            if (char === quote) {
                quote = '';
            }
            i++;
            continue;
        }
        if (char === '"' || char === "'") {
            quote = char;
            result += char;
            i++;
            continue;
        }
        if (char === '/' && json[i + 1] === '/') {
            // Line comment: skip to the line feed, which itself is preserved.
            const lineEnd = json.indexOf('\n', i);
            i = lineEnd === -1 ? json.length : lineEnd;
            continue;
        }
        if (char === '/' && json[i + 1] === '*') {
            const blockEnd = json.indexOf('*/', i + 2);
            if (blockEnd !== -1) {
                i = blockEnd + 2;
                continue;
            }
            // Unterminated block comment: fall through and keep the text.
        }
        result += char;
        i++;
    }
    return result;
}
/**
 * Default value for the `maxRepairSteps` parameter of detectAndRepairJson.
 * It equals the number of entries in that function's repair list (6), so by
 * default every repair step is eligible to run.
 */
const DEFAULT_MAX_REPAIR_STEPS = 6;
/**
 * Detects JSON in text and, when it does not parse as-is, runs a fixed
 * sequence of repair steps until the text parses or the steps run out.
 *
 * Steps run in this order, truncated to the first `maxRepairSteps` entries:
 * remove_comments, remove_trailing_commas, quote_unquoted_keys,
 * replace_single_quotes, escape_newlines, close_brackets. Repairs are
 * cumulative: each step receives the output of the previous one. A parse is
 * re-attempted only after a step that actually changed the text.
 *
 * Result shape:
 *  - isJson / repairSucceeded: true when the candidate (possibly repaired) parsed
 *  - normalizedJson: the parsed value (present only on success)
 *  - candidate: the originally extracted text, or null when none was found
 *  - repairSteps: one `{ step, applied }` entry per step that was run
 *
 * @param text - The text to analyze
 * @param maxRepairSteps - Maximum repair steps to attempt (default: 6)
 * @returns JSON detection result with repair information
 */
function detectAndRepairJson(text, maxRepairSteps = DEFAULT_MAX_REPAIR_STEPS) {
    const candidate = extractJsonCandidate(text);
    // Nothing that looks like JSON was found.
    if (!candidate) {
        return { isJson: false, candidate: null, repairSteps: [], repairSucceeded: false };
    }
    // Builds the success result around the extracted candidate.
    const succeed = (value, steps) => ({
        isJson: true,
        normalizedJson: value,
        candidate,
        repairSteps: steps,
        repairSucceeded: true
    });
    // Fast path: the candidate is already valid JSON.
    const untouched = tryParse(candidate);
    if (untouched !== null) {
        return succeed(untouched, []);
    }
    // Ordered repair pipeline, capped by maxRepairSteps.
    const pipeline = [
        { step: 'remove_comments', run: removeComments },
        { step: 'remove_trailing_commas', run: removeTrailingCommas },
        { step: 'quote_unquoted_keys', run: quoteUnquotedKeys },
        { step: 'replace_single_quotes', run: replaceSingleQuotes },
        { step: 'escape_newlines', run: escapeNewlines },
        { step: 'close_brackets', run: closeBrackets }
    ].slice(0, maxRepairSteps);
    const repairSteps = [];
    let working = candidate;
    for (const { step, run } of pipeline) {
        const next = run(working);
        const applied = next !== working;
        working = next;
        repairSteps.push({ step, applied });
        // Re-parsing is pointless when the step left the text unchanged.
        if (!applied) {
            continue;
        }
        const value = tryParse(working);
        if (value !== null) {
            return succeed(value, repairSteps);
        }
    }
    // Every permitted repair ran and the text still does not parse.
    return { isJson: false, candidate, repairSteps, repairSucceeded: false };
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"json-repair.js","sourceRoot":"","sources":["../../../src/engines/classification/json-repair.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAmKH,kDAwEC;AA5ND;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,wCAAwC;IACxC,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAClE,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,OAAO,CAAC;QACjB,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,MAAM,YAAY,GAAG;QACnB,eAAe,EAAG,SAAS;QAC3B,eAAe,CAAG,QAAQ;KAC3B,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,IAAY;IAC5B,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,uCAAuC;IACvC,OAAO,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,oCAAoC;IACpC,OAAO,IAAI,CAAC,OAAO,CAAC,0CAA0C,EAAE,UAAU,CAAC,CAAC;AAC9E,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,qDAAqD;IACrD,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,UAAU,GAAG,EAAE,CAAC;IAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE1C,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;gBACjC,QAAQ,GAAG,IAAI,CAAC;gBAChB,UAAU,GAAG,IAAI,CAAC;gBAClB,MAAM,IAAI,GAAG,CAAC;YAChB,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,IAAI,CAAC;YACjB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,IAAI,KAAK,UAAU,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gBAC7C,QAAQ,GAAG,KAAK,CAAC;gBACjB,MAAM,IAAI,GAAG,CAAC;YAChB,CAAC;iBAAM,IAA
I,IAAI,KAAK,GAAG,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;gBAC9C,MAAM,IAAI,KAAK,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,IAAI,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,iFAAiF;IACjF,OAAO,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,KAAK,GAAG;YAAE,UAAU,EAAE,CAAC;aAC1B,IAAI,IAAI,KAAK,GAAG;YAAE,UAAU,EAAE,CAAC;aAC/B,IAAI,IAAI,KAAK,GAAG;YAAE,YAAY,EAAE,CAAC;aACjC,IAAI,IAAI,KAAK,GAAG;YAAE,YAAY,EAAE,CAAC;IACxC,CAAC;IAED,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,OAAO,YAAY,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,CAAC;QACd,YAAY,EAAE,CAAC;IACjB,CAAC;IACD,OAAO,UAAU,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,CAAC;QACd,UAAU,EAAE,CAAC;IACf,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,8BAA8B;IAC9B,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAC7C,6BAA6B;IAC7B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;IACjD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,mCAAmC;AACnC,MAAM,wBAAwB,GAAG,CAAC,CAAC;AAEnC;;;;;;GAMG;AACH,SAAgB,mBAAmB,CACjC,IAAY,EACZ,iBAAyB,wBAAwB;IAEjD,MAAM,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAqB,EAAE,CAAC;IAEzC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO;YACL,MAAM,EAAE,KAAK;YACb,SAAS,EAAE,IAAI;YACf,WAAW,EAAE,EAAE;YACf,eAAe,EAAE,KAAK;SACvB,CAAC;IACJ,CAAC;IAED,0BAA0B;IAC1B,IAAI,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;IACjC,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QACpB,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,cAAc,EAAE,MAAM;YACtB,SAAS;YACT,WAAW,EAAE,EAAE;YACf,eAAe,EAAE,IAAI;SACtB,CAAC;IACJ,CAAC;IAED,0DAA0D;IAC1D,MAAM,OAAO,GAAuD;QAClE,EAAE,IAAI,EAAE,iBAAiB,EAAE,EAAE,EAAE,cAAc,EAAE;QAC/C,EAAE,IAAI,EAAE,wBAAwB,EAAE,EAAE,EAAE,oBAAoB,EAAE;QAC5D,EAAE,IAAI,EAAE,qBAAqB,EAAE,EAAE,EAAE,iBAAiB,EAAE;QACtD,EAAE,IAAI,EAAE,uBAAuB,EAAE,EAAE,EAAE,mBAAmB,EAAE;QAC1D,EAAE,IAAI,EAAE,iBAAiB,EAAE,EAAE,EAAE,cAAc,EAAE;QAC/C,EAAE,IAAI,EAAE,gBAAgB,EAAE,EAAE,EAAE,aA
Aa,EAAE;KAC9C,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IAE3B,IAAI,OAAO,GAAG,SAAS,CAAC;IAExB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,OAAO,CAAC;QACvB,OAAO,GAAG,MAAM,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;QAC7B,MAAM,OAAO,GAAG,OAAO,KAAK,MAAM,CAAC;QAEnC,WAAW,CAAC,IAAI,CAAC;YACf,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,OAAO;SACR,CAAC,CAAC;QAEH,8BAA8B;QAC9B,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC3B,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACpB,OAAO;oBACL,MAAM,EAAE,IAAI;oBACZ,cAAc,EAAE,MAAM;oBACtB,SAAS;oBACT,WAAW;oBACX,eAAe,EAAE,IAAI;iBACtB,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,OAAO;QACL,MAAM,EAAE,KAAK;QACb,SAAS;QACT,WAAW;QACX,eAAe,EAAE,KAAK;KACvB,CAAC;AACJ,CAAC","sourcesContent":["/**\n * JSON Detection and Repair Module\n * \n * Detects JSON in text and attempts to repair malformed JSON.\n * \n * @module engines/classification/json-repair\n * @author Haiec\n * @license MIT\n */\n\nimport { JsonRepairStep } from './types';\n\n/**\n * Result of JSON detection and repair.\n */\nexport interface JsonDetectionResult {\n  isJson: boolean;\n  normalizedJson?: unknown;\n  candidate: string | null;\n  repairSteps: JsonRepairStep[];\n  repairSucceeded: boolean;\n}\n\n/**\n * Extracts JSON candidate from text.\n */\nfunction extractJsonCandidate(text: string): string | null {\n  // Try to find JSON in code blocks first\n  const codeBlockMatch = text.match(/```(?:json)?\\s*([\\s\\S]*?)```/);\n  if (codeBlockMatch) {\n    const content = codeBlockMatch[1].trim();\n    if (content.startsWith('{') || content.startsWith('[')) {\n      return content;\n    }\n  }\n  \n  // Look for JSON-like patterns\n  const jsonPatterns = [\n    /(\\{[\\s\\S]*\\})/,  // Object\n    /(\\[[\\s\\S]*\\])/   // Array\n  ];\n  \n  for (const pattern of jsonPatterns) {\n    const match = text.match(pattern);\n    if (match) {\n      return match[1];\n    }\n  }\n  \n  return null;\n}\n\n/**\n * Attempts to parse JSON, returns null if invalid.\n */\nfunction tryParse(json: string): 
unknown | null {\n  try {\n    return JSON.parse(json);\n  } catch {\n    return null;\n  }\n}\n\n/**\n * Repair step: Remove trailing commas.\n */\nfunction removeTrailingCommas(json: string): string {\n  // Remove trailing commas before } or ]\n  return json.replace(/,(\\s*[}\\]])/g, '$1');\n}\n\n/**\n * Repair step: Add missing quotes to keys.\n */\nfunction quoteUnquotedKeys(json: string): string {\n  // Match unquoted keys followed by :\n  return json.replace(/([{,]\\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\\s*:)/g, '$1\"$2\"$3');\n}\n\n/**\n * Repair step: Replace single quotes with double quotes.\n */\nfunction replaceSingleQuotes(json: string): string {\n  // Simple replacement - may not handle all edge cases\n  let result = '';\n  let inString = false;\n  let stringChar = '';\n  \n  for (let i = 0; i < json.length; i++) {\n    const char = json[i];\n    const prevChar = i > 0 ? json[i - 1] : '';\n    \n    if (!inString) {\n      if (char === '\"' || char === \"'\") {\n        inString = true;\n        stringChar = char;\n        result += '\"';\n      } else {\n        result += char;\n      }\n    } else {\n      if (char === stringChar && prevChar !== '\\\\') {\n        inString = false;\n        result += '\"';\n      } else if (char === '\"' && stringChar === \"'\") {\n        result += '\\\\\"';\n      } else {\n        result += char;\n      }\n    }\n  }\n  \n  return result;\n}\n\n/**\n * Repair step: Escape unescaped newlines in strings.\n */\nfunction escapeNewlines(json: string): string {\n  // This is a simplified version - proper implementation would need state tracking\n  return json.replace(/(?<!\\\\)\\n/g, '\\\\n');\n}\n\n/**\n * Repair step: Close unclosed braces and brackets.\n */\nfunction closeBrackets(json: string): string {\n  let braceCount = 0;\n  let bracketCount = 0;\n  \n  for (const char of json) {\n    if (char === '{') braceCount++;\n    else if (char === '}') braceCount--;\n    else if (char === '[') bracketCount++;\n    else if (char 
=== ']') bracketCount--;\n  }\n  \n  let result = json;\n  while (bracketCount > 0) {\n    result += ']';\n    bracketCount--;\n  }\n  while (braceCount > 0) {\n    result += '}';\n    braceCount--;\n  }\n  \n  return result;\n}\n\n/**\n * Repair step: Remove JavaScript-style comments.\n */\nfunction removeComments(json: string): string {\n  // Remove single-line comments\n  let result = json.replace(/\\/\\/[^\\n]*/g, '');\n  // Remove multi-line comments\n  result = result.replace(/\\/\\*[\\s\\S]*?\\*\\//g, '');\n  return result;\n}\n\n/** Default maximum repair steps */\nconst DEFAULT_MAX_REPAIR_STEPS = 6;\n\n/**\n * Detects and repairs JSON in text.\n * \n * @param text - The text to analyze\n * @param maxRepairSteps - Maximum repair steps to attempt (default: 6)\n * @returns JSON detection result with repair information\n */\nexport function detectAndRepairJson(\n  text: string,\n  maxRepairSteps: number = DEFAULT_MAX_REPAIR_STEPS\n): JsonDetectionResult {\n  const candidate = extractJsonCandidate(text);\n  const repairSteps: JsonRepairStep[] = [];\n  \n  if (!candidate) {\n    return {\n      isJson: false,\n      candidate: null,\n      repairSteps: [],\n      repairSucceeded: false\n    };\n  }\n  \n  // Try parsing as-is first\n  let parsed = tryParse(candidate);\n  if (parsed !== null) {\n    return {\n      isJson: true,\n      normalizedJson: parsed,\n      candidate,\n      repairSteps: [],\n      repairSucceeded: true\n    };\n  }\n  \n  // Apply repair steps in order (limited by maxRepairSteps)\n  const repairs: Array<{ name: string; fn: (s: string) => string }> = [\n    { name: 'remove_comments', fn: removeComments },\n    { name: 'remove_trailing_commas', fn: removeTrailingCommas },\n    { name: 'quote_unquoted_keys', fn: quoteUnquotedKeys },\n    { name: 'replace_single_quotes', fn: replaceSingleQuotes },\n    { name: 'escape_newlines', fn: escapeNewlines },\n    { name: 'close_brackets', fn: closeBrackets }\n  ].slice(0, maxRepairSteps);\n  \n  
let current = candidate;\n  \n  for (const repair of repairs) {\n    const before = current;\n    current = repair.fn(current);\n    const applied = current !== before;\n    \n    repairSteps.push({\n      step: repair.name,\n      applied\n    });\n    \n    // Try parsing after each step\n    if (applied) {\n      parsed = tryParse(current);\n      if (parsed !== null) {\n        return {\n          isJson: true,\n          normalizedJson: parsed,\n          candidate,\n          repairSteps,\n          repairSucceeded: true\n        };\n      }\n    }\n  }\n  \n  // All repairs failed\n  return {\n    isJson: false,\n    candidate,\n    repairSteps,\n    repairSucceeded: false\n  };\n}\n"]}