UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

214 lines 20.1 kB
"use strict"; /** * JSON Detection and Repair Module * * Detects JSON in text and attempts to repair malformed JSON. * * @module engines/classification/json-repair * @author Haiec * @license MIT */ Object.defineProperty(exports, "__esModule", { value: true }); exports.detectAndRepairJson = detectAndRepairJson; /** * Extracts JSON candidate from text. */ function extractJsonCandidate(text) { // Try to find JSON in code blocks first const codeBlockMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/); if (codeBlockMatch) { const content = codeBlockMatch[1].trim(); if (content.startsWith('{') || content.startsWith('[')) { return content; } } // Look for JSON-like patterns const jsonPatterns = [ /(\{[\s\S]*\})/, // Object /(\[[\s\S]*\])/ // Array ]; for (const pattern of jsonPatterns) { const match = text.match(pattern); if (match) { return match[1]; } } return null; } /** * Attempts to parse JSON, returns null if invalid. */ function tryParse(json) { try { return JSON.parse(json); } catch { return null; } } /** * Repair step: Remove trailing commas. */ function removeTrailingCommas(json) { // Remove trailing commas before } or ] return json.replace(/,(\s*[}\]])/g, '$1'); } /** * Repair step: Add missing quotes to keys. */ function quoteUnquotedKeys(json) { // Match unquoted keys followed by : return json.replace(/([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\s*:)/g, '$1"$2"$3'); } /** * Repair step: Replace single quotes with double quotes. */ function replaceSingleQuotes(json) { // Simple replacement - may not handle all edge cases let result = ''; let inString = false; let stringChar = ''; for (let i = 0; i < json.length; i++) { const char = json[i]; const prevChar = i > 0 ? json[i - 1] : ''; if (!inString) { if (char === '"' || char === "'") { inString = true; stringChar = char; result += '"'; } else { result += char; } } else { if (char === stringChar && prevChar !== '\\') { inString = false; result += '"'; } else if (char === '"' && stringChar === "'") { result += '\\"'; } else { result += char; } } } return result; } /** * Repair step: Escape unescaped newlines in strings. */ function escapeNewlines(json) { // This is a simplified version - proper implementation would need state tracking return json.replace(/(?<!\\)\n/g, '\\n'); } /** * Repair step: Close unclosed braces and brackets. */ function closeBrackets(json) { let braceCount = 0; let bracketCount = 0; for (const char of json) { if (char === '{') braceCount++; else if (char === '}') braceCount--; else if (char === '[') bracketCount++; else if (char === ']') bracketCount--; } let result = json; while (bracketCount > 0) { result += ']'; bracketCount--; } while (braceCount > 0) { result += '}'; braceCount--; } return result; } /** * Repair step: Remove JavaScript-style comments. */ function removeComments(json) { // Remove single-line comments let result = json.replace(/\/\/[^\n]*/g, ''); // Remove multi-line comments result = result.replace(/\/\*[\s\S]*?\*\//g, ''); return result; } /** Default maximum repair steps */ const DEFAULT_MAX_REPAIR_STEPS = 6; /** * Detects and repairs JSON in text. * * @param text - The text to analyze * @param maxRepairSteps - Maximum repair steps to attempt (default: 6) * @returns JSON detection result with repair information */ function detectAndRepairJson(text, maxRepairSteps = DEFAULT_MAX_REPAIR_STEPS) { const candidate = extractJsonCandidate(text); const repairSteps = []; if (!candidate) { return { isJson: false, candidate: null, repairSteps: [], repairSucceeded: false }; } // Try parsing as-is first let parsed = tryParse(candidate); if (parsed !== null) { return { isJson: true, normalizedJson: parsed, candidate, repairSteps: [], repairSucceeded: true }; } // Apply repair steps in order (limited by maxRepairSteps) const repairs = [ { name: 'remove_comments', fn: removeComments }, { name: 'remove_trailing_commas', fn: removeTrailingCommas }, { name: 'quote_unquoted_keys', fn: quoteUnquotedKeys }, { name: 'replace_single_quotes', fn: replaceSingleQuotes }, { name: 'escape_newlines', fn: escapeNewlines }, { name: 'close_brackets', fn: closeBrackets } ].slice(0, maxRepairSteps); let current = candidate; for (const repair of repairs) { const before = current; current = repair.fn(current); const applied = current !== before; repairSteps.push({ step: repair.name, applied }); // Try parsing after each step if (applied) { parsed = tryParse(current); if (parsed !== null) { return { isJson: true, normalizedJson: parsed, candidate, repairSteps, repairSucceeded: true }; } } } // All repairs failed return { isJson: false, candidate, repairSteps, repairSucceeded: false }; } //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"json-repair.js","sourceRoot":"","sources":["../../../src/engines/classification/json-repair.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAmKH,kDAwEC;AA5ND;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,wCAAwC;IACxC,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAClE,IAAI,cAAc,EAAE,CAAC;QACnB,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACvD,OAAO,OAAO,CAAC;QACjB,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,MAAM,YAAY,GAAG;QACnB,eAAe,EAAG,SAAS;QAC3B,eAAe,CAAG,QAAQ;KAC3B,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,KAAK,EAAE,CAAC;YACV,OAAO,KAAK,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,IAAY;IAC5B,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC1B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,uCAAuC;IACvC,OAAO,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,oCAAoC;IACpC,OAAO,IAAI,CAAC,OAAO,CAAC,0CAA0C,EAAE,UAAU,CAAC,CAAC;AAC9E,CAAC;AAED;;GAEG;AACH,SAAS,mBAAmB,CAAC,IAAY;IACvC,qDAAqD;IACrD,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,UAAU,GAAG,EAAE,CAAC;IAEpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAE1C,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,IAAI,IAAI,KAAK,GAAG,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;gBACjC,QAAQ,GAAG,IAAI,CAAC;gBAChB,UAAU,GAAG,IAAI,CAAC;gBAClB,MAAM,IAAI,GAAG,CAAC;YAChB,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,IAAI,CAAC;YACjB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,IAAI,KAAK,UAAU,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;gBAC7C,QAAQ,GAAG,KAAK,CAAC;gBACjB,MAAM,IAAI,GAAG,CAAC;YAChB,CAAC;iBAAM,IAAI,IAAI,KAAK,GAAG,IAAI,UAAU,KAAK,GAAG,EAAE,CAAC;gBAC9C,MAAM,IAAI,KAAK,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,MAAM,IAAI,IAAI,CAAC;YACjB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,iFAAiF;IACjF,OAAO,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;QACxB,IAAI,IAAI,KAAK,GAAG;YAAE,UAAU,EAAE,CAAC;aAC1B,IAAI,IAAI,KAAK,GAAG;YAAE,UAAU,EAAE,CAAC;aAC/B,IAAI,IAAI,KAAK,GAAG;YAAE,YAAY,EAAE,CAAC;aACjC,IAAI,IAAI,KAAK,GAAG;YAAE,YAAY,EAAE,CAAC;IACxC,CAAC;IAED,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,OAAO,YAAY,GAAG,CAAC,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,CAAC;QACd,YAAY,EAAE,CAAC;IACjB,CAAC;IACD,OAAO,UAAU,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,GAAG,CAAC;QACd,UAAU,EAAE,CAAC;IACf,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAY;IAClC,8BAA8B;IAC9B,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;IAC7C,6BAA6B;IAC7B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;IACjD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,mCAAmC;AACnC,MAAM,wBAAwB,GAAG,CAAC,CAAC;AAEnC;;;;;;GAMG;AACH,SAAgB,mBAAmB,CACjC,IAAY,EACZ,iBAAyB,wBAAwB;IAEjD,MAAM,SAAS,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAqB,EAAE,CAAC;IAEzC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,OAAO;YACL,MAAM,EAAE,KAAK;YACb,SAAS,EAAE,IAAI;YACf,WAAW,EAAE,EAAE;YACf,eAAe,EAAE,KAAK;SACvB,CAAC;IACJ,CAAC;IAED,0BAA0B;IAC1B,IAAI,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;IACjC,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;QACpB,OAAO;YACL,MAAM,EAAE,IAAI;YACZ,cAAc,EAAE,MAAM;YACtB,SAAS;YACT,WAAW,EAAE,EAAE;YACf,eAAe,EAAE,IAAI;SACtB,CAAC;IACJ,CAAC;IAED,0DAA0D;IAC1D,MAAM,OAAO,GAAuD;QAClE,EAAE,IAAI,EAAE,iBAAiB,EAAE,EAAE,EAAE,cAAc,EAAE;QAC/C,EAAE,IAAI,EAAE,wBAAwB,EAAE,EAAE,EAAE,oBAAoB,EAAE;QAC5D,EAAE,IAAI,EAAE,qBAAqB,EAAE,EAAE,EAAE,iBAAiB,EAAE;QACtD,EAAE,IAAI,EAAE,uBAAuB,EAAE,EAAE,EAAE,mBAAmB,EAAE;QAC1D,EAAE,IAAI,EAAE,iBAAiB,EAAE,EAAE,EAAE,cAAc,EAAE;QAC/C,EAAE,IAAI,EAAE,gBAAgB,EAAE,EAAE,EAAE,aAAa,EAAE;KAC9C,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IAE3B,IAAI,OAAO,GAAG,SAAS,CAAC;IAExB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,OAAO,CAAC;QACvB,OAAO,GAAG,MAAM,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;QAC7B,MAAM,OAAO,GAAG,OAAO,KAAK,MAAM,CAAC;QAEnC,WAAW,CAAC,IAAI,CAAC;YACf,IAAI,EAAE,MAAM,CAAC,IAAI;YACjB,OAAO;SACR,CAAC,CAAC;QAEH,8BAA8B;QAC9B,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC3B,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACpB,OAAO;oBACL,MAAM,EAAE,IAAI;oBACZ,cAAc,EAAE,MAAM;oBACtB,SAAS;oBACT,WAAW;oBACX,eAAe,EAAE,IAAI;iBACtB,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,qBAAqB;IACrB,OAAO;QACL,MAAM,EAAE,KAAK;QACb,SAAS;QACT,WAAW;QACX,eAAe,EAAE,KAAK;KACvB,CAAC;AACJ,CAAC","sourcesContent":["/**\n * JSON Detection and Repair Module\n * \n * Detects JSON in text and attempts to repair malformed JSON.\n * \n * @module engines/classification/json-repair\n * @author Haiec\n * @license MIT\n */\n\nimport { JsonRepairStep } from './types';\n\n/**\n * Result of JSON detection and repair.\n */\nexport interface JsonDetectionResult {\n  isJson: boolean;\n  normalizedJson?: unknown;\n  candidate: string | null;\n  repairSteps: JsonRepairStep[];\n  repairSucceeded: boolean;\n}\n\n/**\n * Extracts JSON candidate from text.\n */\nfunction extractJsonCandidate(text: string): string | null {\n  // Try to find JSON in code blocks first\n  const codeBlockMatch = text.match(/```(?:json)?\\s*([\\s\\S]*?)```/);\n  if (codeBlockMatch) {\n    const content = codeBlockMatch[1].trim();\n    if (content.startsWith('{') || content.startsWith('[')) {\n      return content;\n    }\n  }\n  \n  // Look for JSON-like patterns\n  const jsonPatterns = [\n    /(\\{[\\s\\S]*\\})/,  // Object\n    /(\\[[\\s\\S]*\\])/   // Array\n  ];\n  \n  for (const pattern of jsonPatterns) {\n    const match = text.match(pattern);\n    if (match) {\n      return match[1];\n    }\n  }\n  \n  return null;\n}\n\n/**\n * Attempts to parse JSON, returns null if invalid.\n */\nfunction tryParse(json: string): unknown | null {\n  try {\n    return JSON.parse(json);\n  } catch {\n    return null;\n  }\n}\n\n/**\n * Repair step: Remove trailing commas.\n */\nfunction removeTrailingCommas(json: string): string {\n  // Remove trailing commas before } or ]\n  return json.replace(/,(\\s*[}\\]])/g, '$1');\n}\n\n/**\n * Repair step: Add missing quotes to keys.\n */\nfunction quoteUnquotedKeys(json: string): string {\n  // Match unquoted keys followed by :\n  return json.replace(/([{,]\\s*)([a-zA-Z_][a-zA-Z0-9_]*)(\\s*:)/g, '$1\"$2\"$3');\n}\n\n/**\n * Repair step: Replace single quotes with double quotes.\n */\nfunction replaceSingleQuotes(json: string): string {\n  // Simple replacement - may not handle all edge cases\n  let result = '';\n  let inString = false;\n  let stringChar = '';\n  \n  for (let i = 0; i < json.length; i++) {\n    const char = json[i];\n    const prevChar = i > 0 ? json[i - 1] : '';\n    \n    if (!inString) {\n      if (char === '\"' || char === \"'\") {\n        inString = true;\n        stringChar = char;\n        result += '\"';\n      } else {\n        result += char;\n      }\n    } else {\n      if (char === stringChar && prevChar !== '\\\\') {\n        inString = false;\n        result += '\"';\n      } else if (char === '\"' && stringChar === \"'\") {\n        result += '\\\\\"';\n      } else {\n        result += char;\n      }\n    }\n  }\n  \n  return result;\n}\n\n/**\n * Repair step: Escape unescaped newlines in strings.\n */\nfunction escapeNewlines(json: string): string {\n  // This is a simplified version - proper implementation would need state tracking\n  return json.replace(/(?<!\\\\)\\n/g, '\\\\n');\n}\n\n/**\n * Repair step: Close unclosed braces and brackets.\n */\nfunction closeBrackets(json: string): string {\n  let braceCount = 0;\n  let bracketCount = 0;\n  \n  for (const char of json) {\n    if (char === '{') braceCount++;\n    else if (char === '}') braceCount--;\n    else if (char === '[') bracketCount++;\n    else if (char === ']') bracketCount--;\n  }\n  \n  let result = json;\n  while (bracketCount > 0) {\n    result += ']';\n    bracketCount--;\n  }\n  while (braceCount > 0) {\n    result += '}';\n    braceCount--;\n  }\n  \n  return result;\n}\n\n/**\n * Repair step: Remove JavaScript-style comments.\n */\nfunction removeComments(json: string): string {\n  // Remove single-line comments\n  let result = json.replace(/\\/\\/[^\\n]*/g, '');\n  // Remove multi-line comments\n  result = result.replace(/\\/\\*[\\s\\S]*?\\*\\//g, '');\n  return result;\n}\n\n/** Default maximum repair steps */\nconst DEFAULT_MAX_REPAIR_STEPS = 6;\n\n/**\n * Detects and repairs JSON in text.\n * \n * @param text - The text to analyze\n * @param maxRepairSteps - Maximum repair steps to attempt (default: 6)\n * @returns JSON detection result with repair information\n */\nexport function detectAndRepairJson(\n  text: string,\n  maxRepairSteps: number = DEFAULT_MAX_REPAIR_STEPS\n): JsonDetectionResult {\n  const candidate = extractJsonCandidate(text);\n  const repairSteps: JsonRepairStep[] = [];\n  \n  if (!candidate) {\n    return {\n      isJson: false,\n      candidate: null,\n      repairSteps: [],\n      repairSucceeded: false\n    };\n  }\n  \n  // Try parsing as-is first\n  let parsed = tryParse(candidate);\n  if (parsed !== null) {\n    return {\n      isJson: true,\n      normalizedJson: parsed,\n      candidate,\n      repairSteps: [],\n      repairSucceeded: true\n    };\n  }\n  \n  // Apply repair steps in order (limited by maxRepairSteps)\n  const repairs: Array<{ name: string; fn: (s: string) => string }> = [\n    { name: 'remove_comments', fn: removeComments },\n    { name: 'remove_trailing_commas', fn: removeTrailingCommas },\n    { name: 'quote_unquoted_keys', fn: quoteUnquotedKeys },\n    { name: 'replace_single_quotes', fn: replaceSingleQuotes },\n    { name: 'escape_newlines', fn: escapeNewlines },\n    { name: 'close_brackets', fn: closeBrackets }\n  ].slice(0, maxRepairSteps);\n  \n  let current = candidate;\n  \n  for (const repair of repairs) {\n    const before = current;\n    current = repair.fn(current);\n    const applied = current !== before;\n    \n    repairSteps.push({\n      step: repair.name,\n      applied\n    });\n    \n    // Try parsing after each step\n    if (applied) {\n      parsed = tryParse(current);\n      if (parsed !== null) {\n        return {\n          isJson: true,\n          normalizedJson: parsed,\n          candidate,\n          repairSteps,\n          repairSucceeded: true\n        };\n      }\n    }\n  }\n  \n  // All repairs failed\n  return {\n    isJson: false,\n    candidate,\n    repairSteps,\n    repairSucceeded: false\n  };\n}\n"]}