UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

179 lines 18.6 kB
"use strict"; /** * Claim Extractor * * Extracts claims from text for risk analysis. * * @module engines/hallucination/claim-extractor * @author Haiec * @license MIT */ Object.defineProperty(exports, "__esModule", { value: true }); exports.extractClaims = extractClaims; const text_1 = require("../../utils/text"); /** * Extract claims from text */ function extractClaims(text) { const sentences = (0, text_1.splitSentences)(text); const claims = []; let currentIndex = 0; for (const sentence of sentences) { const startIndex = text.indexOf(sentence, currentIndex); const endIndex = startIndex + sentence.length; const claim = analyzeSentence(sentence, startIndex, endIndex); if (claim) { claims.push(claim); } currentIndex = endIndex; } return claims; } /** * Analyze a sentence to determine if it's a claim */ function analyzeSentence(sentence, start, end) { const trimmed = sentence.trim(); if (trimmed.length < 10) return null; const type = classifySentence(trimmed); const verifiable = isVerifiable(trimmed, type); const riskIndicators = { lackOfSpecificity: calculateSpecificityRisk(trimmed), missingCitation: checkMissingCitation(trimmed, type), vagueLanguage: checkVagueLanguage(trimmed), contradictionSignal: false // Set during consistency check }; const confidence = calculateClaimConfidence(riskIndicators); return { text: trimmed, span: [start, end], type, verifiable, riskIndicators, confidence, limitations: [ 'Pattern-based claim extraction', 'May miss implicit claims', 'Context not fully analyzed' ] }; } /** * Classify sentence type */ function classifySentence(sentence) { const lower = sentence.toLowerCase(); // Instructions if (/^(please|you should|you must|do not|don't|always|never)\b/i.test(lower)) { return 'instruction'; } // Opinions if (/\b(i think|i believe|in my opinion|seems|appears|might|could|may)\b/i.test(lower)) { return 'opinion'; } // Metadata if (/^(note:|warning:|important:|disclaimer:)/i.test(lower)) { return 'metadata'; } // Default to factual return 'factual'; } /** * Check if claim is verifiable */ function isVerifiable(sentence, type) { if (type === 'opinion' || type === 'instruction') { return false; } // Contains numbers or dates if (/\d+/.test(sentence)) { return true; } // Contains proper nouns (capitalized words not at start) if (/\s[A-Z][a-z]+/.test(sentence)) { return true; } return true; } /** * Calculate specificity risk (0-1, higher = more risky) */ function calculateSpecificityRisk(sentence) { let risk = 0.5; // Base risk // Reduce risk if contains specific numbers if (/\b\d+(\.\d+)?%?\b/.test(sentence)) { risk -= 0.2; } // Reduce risk if contains dates if (/\b(19|20)\d{2}\b|\b(january|february|march|april|may|june|july|august|september|october|november|december)\b/i.test(sentence)) { risk -= 0.15; } // Reduce risk if contains proper nouns const properNouns = sentence.match(/\b[A-Z][a-z]+\b/g) || []; if (properNouns.length > 1) { risk -= 0.1; } // Increase risk for vague quantifiers if (/\b(many|some|few|several|various|numerous|countless)\b/i.test(sentence)) { risk += 0.15; } // Increase risk for hedging language if (/\b(generally|usually|often|sometimes|typically)\b/i.test(sentence)) { risk += 0.1; } return Math.max(0, Math.min(1, risk)); } /** * Check for missing citation */ function checkMissingCitation(sentence, type) { if (type !== 'factual') return false; // Has citation markers if (/\[\d+\]|\(\d{4}\)|according to|source:|cited from/i.test(sentence)) { return false; } // Contains strong factual claims without citation const strongClaims = /\b(studies show|research indicates|scientists found|data shows|evidence suggests|proven|confirmed)\b/i; return strongClaims.test(sentence); } /** * Check for vague language */ function checkVagueLanguage(sentence) { const vaguePatterns = [ /\b(somehow|something|somewhere|someone|somewhat)\b/i, /\b(kind of|sort of|type of)\b/i, /\b(stuff|things|etc\.?)\b/i, /\b(basically|essentially|virtually)\b/i ]; return vaguePatterns.some(p => p.test(sentence)); } /** * Calculate confidence score for claim analysis */ function calculateClaimConfidence(riskIndicators) { // Base confidence for pattern-based analysis const baseConfidence = 0.65; // Adjust based on clarity of signals let adjustment = 0; if (riskIndicators.lackOfSpecificity > 0.7 || riskIndicators.lackOfSpecificity < 0.3) { adjustment += 0.1; // Clear signal } if (riskIndicators.missingCitation) { adjustment += 0.05; } const value = Math.min(0.85, baseConfidence + adjustment); const margin = 0.15; return { value, interval: [Math.max(0, value - margin), Math.min(1, value + margin)], method: 'heuristic', factors: { patternStrength: value, contextClarity: 0.6 } }; } //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"claim-extractor.js","sourceRoot":"","sources":["../../../src/engines/hallucination/claim-extractor.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAQH,sCAmBC;AAxBD,2CAAkD;AAElD;;GAEG;AACH,SAAgB,aAAa,CAAC,IAAY;IACxC,MAAM,SAAS,GAAG,IAAA,qBAAc,EAAC,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAY,EAAE,CAAC;IAE3B,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;QACxD,MAAM,QAAQ,GAAG,UAAU,GAAG,QAAQ,CAAC,MAAM,CAAC;QAE9C,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;QAC9D,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QAED,YAAY,GAAG,QAAQ,CAAC;IAC1B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CACtB,QAAgB,EAChB,KAAa,EACb,GAAW;IAEX,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;QAAE,OAAO,IAAI,CAAC;IAErC,MAAM,IAAI,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAE/C,MAAM,cAAc,GAAG;QACrB,iBAAiB,EAAE,wBAAwB,CAAC,OAAO,CAAC;QACpD,eAAe,EAAE,oBAAoB,CAAC,OAAO,EAAE,IAAI,CAAC;QACpD,aAAa,EAAE,kBAAkB,CAAC,OAAO,CAAC;QAC1C,mBAAmB,EAAE,KAAK,CAAC,+BAA+B;KAC3D,CAAC;IAEF,MAAM,UAAU,GAAG,wBAAwB,CAAC,cAAc,CAAC,CAAC;IAE5D,OAAO;QACL,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,CAAC,KAAK,EAAE,GAAG,CAAC;QAClB,IAAI;QACJ,UAAU;QACV,cAAc;QACd,UAAU;QACV,WAAW,EAAE;YACX,gCAAgC;YAChC,0BAA0B;YAC1B,4BAA4B;SAC7B;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CACvB,QAAgB;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC;IAErC,eAAe;IACf,IAAI,4DAA4D,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC7E,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,WAAW;IACX,IAAI,sEAAsE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QACvF,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,WAAW;IACX,IAAI,2CAA2C,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;QAC5D,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,qBAAqB;IACrB,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,QAAgB,EAAE,IAAY;IAClD,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;QACjD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,4BAA4B;IAC5B,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,yDAAyD;IACzD,IAAI,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,QAAgB;IAChD,IAAI,IAAI,GAAG,GAAG,CAAC,CAAC,YAAY;IAE5B,2CAA2C;IAC3C,IAAI,mBAAmB,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACvC,IAAI,IAAI,GAAG,CAAC;IACd,CAAC;IAED,gCAAgC;IAChC,IAAI,+GAA+G,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnI,IAAI,IAAI,IAAI,CAAC;IACf,CAAC;IAED,uCAAuC;IACvC,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC;IAC7D,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,IAAI,IAAI,GAAG,CAAC;IACd,CAAC;IAED,sCAAsC;IACtC,IAAI,yDAAyD,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC7E,IAAI,IAAI,IAAI,CAAC;IACf,CAAC;IAED,qCAAqC;IACrC,IAAI,oDAAoD,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxE,IAAI,IAAI,GAAG,CAAC;IACd,CAAC;IAED,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAgB,EAAE,IAAY;IAC1D,IAAI,IAAI,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IAErC,uBAAuB;IACvB,IAAI,oDAAoD,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;QACxE,OAAO,KAAK,CAAC;IACf,CAAC;IAED,kDAAkD;IAClD,MAAM,YAAY,GAAG,uGAAuG,CAAC;IAE7H,OAAO,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AACrC,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,MAAM,aAAa,GAAG;QACpB,qDAAqD;QACrD,gCAAgC;QAChC,4BAA4B;QAC5B,wCAAwC;KACzC,CAAC;IAEF,OAAO,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC;AACnD,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,cAKjC;IACC,6CAA6C;IAC7C,MAAM,cAAc,GAAG,IAAI,CAAC;IAE5B,qCAAqC;IACrC,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,IAAI,cAAc,CAAC,iBAAiB,GAAG,GAAG,IAAI,cAAc,CAAC,iBAAiB,GAAG,GAAG,EAAE,CAAC;QACrF,UAAU,IAAI,GAAG,CAAC,CAAC,eAAe;IACpC,CAAC;IAED,IAAI,cAAc,CAAC,eAAe,EAAE,CAAC;QACnC,UAAU,IAAI,IAAI,CAAC;IACrB,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,cAAc,GAAG,UAAU,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,IAAI,CAAC;IAEpB,OAAO;QACL,KAAK;QACL,QAAQ,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,CAAC;QACpE,MAAM,EAAE,WAAW;QACnB,OAAO,EAAE;YACP,eAAe,EAAE,KAAK;YACtB,cAAc,EAAE,GAAG;SACpB;KACF,CAAC;AACJ,CAAC","sourcesContent":["/**\n * Claim Extractor\n * \n * Extracts claims from text for risk analysis.\n * \n * @module engines/hallucination/claim-extractor\n * @author Haiec\n * @license MIT\n */\n\nimport { Claim, ConfidenceScore } from '../../types/results';\nimport { splitSentences } from '../../utils/text';\n\n/**\n * Extract claims from text\n */\nexport function extractClaims(text: string): Claim[] {\n  const sentences = splitSentences(text);\n  const claims: Claim[] = [];\n  \n  let currentIndex = 0;\n  \n  for (const sentence of sentences) {\n    const startIndex = text.indexOf(sentence, currentIndex);\n    const endIndex = startIndex + sentence.length;\n    \n    const claim = analyzeSentence(sentence, startIndex, endIndex);\n    if (claim) {\n      claims.push(claim);\n    }\n    \n    currentIndex = endIndex;\n  }\n  \n  return claims;\n}\n\n/**\n * Analyze a sentence to determine if it's a claim\n */\nfunction analyzeSentence(\n  sentence: string, \n  start: number, \n  end: number\n): Claim | null {\n  const trimmed = sentence.trim();\n  if (trimmed.length < 10) return null;\n  \n  const type = classifySentence(trimmed);\n  const verifiable = isVerifiable(trimmed, type);\n  \n  const riskIndicators = {\n    lackOfSpecificity: calculateSpecificityRisk(trimmed),\n    missingCitation: checkMissingCitation(trimmed, type),\n    vagueLanguage: checkVagueLanguage(trimmed),\n    contradictionSignal: false // Set during consistency check\n  };\n  \n  const confidence = calculateClaimConfidence(riskIndicators);\n  \n  return {\n    text: trimmed,\n    span: [start, end],\n    type,\n    verifiable,\n    riskIndicators,\n    confidence,\n    limitations: [\n      'Pattern-based claim extraction',\n      'May miss implicit claims',\n      'Context not fully analyzed'\n    ]\n  };\n}\n\n/**\n * Classify sentence type\n */\nfunction classifySentence(\n  sentence: string\n): 'factual' | 'opinion' | 'instruction' | 'metadata' {\n  const lower = sentence.toLowerCase();\n  \n  // Instructions\n  if (/^(please|you should|you must|do not|don't|always|never)\\b/i.test(lower)) {\n    return 'instruction';\n  }\n  \n  // Opinions\n  if (/\\b(i think|i believe|in my opinion|seems|appears|might|could|may)\\b/i.test(lower)) {\n    return 'opinion';\n  }\n  \n  // Metadata\n  if (/^(note:|warning:|important:|disclaimer:)/i.test(lower)) {\n    return 'metadata';\n  }\n  \n  // Default to factual\n  return 'factual';\n}\n\n/**\n * Check if claim is verifiable\n */\nfunction isVerifiable(sentence: string, type: string): boolean {\n  if (type === 'opinion' || type === 'instruction') {\n    return false;\n  }\n  \n  // Contains numbers or dates\n  if (/\\d+/.test(sentence)) {\n    return true;\n  }\n  \n  // Contains proper nouns (capitalized words not at start)\n  if (/\\s[A-Z][a-z]+/.test(sentence)) {\n    return true;\n  }\n  \n  return true;\n}\n\n/**\n * Calculate specificity risk (0-1, higher = more risky)\n */\nfunction calculateSpecificityRisk(sentence: string): number {\n  let risk = 0.5; // Base risk\n  \n  // Reduce risk if contains specific numbers\n  if (/\\b\\d+(\\.\\d+)?%?\\b/.test(sentence)) {\n    risk -= 0.2;\n  }\n  \n  // Reduce risk if contains dates\n  if (/\\b(19|20)\\d{2}\\b|\\b(january|february|march|april|may|june|july|august|september|october|november|december)\\b/i.test(sentence)) {\n    risk -= 0.15;\n  }\n  \n  // Reduce risk if contains proper nouns\n  const properNouns = sentence.match(/\\b[A-Z][a-z]+\\b/g) || [];\n  if (properNouns.length > 1) {\n    risk -= 0.1;\n  }\n  \n  // Increase risk for vague quantifiers\n  if (/\\b(many|some|few|several|various|numerous|countless)\\b/i.test(sentence)) {\n    risk += 0.15;\n  }\n  \n  // Increase risk for hedging language\n  if (/\\b(generally|usually|often|sometimes|typically)\\b/i.test(sentence)) {\n    risk += 0.1;\n  }\n  \n  return Math.max(0, Math.min(1, risk));\n}\n\n/**\n * Check for missing citation\n */\nfunction checkMissingCitation(sentence: string, type: string): boolean {\n  if (type !== 'factual') return false;\n  \n  // Has citation markers\n  if (/\\[\\d+\\]|\\(\\d{4}\\)|according to|source:|cited from/i.test(sentence)) {\n    return false;\n  }\n  \n  // Contains strong factual claims without citation\n  const strongClaims = /\\b(studies show|research indicates|scientists found|data shows|evidence suggests|proven|confirmed)\\b/i;\n  \n  return strongClaims.test(sentence);\n}\n\n/**\n * Check for vague language\n */\nfunction checkVagueLanguage(sentence: string): boolean {\n  const vaguePatterns = [\n    /\\b(somehow|something|somewhere|someone|somewhat)\\b/i,\n    /\\b(kind of|sort of|type of)\\b/i,\n    /\\b(stuff|things|etc\\.?)\\b/i,\n    /\\b(basically|essentially|virtually)\\b/i\n  ];\n  \n  return vaguePatterns.some(p => p.test(sentence));\n}\n\n/**\n * Calculate confidence score for claim analysis\n */\nfunction calculateClaimConfidence(riskIndicators: {\n  lackOfSpecificity: number;\n  missingCitation: boolean;\n  vagueLanguage: boolean;\n  contradictionSignal: boolean;\n}): ConfidenceScore {\n  // Base confidence for pattern-based analysis\n  const baseConfidence = 0.65;\n  \n  // Adjust based on clarity of signals\n  let adjustment = 0;\n  \n  if (riskIndicators.lackOfSpecificity > 0.7 || riskIndicators.lackOfSpecificity < 0.3) {\n    adjustment += 0.1; // Clear signal\n  }\n  \n  if (riskIndicators.missingCitation) {\n    adjustment += 0.05;\n  }\n  \n  const value = Math.min(0.85, baseConfidence + adjustment);\n  const margin = 0.15;\n  \n  return {\n    value,\n    interval: [Math.max(0, value - margin), Math.min(1, value + margin)],\n    method: 'heuristic',\n    factors: {\n      patternStrength: value,\n      contextClarity: 0.6\n    }\n  };\n}\n"]}