UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

154 lines 16.5 kB
"use strict"; /** * Classification Engine Utilities * * Shared helper functions for classification. * * @module engines/classification/utils * @author Haiec * @license MIT */ Object.defineProperty(exports, "__esModule", { value: true }); exports.STOPWORDS = void 0; exports.clamp = clamp; exports.computeEntropy = computeEntropy; exports.tokenize = tokenize; exports.countSentences = countSentences; exports.countBullets = countBullets; exports.extractCapitalizedTokens = extractCapitalizedTokens; exports.computeWordFrequency = computeWordFrequency; exports.getMaxWordFrequency = getMaxWordFrequency; exports.containsAny = containsAny; exports.countMatches = countMatches; /** * Clamps a value between min and max. */ function clamp(value, min, max) { return Math.min(max, Math.max(min, value)); } /** * Computes Shannon entropy of text. */ function computeEntropy(text) { if (!text || text.length === 0) return 0; const freq = {}; for (const ch of text) { freq[ch] = (freq[ch] || 0) + 1; } const len = text.length; let entropy = 0; for (const ch in freq) { const p = freq[ch] / len; entropy -= p * Math.log2(p); } return entropy; } /** * Tokenizes text into words. */ function tokenize(text) { return text.split(/\s+/).filter(w => w.length > 0); } /** * Counts sentences in text. */ function countSentences(text) { const sentences = text.split(/[.!?]+/).filter(s => s.trim().length > 0); return sentences.length; } /** * Counts bullet points/list items in text. */ function countBullets(text) { const patterns = [ /^[-*•]\s+/gm, // Bullet points /^\d+[.)]\s+/gm, // Numbered lists /^[a-z][.)]\s+/gim // Lettered lists ]; let count = 0; for (const pattern of patterns) { const matches = text.match(pattern); if (matches) count += matches.length; } return count; } /** * Common English stopwords. */ exports.STOPWORDS = new Set([ 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'been', 'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought', 'used', 'it', 'its', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'we', 'they', 'what', 'which', 'who', 'whom', 'when', 'where', 'why', 'how', 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 'just', 'also', 'now', 'here', 'there', 'then' ]); /** * Extracts capitalized tokens (potential entities). */ function extractCapitalizedTokens(text) { const tokens = new Set(); const words = text.split(/\s+/); for (let i = 0; i < words.length; i++) { const word = words[i].replace(/[^a-zA-Z]/g, ''); if (word.length > 1 && /^[A-Z]/.test(word)) { // Skip if at sentence start (after . ! ?) if (i > 0) { const prev = words[i - 1]; if (!/[.!?]$/.test(prev)) { const lower = word.toLowerCase(); if (!exports.STOPWORDS.has(lower)) { tokens.add(word); } } } } } return tokens; } /** * Computes word frequency excluding stopwords. */ function computeWordFrequency(text) { const freq = new Map(); const words = tokenize(text.toLowerCase()); for (const word of words) { const clean = word.replace(/[^a-z]/g, ''); if (clean.length > 2 && !exports.STOPWORDS.has(clean)) { freq.set(clean, (freq.get(clean) || 0) + 1); } } return freq; } /** * Gets the most frequent word count. */ function getMaxWordFrequency(text) { const freq = computeWordFrequency(text); let max = 0; for (const count of freq.values()) { if (count > max) max = count; } return max; } /** * Checks if text contains any of the patterns (case-insensitive). */ function containsAny(text, patterns) { const lower = text.toLowerCase(); return patterns.some(p => lower.includes(p.toLowerCase())); } /** * Counts how many patterns are found in text. */ function countMatches(text, patterns) { const lower = text.toLowerCase(); return patterns.filter(p => lower.includes(p.toLowerCase())).length; } //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"utils.js","sourceRoot":"","sources":["../../../src/engines/classification/utils.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;;AAKH,sBAEC;AAKD,wCAgBC;AAKD,4BAEC;AAKD,wCAGC;AAKD,oCAcC;AAoBD,4DAqBC;AAKD,oDAYC;AAKD,kDAOC;AAKD,kCAGC;AAKD,oCAGC;AAlJD;;GAEG;AACH,SAAgB,KAAK,CAAC,KAAa,EAAE,GAAW,EAAE,GAAW;IAC3D,OAAO,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEzC,MAAM,IAAI,GAA2B,EAAE,CAAC;IACxC,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,IAAI,CAAC,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACjC,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC;IACxB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,KAAK,MAAM,EAAE,IAAI,IAAI,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC;QACzB,OAAO,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9B,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,SAAgB,QAAQ,CAAC,IAAY;IACnC,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACrD,CAAC;AAED;;GAEG;AACH,SAAgB,cAAc,CAAC,IAAY;IACzC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACxE,OAAO,SAAS,CAAC,MAAM,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAgB,YAAY,CAAC,IAAY;IACvC,MAAM,QAAQ,GAAG;QACf,aAAa,EAAY,gBAAgB;QACzC,eAAe,EAAU,iBAAiB;QAC1C,kBAAkB,CAAO,iBAAiB;KAC3C,CAAC;IAEF,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACpC,IAAI,OAAO;YAAE,KAAK,IAAI,OAAO,CAAC,MAAM,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACU,QAAA,SAAS,GAAG,IAAI,GAAG,CAAC;IAC/B,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK;IACnE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;IACpE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO;IACzE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACzE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI;IACvE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;IACpE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM;IACnE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI;IACxE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;CACtE,CAAC,CAAC;AAEH;;GAEG;AACH,SAAgB,wBAAwB,CAAC,IAAY;IACnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEhC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3C,0CAA0C;YAC1C,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACV,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC1B,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;oBACjC,IAAI,CAAC,iBAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;wBAC1B,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;oBACnB,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAgB,oBAAoB,CAAC,IAAY;IAC/C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkB,CAAC;IACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IAE3C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC1C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,iBAAS,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9C,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAgB,mBAAmB,CAAC,IAAY;IAC9C,MAAM,IAAI,GAAG,oBAAoB,CAAC,IAAI,CAAC,CAAC;IACxC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QAClC,IAAI,KAAK,GAAG,GAAG;YAAE,GAAG,GAAG,KAAK,CAAC;IAC/B,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;GAEG;AACH,SAAgB,WAAW,CAAC,IAAY,EAAE,QAAkB;IAC1D,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED;;GAEG;AACH,SAAgB,YAAY,CAAC,IAAY,EAAE,QAAkB;IAC3D,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IACjC,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;AACtE,CAAC","sourcesContent":["/**\n * Classification Engine Utilities\n * \n * Shared helper functions for classification.\n * \n * @module engines/classification/utils\n * @author Haiec\n * @license MIT\n */\n\n/**\n * Clamps a value between min and max.\n */\nexport function clamp(value: number, min: number, max: number): number {\n  return Math.min(max, Math.max(min, value));\n}\n\n/**\n * Computes Shannon entropy of text.\n */\nexport function computeEntropy(text: string): number {\n  if (!text || text.length === 0) return 0;\n  \n  const freq: Record<string, number> = {};\n  for (const ch of text) {\n    freq[ch] = (freq[ch] || 0) + 1;\n  }\n  \n  const len = text.length;\n  let entropy = 0;\n  for (const ch in freq) {\n    const p = freq[ch] / len;\n    entropy -= p * Math.log2(p);\n  }\n  \n  return entropy;\n}\n\n/**\n * Tokenizes text into words.\n */\nexport function tokenize(text: string): string[] {\n  return text.split(/\\s+/).filter(w => w.length > 0);\n}\n\n/**\n * Counts sentences in text.\n */\nexport function countSentences(text: string): number {\n  const sentences = text.split(/[.!?]+/).filter(s => s.trim().length > 0);\n  return sentences.length;\n}\n\n/**\n * Counts bullet points/list items in text.\n */\nexport function countBullets(text: string): number {\n  const patterns = [\n    /^[-*•]\\s+/gm,           // Bullet points\n    /^\\d+[.)]\\s+/gm,         // Numbered lists\n    /^[a-z][.)]\\s+/gim       // Lettered lists\n  ];\n  \n  let count = 0;\n  for (const pattern of patterns) {\n    const matches = text.match(pattern);\n    if (matches) count += matches.length;\n  }\n  \n  return count;\n}\n\n/**\n * Common English stopwords.\n */\nexport const STOPWORDS = new Set([\n  'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',\n  'of', 'with', 'by', 'from', 'as', 'is', 'was', 'are', 'were', 'been',\n  'be', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',\n  'should', 'may', 'might', 'must', 'shall', 'can', 'need', 'dare', 'ought',\n  'used', 'it', 'its', 'this', 'that', 'these', 'those', 'i', 'you', 'he',\n  'she', 'we', 'they', 'what', 'which', 'who', 'whom', 'when', 'where',\n  'why', 'how', 'all', 'each', 'every', 'both', 'few', 'more', 'most',\n  'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so',\n  'than', 'too', 'very', 'just', 'also', 'now', 'here', 'there', 'then'\n]);\n\n/**\n * Extracts capitalized tokens (potential entities).\n */\nexport function extractCapitalizedTokens(text: string): Set<string> {\n  const tokens = new Set<string>();\n  const words = text.split(/\\s+/);\n  \n  for (let i = 0; i < words.length; i++) {\n    const word = words[i].replace(/[^a-zA-Z]/g, '');\n    if (word.length > 1 && /^[A-Z]/.test(word)) {\n      // Skip if at sentence start (after . ! ?)\n      if (i > 0) {\n        const prev = words[i - 1];\n        if (!/[.!?]$/.test(prev)) {\n          const lower = word.toLowerCase();\n          if (!STOPWORDS.has(lower)) {\n            tokens.add(word);\n          }\n        }\n      }\n    }\n  }\n  \n  return tokens;\n}\n\n/**\n * Computes word frequency excluding stopwords.\n */\nexport function computeWordFrequency(text: string): Map<string, number> {\n  const freq = new Map<string, number>();\n  const words = tokenize(text.toLowerCase());\n  \n  for (const word of words) {\n    const clean = word.replace(/[^a-z]/g, '');\n    if (clean.length > 2 && !STOPWORDS.has(clean)) {\n      freq.set(clean, (freq.get(clean) || 0) + 1);\n    }\n  }\n  \n  return freq;\n}\n\n/**\n * Gets the most frequent word count.\n */\nexport function getMaxWordFrequency(text: string): number {\n  const freq = computeWordFrequency(text);\n  let max = 0;\n  for (const count of freq.values()) {\n    if (count > max) max = count;\n  }\n  return max;\n}\n\n/**\n * Checks if text contains any of the patterns (case-insensitive).\n */\nexport function containsAny(text: string, patterns: string[]): boolean {\n  const lower = text.toLowerCase();\n  return patterns.some(p => lower.includes(p.toLowerCase()));\n}\n\n/**\n * Counts how many patterns are found in text.\n */\nexport function countMatches(text: string, patterns: string[]): number {\n  const lower = text.toLowerCase();\n  return patterns.filter(p => lower.includes(p.toLowerCase())).length;\n}\n"]}