UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

107 lines 12.9 kB
"use strict";
/**
 * Structured List Test
 *
 * Tests if the LLM can generate properly structured list output.
 * Validates format compliance and structural consistency.
 *
 * WHAT THIS TESTS:
 * ✅ List formatting capability
 * ✅ Instruction following for structure
 * ✅ Consistent formatting patterns
 *
 * LIMITATIONS:
 * - Format variations may be valid but flagged
 * - Does not validate list content accuracy
 * - May be too strict for creative responses
 *
 * @module sentinel/structuredListTest
 * @author Haiec
 * @license MIT
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.structuredListTest = structuredListTest;

/** Caveats attached to every result so callers can judge how far to trust it. */
const LIMITATIONS = [
    'Format variations may be valid but flagged as failures',
    'Does not validate content accuracy',
    'May be too strict for creative or conversational models',
    'Different models have different formatting conventions'
];

/** Prompt sent to the model under test. */
const TEST_QUERY = 'List exactly 3 colors. Format as a numbered list (1. 2. 3.)';

/** Colour names looked for in the response, in reporting order. */
const COLOR_WORDS = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink',
    'black', 'white', 'brown', 'gray', 'grey', 'cyan', 'magenta'];

/**
 * One whole-word matcher per colour, built once at module load.
 *
 * A colour only counts when it is not embedded in a longer run of letters, so
 * "red" is no longer detected inside "colored" or "hundred". An optional plural
 * "s" is accepted. Non-letter neighbours such as `*`, `_`, digits or punctuation
 * still count as boundaries, so Markdown emphasis like `_Red_` matches.
 * The patterns carry no `g` flag, which keeps `.test()` stateless.
 */
const COLOR_MATCHERS = COLOR_WORDS.map((word) => ({
    word,
    pattern: new RegExp(`(^|[^a-z])${word}s?(?![a-z])`)
}));

/**
 * Returns the entries of COLOR_WORDS that appear as whole words in `text`.
 *
 * @param {string} text - Raw model output.
 * @returns {string[]} Matched colour names, lower-case, in COLOR_WORDS order.
 */
function findColorWords(text) {
    // Lower-case once instead of once per candidate colour.
    const lowered = text.toLowerCase();
    return COLOR_MATCHERS
        .filter(({ pattern }) => pattern.test(lowered))
        .map(({ word }) => word);
}

/**
 * Tests if the LLM can generate a properly structured list.
 *
 * Sends TEST_QUERY through `config.client.generate` and inspects the returned
 * text. The test passes when the text has a list-like structure (at least three
 * numbered items, at least three bullet items, or at least three non-empty
 * lines) and mentions at least two known colour words.
 *
 * This function does not reject: any error raised while calling the client or
 * reading the response is reported as a failed result with confidence 0.5.
 *
 * @param config - Sentinel configuration with LLM client (`client.generate`)
 *   and the `model` identifier forwarded to it
 * @returns Test result with structure analysis: `test`, `passed`, `message`,
 *   `details`, `confidence` and `limitations`
 *
 * @example
 * const result = await structuredListTest({
 *   client: myLLMClient,
 *   model: 'gpt-4'
 * });
 *
 * if (result.passed) {
 *   console.log('LLM can generate structured lists');
 * }
 */
async function structuredListTest(config) {
    try {
        const response = await config.client.generate({
            prompt: TEST_QUERY,
            model: config.model
        });
        // Fail with a clear message instead of an opaque "cannot read properties
        // of undefined" when the client returns nothing usable.
        if (!response || typeof response.text !== 'string') {
            throw new TypeError('LLM client response did not include a text string');
        }
        const text = response.text.trim();

        // Check for numbered list patterns
        const numberedPattern = /^\s*[1-3][.)]\s*.+$/gm;
        const numberedMatches = text.match(numberedPattern) || [];

        // Check for bullet list patterns (alternative valid format)
        const bulletPattern = /^\s*[-*•]\s*.+$/gm;
        const bulletMatches = text.match(bulletPattern) || [];

        // Check for line-separated items
        const lines = text.split('\n').filter(l => l.trim().length > 0);

        // Determine list type and count
        const hasNumberedList = numberedMatches.length >= 3;
        const hasBulletList = bulletMatches.length >= 3;
        const hasLineList = lines.length >= 3;

        // Check for color words (whole-word matches only)
        const foundColors = findColorWords(text);

        // Pass if we have a list structure with at least 3 items and some colors
        const hasValidStructure = hasNumberedList || hasBulletList || hasLineList;
        const hasColors = foundColors.length >= 2;
        const passed = hasValidStructure && hasColors;

        return {
            test: 'structuredListTest',
            passed,
            message: passed
                ? 'LLM generated a properly structured list'
                : `List structure issues: ${!hasValidStructure ? 'No valid list format detected' : 'Missing expected content'}`,
            details: {
                query: TEST_QUERY,
                // Only the first 300 characters of the response are kept.
                response: text.substring(0, 300),
                numberedItems: numberedMatches.length,
                bulletItems: bulletMatches.length,
                lineItems: lines.length,
                foundColors,
                hasValidStructure,
                // Most specific format wins: numbered, then bullet, then plain lines.
                listType: hasNumberedList ? 'numbered' : hasBulletList ? 'bullet' : hasLineList ? 'line' : 'none'
            },
            confidence: hasValidStructure ? 0.85 : 0.6,
            limitations: LIMITATIONS
        };
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : 'Unknown error';
        return {
            test: 'structuredListTest',
            passed: false,
            message: `Test failed with error: ${reason}`,
            details: {
                error: reason
            },
            confidence: 0.5,
            limitations: [...LIMITATIONS, 'Test failed due to error']
        };
    }
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"structuredListTest.js","sourceRoot":"","sources":["../../src/sentinel/structuredListTest.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;GAmBG;;AA6BH,gDAkEC;AA3FD,MAAM,WAAW,GAAG;IAClB,wDAAwD;IACxD,oCAAoC;IACpC,yDAAyD;IACzD,wDAAwD;CACzD,CAAC;AAEF,MAAM,UAAU,GAAG,6DAA6D,CAAC;AAEjF;;;;;;;;;;;;;;;GAeG;AACI,KAAK,UAAU,kBAAkB,CAAC,MAAsB;IAC7D,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC;YAC5C,MAAM,EAAE,UAAU;YAClB,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAElC,mCAAmC;QACnC,MAAM,eAAe,GAAG,uBAAuB,CAAC;QAChD,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAE1D,4DAA4D;QAC5D,MAAM,aAAa,GAAG,mBAAmB,CAAC;QAC1C,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC;QAEtD,iCAAiC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAEhE,gCAAgC;QAChC,MAAM,eAAe,GAAG,eAAe,CAAC,MAAM,IAAI,CAAC,CAAC;QACpD,MAAM,aAAa,GAAG,aAAa,CAAC,MAAM,IAAI,CAAC,CAAC;QAChD,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,IAAI,CAAC,CAAC;QAEtC,wBAAwB;QACxB,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM;YAC5D,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;QAClF,MAAM,WAAW,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;QAE3E,yEAAyE;QACzE,MAAM,iBAAiB,GAAG,eAAe,IAAI,aAAa,IAAI,WAAW,CAAC;QAC1E,MAAM,SAAS,GAAG,WA
AW,CAAC,MAAM,IAAI,CAAC,CAAC;QAC1C,MAAM,MAAM,GAAG,iBAAiB,IAAI,SAAS,CAAC;QAE9C,OAAO;YACL,IAAI,EAAE,oBAAoB;YAC1B,MAAM;YACN,OAAO,EAAE,MAAM;gBACb,CAAC,CAAC,0CAA0C;gBAC5C,CAAC,CAAC,0BAA0B,CAAC,iBAAiB,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,0BAA0B,EAAE;YACjH,OAAO,EAAE;gBACP,KAAK,EAAE,UAAU;gBACjB,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;gBAChC,aAAa,EAAE,eAAe,CAAC,MAAM;gBACrC,WAAW,EAAE,aAAa,CAAC,MAAM;gBACjC,SAAS,EAAE,KAAK,CAAC,MAAM;gBACvB,WAAW;gBACX,iBAAiB;gBACjB,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM;aAClG;YACD,UAAU,EAAE,iBAAiB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG;YAC1C,WAAW,EAAE,WAAW;SACzB,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO;YACL,IAAI,EAAE,oBAAoB;YAC1B,MAAM,EAAE,KAAK;YACb,OAAO,EAAE,2BAA2B,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE;YAC9F,OAAO,EAAE;gBACP,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe;aAChE;YACD,UAAU,EAAE,GAAG;YACf,WAAW,EAAE,CAAC,GAAG,WAAW,EAAE,0BAA0B,CAAC;SAC1D,CAAC;IACJ,CAAC;AACH,CAAC","sourcesContent":["/**\n * Structured List Test\n * \n * Tests if the LLM can generate properly structured list output.\n * Validates format compliance and structural consistency.\n * \n * WHAT THIS TESTS:\n * ✅ List formatting capability\n * ✅ Instruction following for structure\n * ✅ Consistent formatting patterns\n * \n * LIMITATIONS:\n * - Format variations may be valid but flagged\n * - Does not validate list content accuracy\n * - May be too strict for creative responses\n * \n * @module sentinel/structuredListTest\n * @author Haiec\n * @license MIT\n */\n\nimport { SentinelTestResult, SentinelConfig } from '../types/runtime';\n\nconst LIMITATIONS = [\n  'Format variations may be valid but flagged as failures',\n  'Does not validate content accuracy',\n  'May be too strict for creative or conversational models',\n  'Different models have different formatting conventions'\n];\n\nconst TEST_QUERY = 
'List exactly 3 colors. Format as a numbered list (1. 2. 3.)';\n\n/**\n * Tests if the LLM can generate a properly structured list.\n * \n * @param config - Sentinel configuration with LLM client\n * @returns Test result with structure analysis\n * \n * @example\n * const result = await structuredListTest({\n *   client: myLLMClient,\n *   model: 'gpt-4'\n * });\n * \n * if (result.passed) {\n *   console.log('LLM can generate structured lists');\n * }\n */\nexport async function structuredListTest(config: SentinelConfig): Promise<SentinelTestResult> {\n  try {\n    const response = await config.client.generate({\n      prompt: TEST_QUERY,\n      model: config.model\n    });\n\n    const text = response.text.trim();\n\n    // Check for numbered list patterns\n    const numberedPattern = /^\\s*[1-3][.)]\\s*.+$/gm;\n    const numberedMatches = text.match(numberedPattern) || [];\n\n    // Check for bullet list patterns (alternative valid format)\n    const bulletPattern = /^\\s*[-*•]\\s*.+$/gm;\n    const bulletMatches = text.match(bulletPattern) || [];\n\n    // Check for line-separated items\n    const lines = text.split('\\n').filter(l => l.trim().length > 0);\n\n    // Determine list type and count\n    const hasNumberedList = numberedMatches.length >= 3;\n    const hasBulletList = bulletMatches.length >= 3;\n    const hasLineList = lines.length >= 3;\n\n    // Check for color words\n    const colorWords = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink', \n                        'black', 'white', 'brown', 'gray', 'grey', 'cyan', 'magenta'];\n    const foundColors = colorWords.filter(c => text.toLowerCase().includes(c));\n\n    // Pass if we have a list structure with at least 3 items and some colors\n    const hasValidStructure = hasNumberedList || hasBulletList || hasLineList;\n    const hasColors = foundColors.length >= 2;\n    const passed = hasValidStructure && hasColors;\n\n    return {\n      test: 'structuredListTest',\n      passed,\n      
message: passed\n        ? 'LLM generated a properly structured list'\n        : `List structure issues: ${!hasValidStructure ? 'No valid list format detected' : 'Missing expected content'}`,\n      details: {\n        query: TEST_QUERY,\n        response: text.substring(0, 300),\n        numberedItems: numberedMatches.length,\n        bulletItems: bulletMatches.length,\n        lineItems: lines.length,\n        foundColors,\n        hasValidStructure,\n        listType: hasNumberedList ? 'numbered' : hasBulletList ? 'bullet' : hasLineList ? 'line' : 'none'\n      },\n      confidence: hasValidStructure ? 0.85 : 0.6,\n      limitations: LIMITATIONS\n    };\n  } catch (error) {\n    return {\n      test: 'structuredListTest',\n      passed: false,\n      message: `Test failed with error: ${error instanceof Error ? error.message : 'Unknown error'}`,\n      details: {\n        error: error instanceof Error ? error.message : 'Unknown error'\n      },\n      confidence: 0.5,\n      limitations: [...LIMITATIONS, 'Test failed due to error']\n    };\n  }\n}\n"]}