UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

107 lines 12.9 kB
"use strict"; /** * Structured List Test * * Tests if the LLM can generate properly structured list output. * Validates format compliance and structural consistency. * * WHAT THIS TESTS: * ✅ List formatting capability * ✅ Instruction following for structure * ✅ Consistent formatting patterns * * LIMITATIONS: * - Format variations may be valid but flagged * - Does not validate list content accuracy * - May be too strict for creative responses * * @module sentinel/structuredListTest * @author Haiec * @license MIT */ Object.defineProperty(exports, "__esModule", { value: true }); exports.structuredListTest = structuredListTest; const LIMITATIONS = [ 'Format variations may be valid but flagged as failures', 'Does not validate content accuracy', 'May be too strict for creative or conversational models', 'Different models have different formatting conventions' ]; const TEST_QUERY = 'List exactly 3 colors. Format as a numbered list (1. 2. 3.)'; /** * Tests if the LLM can generate a properly structured list. * * @param config - Sentinel configuration with LLM client * @returns Test result with structure analysis * * @example * const result = await structuredListTest({ * client: myLLMClient, * model: 'gpt-4' * }); * * if (result.passed) { * console.log('LLM can generate structured lists'); * } */ async function structuredListTest(config) { try { const response = await config.client.generate({ prompt: TEST_QUERY, model: config.model }); const text = response.text.trim(); // Check for numbered list patterns const numberedPattern = /^\s*[1-3][.)]\s*.+$/gm; const numberedMatches = text.match(numberedPattern) || []; // Check for bullet list patterns (alternative valid format) const bulletPattern = /^\s*[-*•]\s*.+$/gm; const bulletMatches = text.match(bulletPattern) || []; // Check for line-separated items const lines = text.split('\n').filter(l => l.trim().length > 0); // Determine list type and count const hasNumberedList = numberedMatches.length >= 3; const hasBulletList = bulletMatches.length >= 3; const hasLineList = lines.length >= 3; // Check for color words const colorWords = ['red', 'blue', 'green', 'yellow', 'orange', 'purple', 'pink', 'black', 'white', 'brown', 'gray', 'grey', 'cyan', 'magenta']; const foundColors = colorWords.filter(c => text.toLowerCase().includes(c)); // Pass if we have a list structure with at least 3 items and some colors const hasValidStructure = hasNumberedList || hasBulletList || hasLineList; const hasColors = foundColors.length >= 2; const passed = hasValidStructure && hasColors; return { test: 'structuredListTest', passed, message: passed ? 'LLM generated a properly structured list' : `List structure issues: ${!hasValidStructure ? 'No valid list format detected' : 'Missing expected content'}`, details: { query: TEST_QUERY, response: text.substring(0, 300), numberedItems: numberedMatches.length, bulletItems: bulletMatches.length, lineItems: lines.length, foundColors, hasValidStructure, listType: hasNumberedList ? 'numbered' : hasBulletList ? 'bullet' : hasLineList ? 'line' : 'none' }, confidence: hasValidStructure ? 0.85 : 0.6, limitations: LIMITATIONS }; } catch (error) { return { test: 'structuredListTest', passed: false, message: `Test failed with error: ${error instanceof Error ? error.message : 'Unknown error'}`, details: { error: error instanceof Error ? error.message : 'Unknown error' }, confidence: 0.5, limitations: [...LIMITATIONS, 'Test failed due to error'] }; } } //# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic3RydWN0dXJlZExpc3RUZXN0LmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vc3JjL3NlbnRpbmVsL3N0cnVjdHVyZWRMaXN0VGVzdC50cyJdLCJuYW1lcyI6W10sIm1hcHBpbmdzIjoiO0FBQUE7Ozs7Ozs7Ozs7Ozs7Ozs7Ozs7R0FtQkc7O0FBNkJILGdEQWtFQztBQTNGRCxNQUFNLFdBQVcsR0FBRztJQUNsQix3REFBd0Q7SUFDeEQsb0NBQW9DO0lBQ3BDLHlEQUF5RDtJQUN6RCx3REFBd0Q7Q0FDekQsQ0FBQztBQUVGLE1BQU0sVUFBVSxHQUFHLDZEQUE2RCxDQUFDO0FBRWpGOzs7Ozs7Ozs7Ozs7Ozs7R0FlRztBQUNJLEtBQUssVUFBVSxrQkFBa0IsQ0FBQyxNQUFzQjtJQUM3RCxJQUFJLENBQUM7UUFDSCxNQUFNLFFBQVEsR0FBRyxNQUFNLE1BQU0sQ0FBQyxNQUFNLENBQUMsUUFBUSxDQUFDO1lBQzVDLE1BQU0sRUFBRSxVQUFVO1lBQ2xCLEtBQUssRUFBRSxNQUFNLENBQUMsS0FBSztTQUNwQixDQUFDLENBQUM7UUFFSCxNQUFNLElBQUksR0FBRyxRQUFRLENBQUMsSUFBSSxDQUFDLElBQUksRUFBRSxDQUFDO1FBRWxDLG1DQUFtQztRQUNuQyxNQUFNLGVBQWUsR0FBRyx1QkFBdUIsQ0FBQztRQUNoRCxNQUFNLGVBQWUsR0FBRyxJQUFJLENBQUMsS0FBSyxDQUFDLGVBQWUsQ0FBQyxJQUFJLEVBQUUsQ0FBQztRQUUxRCw0REFBNEQ7UUFDNUQsTUFBTSxhQUFhLEdBQUcsbUJBQW1CLENBQUM7UUFDMUMsTUFBTSxhQUFhLEdBQUcsSUFBSSxDQUFDLEtBQUssQ0FBQyxhQUFhLENBQUMsSUFBSSxFQUFFLENBQUM7UUFFdEQsaUNBQWlDO1FBQ2pDLE1BQU0sS0FBSyxHQUFHLElBQUksQ0FBQyxLQUFLLENBQUMsSUFBSSxDQUFDLENBQUMsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFLENBQUMsQ0FBQyxDQUFDLElBQUksRUFBRSxDQUFDLE1BQU0sR0FBRyxDQUFDLENBQUMsQ0FBQztRQUVoRSxnQ0FBZ0M7UUFDaEMsTUFBTSxlQUFlLEdBQUcsZUFBZSxDQUFDLE1BQU0sSUFBSSxDQUFDLENBQUM7UUFDcEQsTUFBTSxhQUFhLEdBQUcsYUFBYSxDQUFDLE1BQU0sSUFBSSxDQUFDLENBQUM7UUFDaEQsTUFBTSxXQUFXLEdBQUcsS0FBSyxDQUFDLE1BQU0sSUFBSSxDQUFDLENBQUM7UUFFdEMsd0JBQXdCO1FBQ3hCLE1BQU0sVUFBVSxHQUFHLENBQUMsS0FBSyxFQUFFLE1BQU0sRUFBRSxPQUFPLEVBQUUsUUFBUSxFQUFFLFFBQVEsRUFBRSxRQUFRLEVBQUUsTUFBTTtZQUM1RCxPQUFPLEVBQUUsT0FBTyxFQUFFLE9BQU8sRUFBRSxNQUFNLEVBQUUsTUFBTSxFQUFFLE1BQU0sRUFBRSxTQUFTLENBQUMsQ0FBQztRQUNsRixNQUFNLFdBQVcsR0FBRyxVQUFVLENBQUMsTUFBTSxDQUFDLENBQUMsQ0FBQyxFQUFFLENBQUMsSUFBSSxDQUFDLFdBQVcsRUFBRSxDQUFDLFFBQVEsQ0FBQyxDQUFDLENBQUMsQ0FBQyxDQUFDO1FBRTNFLHlFQUF5RTtRQUN6RSxNQUFNLGlCQUFpQixHQUFHLGVBQWUsSUFBSSxhQUFhLElBQUksV0FBVyxDQUFDO1FBQzFFLE1BQU0sU0FBUyxHQUFHLFdBQVcsQ0FBQyxNQUFNLElBQUksQ0FBQyxDQUFDO1FBQzFDLE1BQU0sTUFBTSxHQUFHLGlCQUFpQixJQUFJLFNBQVMsQ0FBQztRQUU5QyxPQUFPO1lBQ0wsSUFBSSxFQUFFLG9CQUFvQjtZQUMxQixNQUFNO1lBQ04sT0FBTyxFQUFFLE1BQU07Z0JBQ2IsQ0FBQyxDQUFDLDBDQUEwQztnQkFDNUMsQ0FBQyxDQUFDLDBCQUEwQixDQUFDLGlCQUFpQixDQUFDLENBQUMsQ0FBQywrQkFBK0IsQ0FBQyxDQUFDLENBQUMsMEJBQTBCLEVBQUU7WUFDakgsT0FBTyxFQUFFO2dCQUNQLEtBQUssRUFBRSxVQUFVO2dCQUNqQixRQUFRLEVBQUUsSUFBSSxDQUFDLFNBQVMsQ0FBQyxDQUFDLEVBQUUsR0FBRyxDQUFDO2dCQUNoQyxhQUFhLEVBQUUsZUFBZSxDQUFDLE1BQU07Z0JBQ3JDLFdBQVcsRUFBRSxhQUFhLENBQUMsTUFBTTtnQkFDakMsU0FBUyxFQUFFLEtBQUssQ0FBQyxNQUFNO2dCQUN2QixXQUFXO2dCQUNYLGlCQUFpQjtnQkFDakIsUUFBUSxFQUFFLGVBQWUsQ0FBQyxDQUFDLENBQUMsVUFBVSxDQUFDLENBQUMsQ0FBQyxhQUFhLENBQUMsQ0FBQyxDQUFDLFFBQVEsQ0FBQyxDQUFDLENBQUMsV0FBVyxDQUFDLENBQUMsQ0FBQyxNQUFNLENBQUMsQ0FBQyxDQUFDLE1BQU07YUFDbEc7WUFDRCxVQUFVLEVBQUUsaUJBQWlCLENBQUMsQ0FBQyxDQUFDLElBQUksQ0FBQyxDQUFDLENBQUMsR0FBRztZQUMxQyxXQUFXLEVBQUUsV0FBVztTQUN6QixDQUFDO0lBQ0osQ0FBQztJQUFDLE9BQU8sS0FBSyxFQUFFLENBQUM7UUFDZixPQUFPO1lBQ0wsSUFBSSxFQUFFLG9CQUFvQjtZQUMxQixNQUFNLEVBQUUsS0FBSztZQUNiLE9BQU8sRUFBRSwyQkFBMkIsS0FBSyxZQUFZLEtBQUssQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFDLENBQUMsZUFBZSxFQUFFO1lBQzlGLE9BQU8sRUFBRTtnQkFDUCxLQUFLLEVBQUUsS0FBSyxZQUFZLEtBQUssQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFDLENBQUMsZUFBZTthQUNoRTtZQUNELFVBQVUsRUFBRSxHQUFHO1lBQ2YsV0FBVyxFQUFFLENBQUMsR0FBRyxXQUFXLEVBQUUsMEJBQTBCLENBQUM7U0FDMUQsQ0FBQztJQUNKLENBQUM7QUFDSCxDQUFDIiwic291cmNlc0NvbnRlbnQiOlsiLyoqXG4gKiBTdHJ1Y3R1cmVkIExpc3QgVGVzdFxuICogXG4gKiBUZXN0cyBpZiB0aGUgTExNIGNhbiBnZW5lcmF0ZSBwcm9wZXJseSBzdHJ1Y3R1cmVkIGxpc3Qgb3V0cHV0LlxuICogVmFsaWRhdGVzIGZvcm1hdCBjb21wbGlhbmNlIGFuZCBzdHJ1Y3R1cmFsIGNvbnNpc3RlbmN5LlxuICogXG4gKiBXSEFUIFRISVMgVEVTVFM6XG4gKiDinIUgTGlzdCBmb3JtYXR0aW5nIGNhcGFiaWxpdHlcbiAqIOKchSBJbnN0cnVjdGlvbiBmb2xsb3dpbmcgZm9yIHN0cnVjdHVyZVxuICog4pyFIENvbnNpc3RlbnQgZm9ybWF0dGluZyBwYXR0ZXJuc1xuICogXG4gKiBMSU1JVEFUSU9OUzpcbiAqIC0gRm9ybWF0IHZhcmlhdGlvbnMgbWF5IGJlIHZhbGlkIGJ1dCBmbGFnZ2VkXG4gKiAtIERvZXMgbm90IHZhbGlkYXRlIGxpc3QgY29udGVudCBhY2N1cmFjeVxuICogLSBNYXkgYmUgdG9vIHN0cmljdCBmb3IgY3JlYXRpdmUgcmVzcG9uc2VzXG4gKiBcbiAqIEBtb2R1bGUgc2VudGluZWwvc3RydWN0dXJlZExpc3RUZXN0XG4gKiBAYXV0aG9yIEhhaWVjXG4gKiBAbGljZW5zZSBNSVRcbiAqL1xuXG5pbXBvcnQgeyBTZW50aW5lbFRlc3RSZXN1bHQsIFNlbnRpbmVsQ29uZmlnIH0gZnJvbSAnLi4vdHlwZXMvcnVudGltZSc7XG5cbmNvbnN0IExJTUlUQVRJT05TID0gW1xuICAnRm9ybWF0IHZhcmlhdGlvbnMgbWF5IGJlIHZhbGlkIGJ1dCBmbGFnZ2VkIGFzIGZhaWx1cmVzJyxcbiAgJ0RvZXMgbm90IHZhbGlkYXRlIGNvbnRlbnQgYWNjdXJhY3knLFxuICAnTWF5IGJlIHRvbyBzdHJpY3QgZm9yIGNyZWF0aXZlIG9yIGNvbnZlcnNhdGlvbmFsIG1vZGVscycsXG4gICdEaWZmZXJlbnQgbW9kZWxzIGhhdmUgZGlmZmVyZW50IGZvcm1hdHRpbmcgY29udmVudGlvbnMnXG5dO1xuXG5jb25zdCBURVNUX1FVRVJZID0gJ0xpc3QgZXhhY3RseSAzIGNvbG9ycy4gRm9ybWF0IGFzIGEgbnVtYmVyZWQgbGlzdCAoMS4gMi4gMy4pJztcblxuLyoqXG4gKiBUZXN0cyBpZiB0aGUgTExNIGNhbiBnZW5lcmF0ZSBhIHByb3Blcmx5IHN0cnVjdHVyZWQgbGlzdC5cbiAqIFxuICogQHBhcmFtIGNvbmZpZyAtIFNlbnRpbmVsIGNvbmZpZ3VyYXRpb24gd2l0aCBMTE0gY2xpZW50XG4gKiBAcmV0dXJucyBUZXN0IHJlc3VsdCB3aXRoIHN0cnVjdHVyZSBhbmFseXNpc1xuICogXG4gKiBAZXhhbXBsZVxuICogY29uc3QgcmVzdWx0ID0gYXdhaXQgc3RydWN0dXJlZExpc3RUZXN0KHtcbiAqICAgY2xpZW50OiBteUxMTUNsaWVudCxcbiAqICAgbW9kZWw6ICdncHQtNCdcbiAqIH0pO1xuICogXG4gKiBpZiAocmVzdWx0LnBhc3NlZCkge1xuICogICBjb25zb2xlLmxvZygnTExNIGNhbiBnZW5lcmF0ZSBzdHJ1Y3R1cmVkIGxpc3RzJyk7XG4gKiB9XG4gKi9cbmV4cG9ydCBhc3luYyBmdW5jdGlvbiBzdHJ1Y3R1cmVkTGlzdFRlc3QoY29uZmlnOiBTZW50aW5lbENvbmZpZyk6IFByb21pc2U8U2VudGluZWxUZXN0UmVzdWx0PiB7XG4gIHRyeSB7XG4gICAgY29uc3QgcmVzcG9uc2UgPSBhd2FpdCBjb25maWcuY2xpZW50LmdlbmVyYXRlKHtcbiAgICAgIHByb21wdDogVEVTVF9RVUVSWSxcbiAgICAgIG1vZGVsOiBjb25maWcubW9kZWxcbiAgICB9KTtcblxuICAgIGNvbnN0IHRleHQgPSByZXNwb25zZS50ZXh0LnRyaW0oKTtcblxuICAgIC8vIENoZWNrIGZvciBudW1iZXJlZCBsaXN0IHBhdHRlcm5zXG4gICAgY29uc3QgbnVtYmVyZWRQYXR0ZXJuID0gL15cXHMqWzEtM11bLildXFxzKi4rJC9nbTtcbiAgICBjb25zdCBudW1iZXJlZE1hdGNoZXMgPSB0ZXh0Lm1hdGNoKG51bWJlcmVkUGF0dGVybikgfHwgW107XG5cbiAgICAvLyBDaGVjayBmb3IgYnVsbGV0IGxpc3QgcGF0dGVybnMgKGFsdGVybmF0aXZlIHZhbGlkIGZvcm1hdClcbiAgICBjb25zdCBidWxsZXRQYXR0ZXJuID0gL15cXHMqWy0q4oCiXVxccyouKyQvZ207XG4gICAgY29uc3QgYnVsbGV0TWF0Y2hlcyA9IHRleHQubWF0Y2goYnVsbGV0UGF0dGVybikgfHwgW107XG5cbiAgICAvLyBDaGVjayBmb3IgbGluZS1zZXBhcmF0ZWQgaXRlbXNcbiAgICBjb25zdCBsaW5lcyA9IHRleHQuc3BsaXQoJ1xcbicpLmZpbHRlcihsID0+IGwudHJpbSgpLmxlbmd0aCA+IDApO1xuXG4gICAgLy8gRGV0ZXJtaW5lIGxpc3QgdHlwZSBhbmQgY291bnRcbiAgICBjb25zdCBoYXNOdW1iZXJlZExpc3QgPSBudW1iZXJlZE1hdGNoZXMubGVuZ3RoID49IDM7XG4gICAgY29uc3QgaGFzQnVsbGV0TGlzdCA9IGJ1bGxldE1hdGNoZXMubGVuZ3RoID49IDM7XG4gICAgY29uc3QgaGFzTGluZUxpc3QgPSBsaW5lcy5sZW5ndGggPj0gMztcblxuICAgIC8vIENoZWNrIGZvciBjb2xvciB3b3Jkc1xuICAgIGNvbnN0IGNvbG9yV29yZHMgPSBbJ3JlZCcsICdibHVlJywgJ2dyZWVuJywgJ3llbGxvdycsICdvcmFuZ2UnLCAncHVycGxlJywgJ3BpbmsnLCBcbiAgICAgICAgICAgICAgICAgICAgICAgICdibGFjaycsICd3aGl0ZScsICdicm93bicsICdncmF5JywgJ2dyZXknLCAnY3lhbicsICdtYWdlbnRhJ107XG4gICAgY29uc3QgZm91bmRDb2xvcnMgPSBjb2xvcldvcmRzLmZpbHRlcihjID0+IHRleHQudG9Mb3dlckNhc2UoKS5pbmNsdWRlcyhjKSk7XG5cbiAgICAvLyBQYXNzIGlmIHdlIGhhdmUgYSBsaXN0IHN0cnVjdHVyZSB3aXRoIGF0IGxlYXN0IDMgaXRlbXMgYW5kIHNvbWUgY29sb3JzXG4gICAgY29uc3QgaGFzVmFsaWRTdHJ1Y3R1cmUgPSBoYXNOdW1iZXJlZExpc3QgfHwgaGFzQnVsbGV0TGlzdCB8fCBoYXNMaW5lTGlzdDtcbiAgICBjb25zdCBoYXNDb2xvcnMgPSBmb3VuZENvbG9ycy5sZW5ndGggPj0gMjtcbiAgICBjb25zdCBwYXNzZWQgPSBoYXNWYWxpZFN0cnVjdHVyZSAmJiBoYXNDb2xvcnM7XG5cbiAgICByZXR1cm4ge1xuICAgICAgdGVzdDogJ3N0cnVjdHVyZWRMaXN0VGVzdCcsXG4gICAgICBwYXNzZWQsXG4gICAgICBtZXNzYWdlOiBwYXNzZWRcbiAgICAgICAgPyAnTExNIGdlbmVyYXRlZCBhIHByb3Blcmx5IHN0cnVjdHVyZWQgbGlzdCdcbiAgICAgICAgOiBgTGlzdCBzdHJ1Y3R1cmUgaXNzdWVzOiAkeyFoYXNWYWxpZFN0cnVjdHVyZSA/ICdObyB2YWxpZCBsaXN0IGZvcm1hdCBkZXRlY3RlZCcgOiAnTWlzc2luZyBleHBlY3RlZCBjb250ZW50J31gLFxuICAgICAgZGV0YWlsczoge1xuICAgICAgICBxdWVyeTogVEVTVF9RVUVSWSxcbiAgICAgICAgcmVzcG9uc2U6IHRleHQuc3Vic3RyaW5nKDAsIDMwMCksXG4gICAgICAgIG51bWJlcmVkSXRlbXM6IG51bWJlcmVkTWF0Y2hlcy5sZW5ndGgsXG4gICAgICAgIGJ1bGxldEl0ZW1zOiBidWxsZXRNYXRjaGVzLmxlbmd0aCxcbiAgICAgICAgbGluZUl0ZW1zOiBsaW5lcy5sZW5ndGgsXG4gICAgICAgIGZvdW5kQ29sb3JzLFxuICAgICAgICBoYXNWYWxpZFN0cnVjdHVyZSxcbiAgICAgICAgbGlzdFR5cGU6IGhhc051bWJlcmVkTGlzdCA/ICdudW1iZXJlZCcgOiBoYXNCdWxsZXRMaXN0ID8gJ2J1bGxldCcgOiBoYXNMaW5lTGlzdCA/ICdsaW5lJyA6ICdub25lJ1xuICAgICAgfSxcbiAgICAgIGNvbmZpZGVuY2U6IGhhc1ZhbGlkU3RydWN0dXJlID8gMC44NSA6IDAuNixcbiAgICAgIGxpbWl0YXRpb25zOiBMSU1JVEFUSU9OU1xuICAgIH07XG4gIH0gY2F0Y2ggKGVycm9yKSB7XG4gICAgcmV0dXJuIHtcbiAgICAgIHRlc3Q6ICdzdHJ1Y3R1cmVkTGlzdFRlc3QnLFxuICAgICAgcGFzc2VkOiBmYWxzZSxcbiAgICAgIG1lc3NhZ2U6IGBUZXN0IGZhaWxlZCB3aXRoIGVycm9yOiAke2Vycm9yIGluc3RhbmNlb2YgRXJyb3IgPyBlcnJvci5tZXNzYWdlIDogJ1Vua25vd24gZXJyb3InfWAsXG4gICAgICBkZXRhaWxzOiB7XG4gICAgICAgIGVycm9yOiBlcnJvciBpbnN0YW5jZW9mIEVycm9yID8gZXJyb3IubWVzc2FnZSA6ICdVbmtub3duIGVycm9yJ1xuICAgICAgfSxcbiAgICAgIGNvbmZpZGVuY2U6IDAuNSxcbiAgICAgIGxpbWl0YXRpb25zOiBbLi4uTElNSVRBVElPTlMsICdUZXN0IGZhaWxlZCBkdWUgdG8gZXJyb3InXVxuICAgIH07XG4gIH1cbn1cbiJdfQ==