UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

93 lines 11.4 kB
"use strict";
/**
 * Sentinel Test Suite
 *
 * Runs all sentinel tests and aggregates results.
 *
 * @module sentinel/suite
 * @author Haiec
 * @license MIT
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.runAllSentinelTests = runAllSentinelTests;
const staticEchoTest_1 = require("./staticEchoTest");
const duplicateQueryTest_1 = require("./duplicateQueryTest");
const structuredListTest_1 = require("./structuredListTest");
const shortReasoningTest_1 = require("./shortReasoningTest");
/**
 * Runs all sentinel tests and returns aggregated results.
 *
 * Tests run one at a time, in a fixed order: staticEchoTest,
 * duplicateQueryTest, structuredListTest, shortReasoningTest. A test whose
 * name appears in `options.skipTests` is not run and is not counted in
 * `totalCount`. A test that throws (or rejects) is recorded as a failed
 * result with `confidence: 0` instead of aborting the suite.
 *
 * Each executed test contributes exactly one entry to `results` and triggers
 * exactly one `onTestComplete` call. An exception thrown by `onTestComplete`
 * itself is NOT treated as a test failure; it propagates to the caller.
 *
 * The suite is considered passed when at least 75% of the executed tests
 * passed. When every test is skipped the pass rate is 0, so the suite is
 * reported as not passed.
 *
 * @param config - Sentinel configuration with LLM client
 * @param options - Optional configuration for which tests to run
 * @param options.skipTests - Names of tests to leave out of the run
 * @param options.onTestComplete - Called with each result as soon as the
 *   corresponding test has finished (successfully or with an error)
 * @returns Aggregated test suite results: `passed`, `passedCount`,
 *   `totalCount`, `passRate` (rounded to two decimals), `results`,
 *   `timestamp` (start time, ms since epoch), `durationMs`, `summary`
 *
 * @example
 * const suite = await runAllSentinelTests({
 *   client: myLLMClient,
 *   model: 'gpt-4'
 * });
 *
 * console.log(`Passed ${suite.passedCount}/${suite.totalCount} tests`);
 *
 * if (!suite.passed) {
 *   suite.results.filter(r => !r.passed).forEach(r => {
 *     console.error(`Failed: ${r.test} - ${r.message}`);
 *   });
 * }
 */
async function runAllSentinelTests(config, options) {
    const start = Date.now();
    const results = [];
    const skipTests = new Set(options?.skipTests || []);
    // Define test runners
    const tests = [
        { name: 'staticEchoTest', run: () => (0, staticEchoTest_1.staticEchoTest)(config) },
        { name: 'duplicateQueryTest', run: () => (0, duplicateQueryTest_1.duplicateQueryTest)(config) },
        { name: 'structuredListTest', run: () => (0, structuredListTest_1.structuredListTest)(config) },
        { name: 'shortReasoningTest', run: () => (0, shortReasoningTest_1.shortReasoningTest)(config) }
    ];
    // Run tests sequentially
    for (const test of tests) {
        if (skipTests.has(test.name))
            continue;
        let result;
        try {
            // Only the test itself is guarded. Keeping the callback out of this
            // try block prevents a throwing callback from being mistaken for a
            // test failure and from producing a second result for the same test.
            result = await test.run();
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown';
            result = {
                test: test.name,
                passed: false,
                message: `Test threw error: ${reason}`,
                details: { error: reason },
                confidence: 0,
                limitations: ['Test failed due to unhandled error']
            };
        }
        // Record and report every executed test exactly once.
        results.push(result);
        if (options?.onTestComplete) {
            options.onTestComplete(result);
        }
    }
    const end = Date.now();
    const passedCount = results.filter(r => r.passed).length;
    const totalCount = results.length;
    // Guard against division by zero when every test was skipped.
    const passRate = totalCount > 0 ? passedCount / totalCount : 0;
    // Pass if 75%+ tests pass. The comparison uses the unrounded rate; only
    // the reported `passRate` field is rounded.
    const passed = passRate >= 0.75;
    return {
        passed,
        passedCount,
        totalCount,
        passRate: Math.round(passRate * 100) / 100,
        results,
        timestamp: start,
        durationMs: end - start,
        // NOTE(review): the "All critical tests passed" wording is used whenever
        // the 75% threshold is met, even if an individual test failed.
        summary: passed
            ? `All critical tests passed (${passedCount}/${totalCount})`
            : `Some tests failed (${passedCount}/${totalCount} passed)`
    };
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"suite.js","sourceRoot":"","sources":["../../src/sentinel/suite.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAmDH,kDAiEC;AAjHD,qDAAkD;AAClD,6DAA0D;AAC1D,6DAA0D;AAC1D,6DAA0D;AAwB1D;;;;;;;;;;;;;;;;;;;;GAoBG;AACI,KAAK,UAAU,mBAAmB,CACvC,MAAsB,EACtB,OAGC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,OAAO,GAAyB,EAAE,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC;IAEpD,sBAAsB;IACtB,MAAM,KAAK,GAAoE;QAC7E,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,+BAAc,EAAC,MAAM,CAAC,EAAE;QAC7D,EAAE,IAAI,EAAE,oBAAoB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,uCAAkB,EAAC,MAAM,CAAC,EAAE;QACrE,EAAE,IAAI,EAAE,oBAAoB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,uCAAkB,EAAC,MAAM,CAAC,EAAE;QACrE,EAAE,IAAI,EAAE,oBAAoB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,uCAAkB,EAAC,MAAM,CAAC,EAAE;KACtE,CAAC;IAEF,yBAAyB;IACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QAEvC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,CAAC;YAChC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAErB,IAAI,OAAO,EAAE,cAAc,EAAE,CAAC;gBAC5B,OAAO,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAAC,OAAO,K
AAK,EAAE,CAAC;YACf,MAAM,WAAW,GAAuB;gBACtC,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,MAAM,EAAE,KAAK;gBACb,OAAO,EAAE,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE;gBAClF,OAAO,EAAE,EAAE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE;gBACtE,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC,oCAAoC,CAAC;aACpD,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,IAAI,OAAO,EAAE,cAAc,EAAE,CAAC;gBAC5B,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;YACtC,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,MAAM,QAAQ,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,MAAM,GAAG,QAAQ,IAAI,IAAI,CAAC,CAAC,0BAA0B;IAE3D,OAAO;QACL,MAAM;QACN,WAAW;QACX,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,GAAG;QAC1C,OAAO;QACP,SAAS,EAAE,KAAK;QAChB,UAAU,EAAE,GAAG,GAAG,KAAK;QACvB,OAAO,EAAE,MAAM;YACb,CAAC,CAAC,8BAA8B,WAAW,IAAI,UAAU,GAAG;YAC5D,CAAC,CAAC,sBAAsB,WAAW,IAAI,UAAU,UAAU;KAC9D,CAAC;AACJ,CAAC","sourcesContent":["/**\n * Sentinel Test Suite\n * \n * Runs all sentinel tests and aggregates results.\n * \n * @module sentinel/suite\n * @author Haiec\n * @license MIT\n */\n\nimport { SentinelTestResult, SentinelConfig } from '../types/runtime';\nimport { staticEchoTest } from './staticEchoTest';\nimport { duplicateQueryTest } from './duplicateQueryTest';\nimport { structuredListTest } from './structuredListTest';\nimport { shortReasoningTest } from './shortReasoningTest';\n\n/**\n * Aggregated sentinel test suite results.\n */\nexport interface SentinelSuite {\n  /** Overall pass/fail status */\n  passed: boolean;\n  /** Number of tests passed */\n  passedCount: number;\n  /** Total number of tests */\n  totalCount: number;\n  /** Pass rate (0-1) */\n  passRate: number;\n  /** Individual test results */\n  results: SentinelTestResult[];\n  /** Timestamp of test 
run */\n  timestamp: number;\n  /** Duration in milliseconds */\n  durationMs: number;\n  /** Summary message */\n  summary: string;\n}\n\n/**\n * Runs all sentinel tests and returns aggregated results.\n * \n * @param config - Sentinel configuration with LLM client\n * @param options - Optional configuration for which tests to run\n * @returns Aggregated test suite results\n * \n * @example\n * const suite = await runAllSentinelTests({\n *   client: myLLMClient,\n *   model: 'gpt-4'\n * });\n * \n * console.log(`Passed ${suite.passedCount}/${suite.totalCount} tests`);\n * \n * if (!suite.passed) {\n *   suite.results.filter(r => !r.passed).forEach(r => {\n *     console.error(`Failed: ${r.test} - ${r.message}`);\n *   });\n * }\n */\nexport async function runAllSentinelTests(\n  config: SentinelConfig,\n  options?: {\n    skipTests?: string[];\n    onTestComplete?: (result: SentinelTestResult) => void;\n  }\n): Promise<SentinelSuite> {\n  const start = Date.now();\n  const results: SentinelTestResult[] = [];\n  const skipTests = new Set(options?.skipTests || []);\n\n  // Define test runners\n  const tests: Array<{ name: string; run: () => Promise<SentinelTestResult> }> = [\n    { name: 'staticEchoTest', run: () => staticEchoTest(config) },\n    { name: 'duplicateQueryTest', run: () => duplicateQueryTest(config) },\n    { name: 'structuredListTest', run: () => structuredListTest(config) },\n    { name: 'shortReasoningTest', run: () => shortReasoningTest(config) }\n  ];\n\n  // Run tests sequentially\n  for (const test of tests) {\n    if (skipTests.has(test.name)) continue;\n\n    try {\n      const result = await test.run();\n      results.push(result);\n      \n      if (options?.onTestComplete) {\n        options.onTestComplete(result);\n      }\n    } catch (error) {\n      const errorResult: SentinelTestResult = {\n        test: test.name,\n        passed: false,\n        message: `Test threw error: ${error instanceof Error ? 
error.message : 'Unknown'}`,\n        details: { error: error instanceof Error ? error.message : 'Unknown' },\n        confidence: 0,\n        limitations: ['Test failed due to unhandled error']\n      };\n      results.push(errorResult);\n      \n      if (options?.onTestComplete) {\n        options.onTestComplete(errorResult);\n      }\n    }\n  }\n\n  const end = Date.now();\n  const passedCount = results.filter(r => r.passed).length;\n  const totalCount = results.length;\n  const passRate = totalCount > 0 ? passedCount / totalCount : 0;\n  const passed = passRate >= 0.75; // Pass if 75%+ tests pass\n\n  return {\n    passed,\n    passedCount,\n    totalCount,\n    passRate: Math.round(passRate * 100) / 100,\n    results,\n    timestamp: start,\n    durationMs: end - start,\n    summary: passed\n      ? `All critical tests passed (${passedCount}/${totalCount})`\n      : `Some tests failed (${passedCount}/${totalCount} passed)`\n  };\n}\n"]}