llmverify
Version:
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
93 lines • 11.4 kB
JavaScript
;
/**
* Sentinel Test Suite
*
* Runs all sentinel tests and aggregates results.
*
* @module sentinel/suite
* @author Haiec
* @license MIT
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.runAllSentinelTests = runAllSentinelTests;
const staticEchoTest_1 = require("./staticEchoTest");
const duplicateQueryTest_1 = require("./duplicateQueryTest");
const structuredListTest_1 = require("./structuredListTest");
const shortReasoningTest_1 = require("./shortReasoningTest");
/**
 * Runs all sentinel tests and returns aggregated results.
 *
 * Tests are executed one at a time, in the fixed order staticEchoTest,
 * duplicateQueryTest, structuredListTest, shortReasoningTest. A test that
 * throws (or whose promise rejects) is recorded as a failed result with
 * `confidence: 0` instead of aborting the suite.
 *
 * Each executed test produces exactly one entry in `results` and exactly one
 * `onTestComplete` notification. An exception thrown synchronously by
 * `onTestComplete` is ignored: it neither changes the recorded result nor
 * stops the remaining tests.
 *
 * The suite passes when at least 75% of the executed tests pass. The
 * threshold is applied to the exact ratio; the returned `passRate` is that
 * ratio rounded to two decimals. When every test is skipped, `totalCount` is
 * 0, `passRate` is 0 and `passed` is false.
 *
 * @param config - Sentinel configuration with LLM client; forwarded
 *   unchanged to every test
 * @param options - Optional configuration for which tests to run
 * @param options.skipTests - Names of tests to leave out of the run
 * @param options.onTestComplete - Called with each result (including
 *   synthesized error results) right after it is recorded
 * @returns Aggregated test suite results: `passed`, `passedCount`,
 *   `totalCount`, `passRate`, `results`, `timestamp` (start time from
 *   `Date.now()`), `durationMs` and `summary`
 *
 * @example
 * const suite = await runAllSentinelTests({
 *   client: myLLMClient,
 *   model: 'gpt-4'
 * });
 *
 * console.log(`Passed ${suite.passedCount}/${suite.totalCount} tests`);
 *
 * if (!suite.passed) {
 *   suite.results.filter(r => !r.passed).forEach(r => {
 *     console.error(`Failed: ${r.test} - ${r.message}`);
 *   });
 * }
 */
async function runAllSentinelTests(config, options) {
    const start = Date.now();
    const results = [];
    const skipTests = new Set(options?.skipTests || []);
    // Turns whatever a test threw into a short reason. Thrown strings are
    // kept verbatim; other non-Error values stay 'Unknown' so that no
    // user-defined conversion code runs inside the error path.
    const describeThrown = (error) => {
        if (error instanceof Error) {
            return error.message;
        }
        return typeof error === 'string' ? error : 'Unknown';
    };
    // Define test runners
    const tests = [
        { name: 'staticEchoTest', run: () => (0, staticEchoTest_1.staticEchoTest)(config) },
        { name: 'duplicateQueryTest', run: () => (0, duplicateQueryTest_1.duplicateQueryTest)(config) },
        { name: 'structuredListTest', run: () => (0, structuredListTest_1.structuredListTest)(config) },
        { name: 'shortReasoningTest', run: () => (0, shortReasoningTest_1.shortReasoningTest)(config) }
    ];
    // Run tests sequentially
    for (const test of tests) {
        if (skipTests.has(test.name))
            continue;
        // Step 1: obtain the result. Only the test itself is inside this
        // try block, so a failure here is always the test's own failure.
        let result;
        try {
            result = await test.run();
        }
        catch (error) {
            const reason = describeThrown(error);
            result = {
                test: test.name,
                passed: false,
                message: `Test threw error: ${reason}`,
                details: { error: reason },
                confidence: 0,
                limitations: ['Test failed due to unhandled error']
            };
        }
        // Step 2: record it exactly once.
        results.push(result);
        // Step 3: notify the observer. It is guarded separately so that a
        // throwing callback cannot add a second (failed) entry for the same
        // test, be invoked twice, or abort the rest of the suite.
        if (options?.onTestComplete) {
            try {
                options.onTestComplete(result);
            }
            catch {
                // Deliberately ignored: the observer is a reporting hook and
                // must not influence the outcome of the run.
            }
        }
    }
    const end = Date.now();
    const passedCount = results.filter(r => r.passed).length;
    const totalCount = results.length;
    // Guard against 0/0 when every test was skipped.
    const passRate = totalCount > 0 ? passedCount / totalCount : 0;
    const passed = passRate >= 0.75; // Pass if 75%+ tests pass
    return {
        passed,
        passedCount,
        totalCount,
        passRate: Math.round(passRate * 100) / 100,
        results,
        timestamp: start,
        durationMs: end - start,
        summary: passed
            ? `All critical tests passed (${passedCount}/${totalCount})`
            : `Some tests failed (${passedCount}/${totalCount} passed)`
    };
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"suite.js","sourceRoot":"","sources":["../../src/sentinel/suite.ts"],"names":[],"mappings":";AAAA;;;;;;;;GAQG;;AAmDH,kDAiEC;AAjHD,qDAAkD;AAClD,6DAA0D;AAC1D,6DAA0D;AAC1D,6DAA0D;AAwB1D;;;;;;;;;;;;;;;;;;;;GAoBG;AACI,KAAK,UAAU,mBAAmB,CACvC,MAAsB,EACtB,OAGC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACzB,MAAM,OAAO,GAAyB,EAAE,CAAC;IACzC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC;IAEpD,sBAAsB;IACtB,MAAM,KAAK,GAAoE;QAC7E,EAAE,IAAI,EAAE,gBAAgB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,+BAAc,EAAC,MAAM,CAAC,EAAE;QAC7D,EAAE,IAAI,EAAE,oBAAoB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,uCAAkB,EAAC,MAAM,CAAC,EAAE;QACrE,EAAE,IAAI,EAAE,oBAAoB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,uCAAkB,EAAC,MAAM,CAAC,EAAE;QACrE,EAAE,IAAI,EAAE,oBAAoB,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,IAAA,uCAAkB,EAAC,MAAM,CAAC,EAAE;KACtE,CAAC;IAEF,yBAAyB;IACzB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;YAAE,SAAS;QAEvC,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,GAAG,EAAE,CAAC;YAChC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAErB,IAAI,OAAO,EAAE,cAAc,EAAE,CAAC;gBAC5B,OAAO,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,WAAW,GAAuB;gBACtC,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,MAAM,EAAE,KAAK;gBACb,OAAO,EAAE,qBAAqB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE;gBAClF,OAAO,EAAE,EAAE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,EAAE;gBACtE,UAAU,EAAE,CAAC;gBACb,WAAW,EAAE,CAAC,oCAAoC,CAAC;aACpD,CAAC;YACF,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAE1B,IAAI,OAAO,EAAE,cAAc,EAAE,CAAC;gBAC5B,OAAO,CAAC,cAAc,CAAC,WAAW,CAAC,CAAC;YACtC,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,MAAM,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;IAClC,MAAM,QAAQ,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,MAAM,MAAM,GAAG,Q
AAQ,IAAI,IAAI,CAAC,CAAC,0BAA0B;IAE3D,OAAO;QACL,MAAM;QACN,WAAW;QACX,UAAU;QACV,QAAQ,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,GAAG;QAC1C,OAAO;QACP,SAAS,EAAE,KAAK;QAChB,UAAU,EAAE,GAAG,GAAG,KAAK;QACvB,OAAO,EAAE,MAAM;YACb,CAAC,CAAC,8BAA8B,WAAW,IAAI,UAAU,GAAG;YAC5D,CAAC,CAAC,sBAAsB,WAAW,IAAI,UAAU,UAAU;KAC9D,CAAC;AACJ,CAAC","sourcesContent":["/**\n * Sentinel Test Suite\n * \n * Runs all sentinel tests and aggregates results.\n * \n * @module sentinel/suite\n * @author Haiec\n * @license MIT\n */\n\nimport { SentinelTestResult, SentinelConfig } from '../types/runtime';\nimport { staticEchoTest } from './staticEchoTest';\nimport { duplicateQueryTest } from './duplicateQueryTest';\nimport { structuredListTest } from './structuredListTest';\nimport { shortReasoningTest } from './shortReasoningTest';\n\n/**\n * Aggregated sentinel test suite results.\n */\nexport interface SentinelSuite {\n  /** Overall pass/fail status */\n  passed: boolean;\n  /** Number of tests passed */\n  passedCount: number;\n  /** Total number of tests */\n  totalCount: number;\n  /** Pass rate (0-1) */\n  passRate: number;\n  /** Individual test results */\n  results: SentinelTestResult[];\n  /** Timestamp of test run */\n  timestamp: number;\n  /** Duration in milliseconds */\n  durationMs: number;\n  /** Summary message */\n  summary: string;\n}\n\n/**\n * Runs all sentinel tests and returns aggregated results.\n * \n * @param config - Sentinel configuration with LLM client\n * @param options - Optional configuration for which tests to run\n * @returns Aggregated test suite results\n * \n * @example\n * const suite = await runAllSentinelTests({\n *   client: myLLMClient,\n *   model: 'gpt-4'\n * });\n * \n * console.log(`Passed ${suite.passedCount}/${suite.totalCount} tests`);\n * \n * if (!suite.passed) {\n *   suite.results.filter(r => !r.passed).forEach(r => {\n *     console.error(`Failed: ${r.test} - ${r.message}`);\n *   });\n * }\n */\nexport async function runAllSentinelTests(\n  
config: SentinelConfig,\n  options?: {\n    skipTests?: string[];\n    onTestComplete?: (result: SentinelTestResult) => void;\n  }\n): Promise<SentinelSuite> {\n  const start = Date.now();\n  const results: SentinelTestResult[] = [];\n  const skipTests = new Set(options?.skipTests || []);\n\n  // Define test runners\n  const tests: Array<{ name: string; run: () => Promise<SentinelTestResult> }> = [\n    { name: 'staticEchoTest', run: () => staticEchoTest(config) },\n    { name: 'duplicateQueryTest', run: () => duplicateQueryTest(config) },\n    { name: 'structuredListTest', run: () => structuredListTest(config) },\n    { name: 'shortReasoningTest', run: () => shortReasoningTest(config) }\n  ];\n\n  // Run tests sequentially\n  for (const test of tests) {\n    if (skipTests.has(test.name)) continue;\n\n    try {\n      const result = await test.run();\n      results.push(result);\n      \n      if (options?.onTestComplete) {\n        options.onTestComplete(result);\n      }\n    } catch (error) {\n      const errorResult: SentinelTestResult = {\n        test: test.name,\n        passed: false,\n        message: `Test threw error: ${error instanceof Error ? error.message : 'Unknown'}`,\n        details: { error: error instanceof Error ? error.message : 'Unknown' },\n        confidence: 0,\n        limitations: ['Test failed due to unhandled error']\n      };\n      results.push(errorResult);\n      \n      if (options?.onTestComplete) {\n        options.onTestComplete(errorResult);\n      }\n    }\n  }\n\n  const end = Date.now();\n  const passedCount = results.filter(r => r.passed).length;\n  const totalCount = results.length;\n  const passRate = totalCount > 0 ? passedCount / totalCount : 0;\n  const passed = passRate >= 0.75; // Pass if 75%+ tests pass\n\n  return {\n    passed,\n    passedCount,\n    totalCount,\n    passRate: Math.round(passRate * 100) / 100,\n    results,\n    timestamp: start,\n    durationMs: end - start,\n    summary: passed\n      ? 
`All critical tests passed (${passedCount}/${totalCount})`\n      : `Some tests failed (${passedCount}/${totalCount} passed)`\n  };\n}\n"]}