llmverify
Version:
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
93 lines • 11.4 kB
JavaScript
;
/**
* Sentinel Test Suite
*
* Runs all sentinel tests and aggregates results.
*
* @module sentinel/suite
* @author Haiec
* @license MIT
*/
// Define the `__esModule` flag on the exports object (the marker transpiled
// ES modules set so interop helpers can recognise them).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API: the suite runner defined below (function declarations are hoisted).
exports.runAllSentinelTests = runAllSentinelTests;
// The individual sentinel tests that the suite runs, one module per test.
const staticEchoTest_1 = require("./staticEchoTest");
const duplicateQueryTest_1 = require("./duplicateQueryTest");
const structuredListTest_1 = require("./structuredListTest");
const shortReasoningTest_1 = require("./shortReasoningTest");
/**
 * Runs all sentinel tests and returns aggregated results.
 *
 * Tests run one at a time, in this fixed order: staticEchoTest,
 * duplicateQueryTest, structuredListTest, shortReasoningTest. A test that
 * throws (or whose promise rejects) does not abort the suite; it is recorded
 * as a failed result with `confidence: 0` and the error message in
 * `message` / `details.error`.
 *
 * The suite counts as passed when at least 75% of the executed tests pass.
 * Skipped tests are not counted; if every test is skipped, `totalCount` is 0,
 * `passRate` is 0 and the suite is reported as not passed.
 *
 * @param config - Sentinel configuration with LLM client; passed unchanged to every test
 * @param options - Optional configuration for which tests to run
 * @param options.skipTests - Names of tests to skip (matched against the names listed above)
 * @param options.onTestComplete - Called exactly once per executed test with
 *   that test's result. An exception thrown by this callback is NOT treated as
 *   a test failure: it propagates and rejects the returned promise.
 * @returns Aggregated test suite results: `passed`, `passedCount`,
 *   `totalCount`, `passRate` (rounded to two decimals), `results`,
 *   `timestamp` (start time, ms since epoch), `durationMs` and `summary`
 *
 * @example
 * const suite = await runAllSentinelTests({
 *   client: myLLMClient,
 *   model: 'gpt-4'
 * });
 *
 * console.log(`Passed ${suite.passedCount}/${suite.totalCount} tests`);
 *
 * if (!suite.passed) {
 *   suite.results.filter(r => !r.passed).forEach(r => {
 *     console.error(`Failed: ${r.test} - ${r.message}`);
 *   });
 * }
 */
async function runAllSentinelTests(config, options) {
    const start = Date.now();
    const results = [];
    const skipTests = new Set(options?.skipTests || []);
    // Define test runners
    const tests = [
        { name: 'staticEchoTest', run: () => (0, staticEchoTest_1.staticEchoTest)(config) },
        { name: 'duplicateQueryTest', run: () => (0, duplicateQueryTest_1.duplicateQueryTest)(config) },
        { name: 'structuredListTest', run: () => (0, structuredListTest_1.structuredListTest)(config) },
        { name: 'shortReasoningTest', run: () => (0, shortReasoningTest_1.shortReasoningTest)(config) }
    ];
    // Run tests sequentially
    for (const test of tests) {
        if (skipTests.has(test.name)) {
            continue;
        }
        let result;
        // Only the test itself is guarded. Keeping the callback out of this
        // try block prevents a throwing onTestComplete from being recorded as
        // a second, bogus failure for a test that already produced a result.
        try {
            result = await test.run();
        }
        catch (error) {
            const reason = error instanceof Error ? error.message : 'Unknown';
            result = {
                test: test.name,
                passed: false,
                message: `Test threw error: ${reason}`,
                details: { error: reason },
                confidence: 0,
                limitations: ['Test failed due to unhandled error']
            };
        }
        results.push(result);
        if (options?.onTestComplete) {
            options.onTestComplete(result);
        }
    }
    const end = Date.now();
    const passedCount = results.filter(r => r.passed).length;
    const totalCount = results.length;
    const passRate = totalCount > 0 ? passedCount / totalCount : 0;
    // The threshold is checked against the unrounded rate; rounding is for display only.
    const passed = passRate >= 0.75; // Pass if 75%+ tests pass
    return {
        passed,
        passedCount,
        totalCount,
        passRate: Math.round(passRate * 100) / 100,
        results,
        timestamp: start,
        durationMs: end - start,
        summary: passed
            ? `All critical tests passed (${passedCount}/${totalCount})`
            : `Some tests failed (${passedCount}/${totalCount} passed)`
    };
}
//# sourceMappingURL=data:application/json;base64,