UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

186 lines 22.6 kB
"use strict";
/**
 * LLM Monitor Wrapper
 *
 * Drop-in wrapper that adds health monitoring to any LLM client.
 * Tracks latency, token rate, response fingerprint, and overall health.
 *
 * WHAT THIS DOES:
 * ✅ Wraps any LLM client with health monitoring
 * ✅ Tracks performance metrics over time
 * ✅ Detects behavioral drift and anomalies
 * ✅ Provides lifecycle hooks for health changes
 * ✅ Returns health report with each response
 *
 * WHAT THIS DOES NOT DO:
 * ❌ Modify LLM responses
 * ❌ Store prompts or responses (ephemeral only)
 * ❌ Make predictions about LLM behavior
 * ❌ Block or filter responses (monitoring only)
 *
 * PRIVACY GUARANTEE:
 * - No data is stored or transmitted
 * - All analysis is in-memory and ephemeral
 * - Prompts and responses are not logged
 *
 * @module wrapper/monitorLLM
 * @author Haiec
 * @license MIT
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.monitorLLM = monitorLLM;
const baseline_1 = require("../engines/runtime/baseline");
const latency_1 = require("../engines/runtime/latency");
const token_rate_1 = require("../engines/runtime/token-rate");
const fingerprint_1 = require("../engines/runtime/fingerprint");
const structure_1 = require("../engines/runtime/structure");
const health_score_1 = require("../engines/runtime/health-score");
/**
 * Generates a UUID v4 (browser and Node.js compatible).
 *
 * Uses the global `crypto.randomUUID()` when it exists; otherwise fills a
 * version-4 template from `Math.random()`. The fallback is not
 * cryptographically secure; in this module the value is only used as the
 * `id` of a call record.
 *
 * @returns {string} A UUID v4 string.
 */
function generateUUID() {
    if (typeof crypto !== 'undefined' && crypto.randomUUID) {
        return crypto.randomUUID();
    }
    // Fallback for older environments
    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
        const r = (Math.random() * 16) | 0;
        // 'x' takes any hex digit; 'y' is forced into 8..b (RFC 4122 variant bits).
        const v = c === 'x' ? r : (r & 0x3) | 0x8;
        return v.toString(16);
    });
}
/**
 * Estimates a token count for a response by counting whitespace-separated
 * words.
 *
 * Counts runs of non-whitespace characters instead of splitting on
 * whitespace: `''.split(/\s+/)` yields `['']` (length 1), and leading or
 * trailing whitespace yields extra empty entries, both of which would
 * inflate the estimate.
 *
 * @param {unknown} text - Response text; anything that is not a string counts as 0.
 * @returns {number} Number of whitespace-delimited words in `text`.
 */
function estimateTokenCount(text) {
    if (typeof text !== 'string') {
        return 0;
    }
    const words = text.match(/\S+/g);
    return words === null ? 0 : words.length;
}
/**
 * Checks if client is a unified LlmClient from adapters.
 *
 * Not called anywhere else in this module.
 *
 * @param client - Candidate client object.
 * @returns {boolean} True when `client` has a string `provider` property.
 */
function isLlmClient(client) {
    return 'provider' in client && typeof client.provider === 'string';
}
/**
 * Wraps an LLM client with health monitoring.
 *
 * Each `generate` call is timed, summarised into a call record, passed
 * through the enabled runtime engines, and scored. The resulting health
 * report is attached to the response under the `llmverify` key; all other
 * response properties are passed through unchanged.
 *
 * @param originalClient - The LLM client to wrap; must expose `generate(opts)`.
 * @param config - Optional monitoring configuration:
 *   - `learningRate` (default 0.1) and `minSamplesForBaseline` (default 5)
 *     are forwarded to the baseline engine.
 *   - `engines.{latency,tokenRate,fingerprint,structure}` toggle individual
 *     engines; each defaults to enabled.
 *   - `thresholds.{latencyWarnRatio,latencyErrorRatio,tokenRateWarnRatio,tokenRateErrorRatio}`
 *     are forwarded to the latency and token-rate engines.
 *   - `hooks.{onUnstable,onDegraded,onRecovery,onHealthCheck}` are callbacks
 *     that receive the health report.
 * @returns Monitored client with health tracking: `generate`, `getBaseline`,
 *   `getLastHealth` and `resetBaseline`.
 *
 * @example
 * // Basic usage
 * import { monitorLLM } from 'llmverify';
 *
 * const client = monitorLLM(openaiClient);
 * const response = await client.generate({ prompt: 'Hello' });
 * console.log(response.llmverify.health); // 'stable'
 *
 * @example
 * // With hooks
 * const client = monitorLLM(openaiClient, {
 *   hooks: {
 *     onUnstable: (report) => alert('LLM unstable!'),
 *     onDegraded: (report) => console.warn('LLM degraded'),
 *     onRecovery: (report) => console.log('LLM recovered')
 *   }
 * });
 *
 * @example
 * // With custom thresholds
 * const client = monitorLLM(openaiClient, {
 *   thresholds: {
 *     latencyWarnRatio: 1.5,
 *     latencyErrorRatio: 4.0
 *   },
 *   learningRate: 0.2
 * });
 */
function monitorLLM(originalClient, config = {}) {
    const baselineEngine = new baseline_1.BaselineEngine(config.learningRate ?? 0.1, config.minSamplesForBaseline ?? 5);
    // Health of the most recent call; used to detect state transitions for hooks.
    let lastHealth = 'stable';
    // Engine enable flags (all enabled by default)
    const engines = {
        latency: config.engines?.latency ?? true,
        tokenRate: config.engines?.tokenRate ?? true,
        fingerprint: config.engines?.fingerprint ?? true,
        structure: config.engines?.structure ?? true
    };
    return {
        /**
         * Calls the wrapped client's `generate` and attaches a health report.
         *
         * Errors thrown by the wrapped client or by a hook propagate to the
         * caller unchanged.
         *
         * @param opts - Options forwarded as-is to `originalClient.generate`;
         *   `opts.prompt` and `opts.model` are also copied into the call record.
         * @returns The original response spread into a new object, plus
         *   `llmverify` holding the health report.
         */
        async generate(opts) {
            const start = Date.now();
            // Call original client
            const resp = await originalClient.generate(opts);
            const end = Date.now();
            // Build call record
            const call = {
                id: generateUUID(),
                timestamp: start,
                prompt: opts.prompt,
                model: opts.model || 'unknown',
                responseText: resp.text || '',
                // Prefer the client-reported count; otherwise estimate from the text.
                responseTokens: resp.tokens ?? estimateTokenCount(resp.text),
                latencyMs: end - start
            };
            // Get current baseline
            const baseline = baselineEngine.get();
            // Run enabled engines
            const results = [];
            if (engines.latency) {
                results.push((0, latency_1.LatencyEngine)(call, baseline, {
                    warnRatio: config.thresholds?.latencyWarnRatio,
                    errorRatio: config.thresholds?.latencyErrorRatio
                }));
            }
            if (engines.tokenRate) {
                results.push((0, token_rate_1.TokenRateEngine)(call, baseline, {
                    warnRatio: config.thresholds?.tokenRateWarnRatio,
                    errorRatio: config.thresholds?.tokenRateErrorRatio
                }));
            }
            // The fingerprint is needed for the baseline update even when the
            // fingerprint engine itself is disabled.
            let currentFingerprint;
            if (engines.fingerprint) {
                const fingerprintResult = (0, fingerprint_1.FingerprintEngine)(call, baseline.fingerprint);
                results.push(fingerprintResult);
                currentFingerprint = fingerprintResult.details.curr;
            }
            else {
                currentFingerprint = (0, fingerprint_1.extractFingerprint)(call.responseText);
            }
            if (engines.structure) {
                results.push((0, structure_1.StructureEngine)(call));
            }
            // Calculate health score
            const healthReport = (0, health_score_1.HealthScoreEngine)(results);
            // Update baseline
            baselineEngine.update(call, currentFingerprint, 1);
            // Fire hooks on health state changes
            if (config.hooks) {
                if (healthReport.health !== lastHealth) {
                    if (healthReport.health === 'unstable' && config.hooks.onUnstable) {
                        config.hooks.onUnstable(healthReport);
                    }
                    if (healthReport.health === 'degraded' && config.hooks.onDegraded) {
                        config.hooks.onDegraded(healthReport);
                    }
                    if (lastHealth !== 'stable' && healthReport.health === 'stable' && config.hooks.onRecovery) {
                        config.hooks.onRecovery(healthReport);
                    }
                }
                // onHealthCheck runs on every call, not only on transitions.
                if (config.hooks.onHealthCheck) {
                    config.hooks.onHealthCheck(healthReport);
                }
            }
            // Updated after the hooks so getLastHealth() inside a hook still
            // returns the previous state.
            lastHealth = healthReport.health;
            return {
                ...resp,
                llmverify: healthReport
            };
        },
        /** Returns the baseline engine's current baseline. */
        getBaseline() {
            return baselineEngine.get();
        },
        /** Returns the health value recorded by the most recent `generate` call ('stable' initially). */
        getLastHealth() {
            return lastHealth;
        },
        /** Resets the baseline engine and sets the tracked health back to 'stable'. */
        resetBaseline() {
            baselineEngine.reset();
            lastHealth = 'stable';
        }
    };
}
//# sourceMappingURL=data:application/json;base64,