UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

186 lines 22.6 kB
"use strict";
/**
 * LLM Monitor Wrapper
 *
 * Drop-in wrapper that adds health monitoring to any LLM client.
 * Tracks latency, token rate, response fingerprint, and overall health.
 *
 * WHAT THIS DOES:
 * ✅ Wraps any LLM client with health monitoring
 * ✅ Tracks performance metrics over time
 * ✅ Detects behavioral drift and anomalies
 * ✅ Provides lifecycle hooks for health changes
 * ✅ Returns health report with each response
 *
 * WHAT THIS DOES NOT DO:
 * ❌ Modify LLM responses
 * ❌ Store prompts or responses (ephemeral only)
 * ❌ Make predictions about LLM behavior
 * ❌ Block or filter responses (monitoring only)
 *
 * PRIVACY GUARANTEE:
 * - No data is stored or transmitted
 * - All analysis is in-memory and ephemeral
 * - Prompts and responses are not logged
 *
 * @module wrapper/monitorLLM
 * @author Haiec
 * @license MIT
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.monitorLLM = monitorLLM;
const baseline_1 = require("../engines/runtime/baseline");
const latency_1 = require("../engines/runtime/latency");
const token_rate_1 = require("../engines/runtime/token-rate");
const fingerprint_1 = require("../engines/runtime/fingerprint");
const structure_1 = require("../engines/runtime/structure");
const health_score_1 = require("../engines/runtime/health-score");

/**
 * Generates a UUID v4 (browser and Node.js compatible).
 *
 * Uses the global `crypto.randomUUID()` when it exists; otherwise fills a
 * v4-shaped template from `Math.random()`. The fallback is not
 * cryptographically secure and is only used here to label call records.
 *
 * @returns {string} A UUID v4 formatted string.
 */
function generateUUID() {
    if (typeof crypto !== 'undefined' && crypto.randomUUID) {
        return crypto.randomUUID();
    }
    // Fallback for older environments
    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
        const r = (Math.random() * 16) | 0;
        // 'y' carries the RFC 4122 variant bits (binary 10xx); 'x' is fully random.
        const v = c === 'x' ? r : (r & 0x3) | 0x8;
        return v.toString(16);
    });
}

/**
 * Estimates a token count by counting whitespace-separated words.
 *
 * Used only when the wrapped client does not report `tokens` itself.
 * Empty, whitespace-only, and non-string input yield 0: calling
 * `''.split(/\s+/)` directly would return `['']` and report one token for an
 * empty response, and untrimmed text would count its leading/trailing
 * whitespace as extra empty "words".
 *
 * @param {unknown} text - Response text returned by the wrapped client.
 * @returns {number} Number of whitespace-separated words in `text`.
 */
function estimateTokenCount(text) {
    if (typeof text !== 'string') {
        return 0;
    }
    const trimmed = text.trim();
    return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}

/**
 * Checks if client is a unified LlmClient from adapters.
 *
 * @param client - Client object to inspect.
 * @returns {boolean} True when `client` has a string `provider` property.
 */
function isLlmClient(client) {
    return 'provider' in client && typeof client.provider === 'string';
}

/**
 * Wraps an LLM client with health monitoring.
 *
 * @param originalClient - The LLM client to wrap
 * @param config - Optional monitoring configuration
 * @returns Monitored client with health tracking. It exposes `generate`,
 *   `getBaseline`, `getLastHealth` and `resetBaseline`.
 *
 * @example
 * // Basic usage
 * import { monitorLLM } from 'llmverify';
 *
 * const client = monitorLLM(openaiClient);
 * const response = await client.generate({ prompt: 'Hello' });
 * console.log(response.llmverify.health); // 'stable'
 *
 * @example
 * // With hooks
 * const client = monitorLLM(openaiClient, {
 *   hooks: {
 *     onUnstable: (report) => alert('LLM unstable!'),
 *     onDegraded: (report) => console.warn('LLM degraded'),
 *     onRecovery: (report) => console.log('LLM recovered')
 *   }
 * });
 *
 * @example
 * // With custom thresholds
 * const client = monitorLLM(openaiClient, {
 *   thresholds: {
 *     latencyWarnRatio: 1.5,
 *     latencyErrorRatio: 4.0
 *   },
 *   learningRate: 0.2
 * });
 */
function monitorLLM(originalClient, config = {}) {
    const baselineEngine = new baseline_1.BaselineEngine(config.learningRate ?? 0.1, config.minSamplesForBaseline ?? 5);
    let lastHealth = 'stable';
    // Engine enable flags (all enabled by default)
    const engines = {
        latency: config.engines?.latency ?? true,
        tokenRate: config.engines?.tokenRate ?? true,
        fingerprint: config.engines?.fingerprint ?? true,
        structure: config.engines?.structure ?? true
    };
    return {
        /**
         * Calls the wrapped client's `generate`, runs the enabled engines on
         * the call, updates the baseline and fires any configured hooks.
         *
         * @param opts - Options forwarded unchanged to the wrapped client.
         * @returns The wrapped client's response fields plus the health
         *   report under `llmverify`.
         */
        async generate(opts) {
            const start = Date.now();
            // Call original client
            const resp = await originalClient.generate(opts);
            const end = Date.now();
            // Build call record
            const call = {
                id: generateUUID(),
                timestamp: start,
                prompt: opts.prompt,
                model: opts.model || 'unknown',
                responseText: resp.text || '',
                // Prefer the client-reported count; otherwise estimate from the text.
                responseTokens: resp.tokens ?? estimateTokenCount(resp.text),
                latencyMs: end - start
            };
            // Get current baseline
            const baseline = baselineEngine.get();
            // Run enabled engines
            const results = [];
            if (engines.latency) {
                results.push((0, latency_1.LatencyEngine)(call, baseline, {
                    warnRatio: config.thresholds?.latencyWarnRatio,
                    errorRatio: config.thresholds?.latencyErrorRatio
                }));
            }
            if (engines.tokenRate) {
                results.push((0, token_rate_1.TokenRateEngine)(call, baseline, {
                    warnRatio: config.thresholds?.tokenRateWarnRatio,
                    errorRatio: config.thresholds?.tokenRateErrorRatio
                }));
            }
            // A fingerprint is passed to the baseline update even when the
            // fingerprint engine is disabled, so it is extracted directly then.
            let currentFingerprint;
            if (engines.fingerprint) {
                const fingerprintResult = (0, fingerprint_1.FingerprintEngine)(call, baseline.fingerprint);
                results.push(fingerprintResult);
                // NOTE(review): presumably the fingerprint of this response — confirm against FingerprintEngine.
                currentFingerprint = fingerprintResult.details.curr;
            }
            else {
                currentFingerprint = (0, fingerprint_1.extractFingerprint)(call.responseText);
            }
            if (engines.structure) {
                results.push((0, structure_1.StructureEngine)(call));
            }
            // Calculate health score
            const healthReport = (0, health_score_1.HealthScoreEngine)(results);
            // Update baseline
            // NOTE(review): the meaning of the third argument (1) is defined by BaselineEngine.update — confirm.
            baselineEngine.update(call, currentFingerprint, 1);
            try {
                // Fire hooks on health state changes
                if (config.hooks) {
                    if (healthReport.health !== lastHealth) {
                        if (healthReport.health === 'unstable' && config.hooks.onUnstable) {
                            config.hooks.onUnstable(healthReport);
                        }
                        if (healthReport.health === 'degraded' && config.hooks.onDegraded) {
                            config.hooks.onDegraded(healthReport);
                        }
                        if (lastHealth !== 'stable' && healthReport.health === 'stable' && config.hooks.onRecovery) {
                            config.hooks.onRecovery(healthReport);
                        }
                    }
                    // Unlike the transition hooks above, this one runs on every call.
                    if (config.hooks.onHealthCheck) {
                        config.hooks.onHealthCheck(healthReport);
                    }
                }
            }
            finally {
                // Record the observed health even if a hook throws; otherwise
                // getLastHealth() would stay stale and the same transition
                // would be reported again on the next call. The hook's
                // exception still propagates to the caller.
                lastHealth = healthReport.health;
            }
            return {
                ...resp,
                llmverify: healthReport
            };
        },
        /** Returns the current baseline from the baseline engine. */
        getBaseline() {
            return baselineEngine.get();
        },
        /** Returns the health status recorded by the most recent `generate` call ('stable' initially). */
        getLastHealth() {
            return lastHealth;
        },
        /** Resets the baseline engine and sets the last health status back to 'stable'. */
        resetBaseline() {
            baselineEngine.reset();
            lastHealth = 'stable';
        }
    };
}
//# 
sourceMappingURL=data:application/json;base64,{"version":3,"file":"monitorLLM.js","sourceRoot":"","sources":["../../src/wrapper/monitorLLM.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;;AA4HH,gCAqHC;AAxOD,0DAA6D;AAC7D,wDAA2D;AAC3D,8DAAgE;AAChE,gEAAuF;AACvF,4DAA+D;AAC/D,kEAAoE;AAqDpE;;GAEG;AACH,SAAS,YAAY;IACnB,IAAI,OAAO,MAAM,KAAK,WAAW,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACvD,OAAO,MAAM,CAAC,UAAU,EAAE,CAAC;IAC7B,CAAC;IACD,kCAAkC;IAClC,OAAO,sCAAsC,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE;QACnE,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,CAAC,GAAG,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC;QAC1C,OAAO,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH;;GAEG;AACH,SAAS,WAAW,CAAC,MAAoB;IACvC,OAAO,UAAU,IAAI,MAAM,IAAI,OAAQ,MAAoB,CAAC,QAAQ,KAAK,QAAQ,CAAC;AACpF,CAAC;AAED,SAAgB,UAAU,CACxB,cAA4B,EAC5B,SAAwB,EAAE;IAE1B,MAAM,cAAc,GAAG,IAAI,yBAAc,CACvC,MAAM,CAAC,YAAY,IAAI,GAAG,EAC1B,MAAM,CAAC,qBAAqB,IAAI,CAAC,CAClC,CAAC;IAEF,IAAI,UAAU,GAAiB,QAAQ,CAAC;IAExC,+CAA+C;IAC/C,MAAM,OAAO,GAAG;QACd,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,OAAO,IAAI,IAAI;QACxC,SAAS,EAAE,MAAM,CAAC,OAAO,EAAE,SAAS,IAAI,IAAI;QAC5C,WAAW,EAAE,MAAM,CAAC,OAAO,EAAE,WAAW,IAAI,IAAI;QAChD,SAAS,EAAE,MAAM,CAAC,OAAO,EAAE,SAAS,IAAI,IAAI;KAC7C,CAAC;IAEF,OAAO;QACL,KAAK,CAAC,QAAQ,CAAC,IAAqB;YAClC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEzB,uBAAuB;YACvB,MAAM,IAAI,GAAG,MAAM,cAAc,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YAEjD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAEvB,oBAAoB;YACpB,MAAM,IAAI,GAAe;gBACvB,EAAE,EAAE,YAAY,EAAE;gBAClB,SAAS,EAAE,KAAK;gBAChB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,SAAS;gBAC9B,YAAY,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE;gBAC7B,cAAc,EAAE,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC;gBACpE,SAAS,EAAE,GAAG,GAAG,KAAK;aACvB,CAAC;YAEF,uBAAuB;YACvB,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,CAAC;YAEtC,sBAAsB;YACtB,MAAM,
OAAO,GAAG,EAAE,CAAC;YAEnB,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;gBACpB,OAAO,CAAC,IAAI,CAAC,IAAA,uBAAa,EAAC,IAAI,EAAE,QAAQ,EAAE;oBACzC,SAAS,EAAE,MAAM,CAAC,UAAU,EAAE,gBAAgB;oBAC9C,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,iBAAiB;iBACjD,CAAC,CAAC,CAAC;YACN,CAAC;YAED,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;gBACtB,OAAO,CAAC,IAAI,CAAC,IAAA,4BAAe,EAAC,IAAI,EAAE,QAAQ,EAAE;oBAC3C,SAAS,EAAE,MAAM,CAAC,UAAU,EAAE,kBAAkB;oBAChD,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,mBAAmB;iBACnD,CAAC,CAAC,CAAC;YACN,CAAC;YAED,IAAI,kBAAmD,CAAC;YACxD,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;gBACxB,MAAM,iBAAiB,GAAG,IAAA,+BAAiB,EAAC,IAAI,EAAE,QAAQ,CAAC,WAAW,CAAC,CAAC;gBACxE,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;gBAChC,kBAAkB,GAAG,iBAAiB,CAAC,OAAO,CAAC,IAA2B,CAAC;YAC7E,CAAC;iBAAM,CAAC;gBACN,kBAAkB,GAAG,IAAA,gCAAkB,EAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAC7D,CAAC;YAED,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;gBACtB,OAAO,CAAC,IAAI,CAAC,IAAA,2BAAe,EAAC,IAAI,CAAC,CAAC,CAAC;YACtC,CAAC;YAED,yBAAyB;YACzB,MAAM,YAAY,GAAG,IAAA,gCAAiB,EAAC,OAAO,CAAC,CAAC;YAEhD,kBAAkB;YAClB,cAAc,CAAC,MAAM,CAAC,IAAI,EAAE,kBAAkB,EAAE,CAAC,CAAC,CAAC;YAEnD,qCAAqC;YACrC,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBACjB,IAAI,YAAY,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;oBACvC,IAAI,YAAY,CAAC,MAAM,KAAK,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;wBAClE,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;oBACxC,CAAC;oBACD,IAAI,YAAY,CAAC,MAAM,KAAK,UAAU,IAAI,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;wBAClE,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;oBACxC,CAAC;oBACD,IAAI,UAAU,KAAK,QAAQ,IAAI,YAAY,CAAC,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;wBAC3F,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC,YAAY,CAAC,CAAC;oBACxC,CAAC;gBACH,CAAC;gBACD,IAAI,MAAM,CAAC,KAAK,CAAC,aAAa,EAAE,CAAC;oBAC/B,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;gBAC3C,CAAC;YACH,CAAC;YAED,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC;YAEjC,OAAO;gBACL,GAAG,IAAI;gBACP,SAAS,EAAE,YAAY;aACxB,CAAC;QACJ,CAAC;QAED,WAAW;YACT,OAAO,cAAc,CAAC,GAAG,EAAE,CAAC;QAC9B,CAAC;QAED,aAAa;YACX,OAAO,UAAU,CAAC;QACpB,CAAC;QAED,aAAa;YACX,cAAc,CAAC,KAAK,EAAE,CAAC;YACvB,UAAU,GAAG,QAAQ,CAAC;QACxB,CAAC;KACF,CAAC;AACJ,CAAC","so
urcesContent":["/**\n * LLM Monitor Wrapper\n * \n * Drop-in wrapper that adds health monitoring to any LLM client.\n * Tracks latency, token rate, response fingerprint, and overall health.\n * \n * WHAT THIS DOES:\n * ✅ Wraps any LLM client with health monitoring\n * ✅ Tracks performance metrics over time\n * ✅ Detects behavioral drift and anomalies\n * ✅ Provides lifecycle hooks for health changes\n * ✅ Returns health report with each response\n * \n * WHAT THIS DOES NOT DO:\n * ❌ Modify LLM responses\n * ❌ Store prompts or responses (ephemeral only)\n * ❌ Make predictions about LLM behavior\n * ❌ Block or filter responses (monitoring only)\n * \n * PRIVACY GUARANTEE:\n * - No data is stored or transmitted\n * - All analysis is in-memory and ephemeral\n * - Prompts and responses are not logged\n * \n * @module wrapper/monitorLLM\n * @author Haiec\n * @license MIT\n */\n\nimport { \n  CallRecord, \n  MonitorConfig, \n  HealthReport, \n  HealthStatus,\n  ResponseFingerprint \n} from '../types/runtime';\nimport { BaselineEngine } from '../engines/runtime/baseline';\nimport { LatencyEngine } from '../engines/runtime/latency';\nimport { TokenRateEngine } from '../engines/runtime/token-rate';\nimport { FingerprintEngine, extractFingerprint } from '../engines/runtime/fingerprint';\nimport { StructureEngine } from '../engines/runtime/structure';\nimport { HealthScoreEngine } from '../engines/runtime/health-score';\nimport { LlmClient, LlmRequest, LlmResponse } from '../adapters/types';\n\n/**\n * Generic LLM client interface (legacy).\n * Any client with a generate method can be wrapped.\n * @deprecated Use LlmClient from adapters for new code\n */\nexport interface LLMClient {\n  generate(opts: GenerateOptions): Promise<GenerateResponse>;\n  [key: string]: unknown;\n}\n\nexport interface GenerateOptions {\n  prompt: string;\n  model?: string;\n  system?: string;\n  temperature?: number;\n  maxTokens?: number;\n  [key: string]: unknown;\n}\n\nexport interface 
GenerateResponse {\n  text: string;\n  tokens?: number;\n  totalTokens?: number;\n  model?: string;\n  finishReason?: string;\n  [key: string]: unknown;\n}\n\n/**\n * Union type for any supported client.\n */\nexport type AnyLLMClient = LLMClient | LlmClient;\n\n/**\n * Response from monitored client includes health report.\n */\nexport interface MonitoredResponse extends GenerateResponse {\n  llmverify: HealthReport;\n}\n\n/**\n * Monitored client interface.\n */\nexport interface MonitoredClient {\n  generate(opts: GenerateOptions): Promise<MonitoredResponse>;\n  getBaseline(): ReturnType<BaselineEngine['get']>;\n  getLastHealth(): HealthStatus;\n  resetBaseline(): void;\n}\n\n/**\n * Generates a UUID v4 (browser and Node.js compatible).\n */\nfunction generateUUID(): string {\n  if (typeof crypto !== 'undefined' && crypto.randomUUID) {\n    return crypto.randomUUID();\n  }\n  // Fallback for older environments\n  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {\n    const r = (Math.random() * 16) | 0;\n    const v = c === 'x' ? 
r : (r & 0x3) | 0x8;\n    return v.toString(16);\n  });\n}\n\n/**\n * Wraps an LLM client with health monitoring.\n * \n * @param originalClient - The LLM client to wrap\n * @param config - Optional monitoring configuration\n * @returns Monitored client with health tracking\n * \n * @example\n * // Basic usage\n * import { monitorLLM } from 'llmverify';\n * \n * const client = monitorLLM(openaiClient);\n * const response = await client.generate({ prompt: 'Hello' });\n * console.log(response.llmverify.health); // 'stable'\n * \n * @example\n * // With hooks\n * const client = monitorLLM(openaiClient, {\n *   hooks: {\n *     onUnstable: (report) => alert('LLM unstable!'),\n *     onDegraded: (report) => console.warn('LLM degraded'),\n *     onRecovery: (report) => console.log('LLM recovered')\n *   }\n * });\n * \n * @example\n * // With custom thresholds\n * const client = monitorLLM(openaiClient, {\n *   thresholds: {\n *     latencyWarnRatio: 1.5,\n *     latencyErrorRatio: 4.0\n *   },\n *   learningRate: 0.2\n * });\n */\n/**\n * Checks if client is a unified LlmClient from adapters.\n */\nfunction isLlmClient(client: AnyLLMClient): client is LlmClient {\n  return 'provider' in client && typeof (client as LlmClient).provider === 'string';\n}\n\nexport function monitorLLM(\n  originalClient: AnyLLMClient,\n  config: MonitorConfig = {}\n): MonitoredClient {\n  const baselineEngine = new BaselineEngine(\n    config.learningRate ?? 0.1,\n    config.minSamplesForBaseline ?? 5\n  );\n  \n  let lastHealth: HealthStatus = 'stable';\n\n  // Engine enable flags (all enabled by default)\n  const engines = {\n    latency: config.engines?.latency ?? true,\n    tokenRate: config.engines?.tokenRate ?? true,\n    fingerprint: config.engines?.fingerprint ?? true,\n    structure: config.engines?.structure ?? 
true\n  };\n\n  return {\n    async generate(opts: GenerateOptions): Promise<MonitoredResponse> {\n      const start = Date.now();\n      \n      // Call original client\n      const resp = await originalClient.generate(opts);\n      \n      const end = Date.now();\n\n      // Build call record\n      const call: CallRecord = {\n        id: generateUUID(),\n        timestamp: start,\n        prompt: opts.prompt,\n        model: opts.model || 'unknown',\n        responseText: resp.text || '',\n        responseTokens: resp.tokens ?? (resp.text?.split(/\\s+/).length || 0),\n        latencyMs: end - start\n      };\n\n      // Get current baseline\n      const baseline = baselineEngine.get();\n\n      // Run enabled engines\n      const results = [];\n\n      if (engines.latency) {\n        results.push(LatencyEngine(call, baseline, {\n          warnRatio: config.thresholds?.latencyWarnRatio,\n          errorRatio: config.thresholds?.latencyErrorRatio\n        }));\n      }\n\n      if (engines.tokenRate) {\n        results.push(TokenRateEngine(call, baseline, {\n          warnRatio: config.thresholds?.tokenRateWarnRatio,\n          errorRatio: config.thresholds?.tokenRateErrorRatio\n        }));\n      }\n\n      let currentFingerprint: ResponseFingerprint | undefined;\n      if (engines.fingerprint) {\n        const fingerprintResult = FingerprintEngine(call, baseline.fingerprint);\n        results.push(fingerprintResult);\n        currentFingerprint = fingerprintResult.details.curr as ResponseFingerprint;\n      } else {\n        currentFingerprint = extractFingerprint(call.responseText);\n      }\n\n      if (engines.structure) {\n        results.push(StructureEngine(call));\n      }\n\n      // Calculate health score\n      const healthReport = HealthScoreEngine(results);\n\n      // Update baseline\n      baselineEngine.update(call, currentFingerprint, 1);\n\n      // Fire hooks on health state changes\n      if (config.hooks) {\n        if (healthReport.health 
!== lastHealth) {\n          if (healthReport.health === 'unstable' && config.hooks.onUnstable) {\n            config.hooks.onUnstable(healthReport);\n          }\n          if (healthReport.health === 'degraded' && config.hooks.onDegraded) {\n            config.hooks.onDegraded(healthReport);\n          }\n          if (lastHealth !== 'stable' && healthReport.health === 'stable' && config.hooks.onRecovery) {\n            config.hooks.onRecovery(healthReport);\n          }\n        }\n        if (config.hooks.onHealthCheck) {\n          config.hooks.onHealthCheck(healthReport);\n        }\n      }\n\n      lastHealth = healthReport.health;\n\n      return {\n        ...resp,\n        llmverify: healthReport\n      };\n    },\n\n    getBaseline() {\n      return baselineEngine.get();\n    },\n\n    getLastHealth() {\n      return lastHealth;\n    },\n\n    resetBaseline() {\n      baselineEngine.reset();\n      lastHealth = 'stable';\n    }\n  };\n}\n"]}