UNPKG

llmverify

Version:

AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.

157 lines 15.9 kB
"use strict"; /** * Health Score Engine * * Aggregates engine results into a composite health score. * Provides actionable health status and recommendations. * * WHAT THIS DOES: * ✅ Combines multiple engine results * ✅ Calculates weighted health score * ✅ Provides actionable status levels * ✅ Generates recommendations * * WHAT THIS DOES NOT DO: * ❌ Predict future health * ❌ Identify root causes * ❌ Guarantee accuracy of individual engines * * @module engines/runtime/health-score * @author Haiec * @license MIT */ Object.defineProperty(exports, "__esModule", { value: true }); exports.HealthScoreEngine = HealthScoreEngine; exports.isHealthy = isHealthy; exports.getAlertLevel = getAlertLevel; /** * Default weights for each metric. * Consistency and latency are weighted higher as they're most impactful. */ const DEFAULT_WEIGHTS = { consistency: 0.25, structure: 0.15, latency: 0.25, token_rate: 0.15, fingerprint: 0.20 }; /** * Generates recommendations based on engine results. */ function generateRecommendations(results) { const recommendations = []; for (const result of results) { if (result.status === 'error') { switch (result.metric) { case 'latency': recommendations.push('High latency detected. Consider checking provider status or implementing retry logic.'); break; case 'token_rate': recommendations.push('Low token rate detected. Provider may be throttling or experiencing issues.'); break; case 'fingerprint': recommendations.push('Response structure has changed significantly. Verify model behavior.'); break; case 'structure': recommendations.push('Structural anomalies detected. Check for truncation or format issues.'); break; case 'consistency': recommendations.push('Response consistency issues detected. Consider implementing validation.'); break; } } else if (result.status === 'warn') { switch (result.metric) { case 'latency': recommendations.push('Latency slightly elevated. Monitor for trends.'); break; case 'token_rate': recommendations.push('Token rate below normal. May indicate early throttling.'); break; case 'fingerprint': recommendations.push('Minor structural drift detected. May be normal variation.'); break; } } } return recommendations; } /** * Aggregates engine results into a health report. * * @param results - Array of engine results to aggregate * @param weights - Optional custom weights for each metric * @returns Comprehensive health report * * @example * const results = [latencyResult, tokenRateResult, fingerprintResult]; * const report = HealthScoreEngine(results); * * if (report.health === 'unstable') { * alert('LLM health critical!'); * } */ function HealthScoreEngine(results, weights) { const effectiveWeights = { ...DEFAULT_WEIGHTS, ...weights }; // Calculate weighted score let score = 0; let totalWeight = 0; for (const result of results) { const weight = effectiveWeights[result.metric] || 0.1; score += result.value * weight; totalWeight += weight; } // Normalize score score = totalWeight > 0 ? Math.min(1, score / totalWeight * Object.keys(effectiveWeights).length / results.length) : 0; score = Math.min(1, score); // Determine health status let health; if (score <= 0.25) { health = 'stable'; } else if (score <= 0.5) { health = 'minor_variation'; } else if (score <= 0.75) { health = 'degraded'; } else { health = 'unstable'; } // Check for any critical errors that should override const hasError = results.some(r => r.status === 'error'); if (hasError && health === 'stable') { health = 'minor_variation'; } // Generate recommendations const recommendations = generateRecommendations(results); return { health, score: Math.round(score * 100) / 100, engineResults: results, timestamp: Date.now(), recommendations: recommendations.length > 0 ? recommendations : undefined }; } /** * Quick health check - returns true if healthy. */ function isHealthy(report) { return report.health === 'stable' || report.health === 'minor_variation'; } /** * Gets severity level for alerting. */ function getAlertLevel(report) { switch (report.health) { case 'stable': return 'none'; case 'minor_variation': return 'info'; case 'degraded': return 'warning'; case 'unstable': return 'critical'; } } //# sourceMappingURL=data:application/json;base64,{"version":3,"file":"health-score.js","sourceRoot":"","sources":["../../../src/engines/runtime/health-score.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;;AA0EH,8CAgDC;AAKD,8BAEC;AAKD,sCAWC;AA7ID;;;GAGG;AACH,MAAM,eAAe,GAA2B;IAC9C,WAAW,EAAE,IAAI;IACjB,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,IAAI;IACb,UAAU,EAAE,IAAI;IAChB,WAAW,EAAE,IAAI;CAClB,CAAC;AAEF;;GAEG;AACH,SAAS,uBAAuB,CAAC,OAAuB;IACtD,MAAM,eAAe,GAAa,EAAE,CAAC;IAErC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;YAC9B,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtB,KAAK,SAAS;oBACZ,eAAe,CAAC,IAAI,CAAC,uFAAuF,CAAC,CAAC;oBAC9G,MAAM;gBACR,KAAK,YAAY;oBACf,eAAe,CAAC,IAAI,CAAC,6EAA6E,CAAC,CAAC;oBACpG,MAAM;gBACR,KAAK,aAAa;oBAChB,eAAe,CAAC,IAAI,CAAC,sEAAsE,CAAC,CAAC;oBAC7F,MAAM;gBACR,KAAK,WAAW;oBACd,eAAe,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAC;oBAC9F,MAAM;gBACR,KAAK,aAAa;oBAChB,eAAe,CAAC,IAAI,CAAC,yEAAyE,CAAC,CAAC;oBAChG,MAAM;YACV,CAAC;QACH,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtB,KAAK,SAAS;oBACZ,eAAe,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;oBACvE,MAAM;gBACR,KAAK,YAAY;oBACf,eAAe,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC;oBAChF,MAAM;gBACR,KAAK,aAAa;oBAChB,eAAe,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;oBAClF,MAAM;YACV,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,SAAgB,iBAAiB,CAC/B,OAAuB,EACvB,OAAgC;IAEhC,MAAM,gBAAgB,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAE5D,2BAA2B;IAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC;QACtD,KAAK,IAAI,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC;QAC/B,WAAW,IAAI,MAAM,CAAC;IACxB,CAAC;IAED,kBAAkB;IAClB,KAAK,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACvH,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAE3B,0BAA0B;IAC1B,IAAI,MAAoB,CAAC;IACzB,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAClB,MAAM,GAAG,QAAQ,CAAC;IACpB,CAAC;SAAM,IAAI,KAAK,IAAI,GAAG,EAAE,CAAC;QACxB,MAAM,GAAG,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QACzB,MAAM,GAAG,UAAU,CAAC;IACtB,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,UAAU,CAAC;IACtB,CAAC;IAED,qDAAqD;IACrD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;IACzD,IAAI,QAAQ,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;QACpC,MAAM,GAAG,iBAAiB,CAAC;IAC7B,CAAC;IAED,2BAA2B;IAC3B,MAAM,eAAe,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;IAEzD,OAAO;QACL,MAAM;QACN,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG;QACpC,aAAa,EAAE,OAAO;QACtB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;QACrB,eAAe,EAAE,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS;KAC1E,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,SAAS,CAAC,MAAoB;IAC5C,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,KAAK,iBAAiB,CAAC;AAC3E,CAAC;AAED;;GAEG;AACH,SAAgB,aAAa,CAAC,MAAoB;IAChD,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;QACtB,KAAK,QAAQ;YACX,OAAO,MAAM,CAAC;QAChB,KAAK,iBAAiB;YACpB,OAAO,MAAM,CAAC;QAChB,KAAK,UAAU;YACb,OAAO,SAAS,CAAC;QACnB,KAAK,UAAU;YACb,OAAO,UAAU,CAAC;IACtB,CAAC;AACH,CAAC","sourcesContent":["/**\n * Health Score Engine\n * \n * Aggregates engine results into a composite health score.\n * Provides actionable health status and recommendations.\n * \n * WHAT THIS DOES:\n * ✅ Combines multiple engine results\n * ✅ Calculates weighted health score\n * ✅ Provides actionable status levels\n * ✅ Generates recommendations\n * \n * WHAT THIS DOES NOT DO:\n * ❌ Predict future health\n * ❌ Identify root causes\n * ❌ Guarantee accuracy of individual engines\n * \n * @module engines/runtime/health-score\n * @author Haiec\n * @license MIT\n */\n\nimport { EngineResult, HealthReport, HealthStatus } from '../../types/runtime';\n\n/**\n * Default weights for each metric.\n * Consistency and latency are weighted higher as they're most impactful.\n */\nconst DEFAULT_WEIGHTS: Record<string, number> = {\n  consistency: 0.25,\n  structure: 0.15,\n  latency: 0.25,\n  token_rate: 0.15,\n  fingerprint: 0.20\n};\n\n/**\n * Generates recommendations based on engine results.\n */\nfunction generateRecommendations(results: EngineResult[]): string[] {\n  const recommendations: string[] = [];\n\n  for (const result of results) {\n    if (result.status === 'error') {\n      switch (result.metric) {\n        case 'latency':\n          recommendations.push('High latency detected. Consider checking provider status or implementing retry logic.');\n          break;\n        case 'token_rate':\n          recommendations.push('Low token rate detected. Provider may be throttling or experiencing issues.');\n          break;\n        case 'fingerprint':\n          recommendations.push('Response structure has changed significantly. Verify model behavior.');\n          break;\n        case 'structure':\n          recommendations.push('Structural anomalies detected. Check for truncation or format issues.');\n          break;\n        case 'consistency':\n          recommendations.push('Response consistency issues detected. Consider implementing validation.');\n          break;\n      }\n    } else if (result.status === 'warn') {\n      switch (result.metric) {\n        case 'latency':\n          recommendations.push('Latency slightly elevated. Monitor for trends.');\n          break;\n        case 'token_rate':\n          recommendations.push('Token rate below normal. May indicate early throttling.');\n          break;\n        case 'fingerprint':\n          recommendations.push('Minor structural drift detected. May be normal variation.');\n          break;\n      }\n    }\n  }\n\n  return recommendations;\n}\n\n/**\n * Aggregates engine results into a health report.\n * \n * @param results - Array of engine results to aggregate\n * @param weights - Optional custom weights for each metric\n * @returns Comprehensive health report\n * \n * @example\n * const results = [latencyResult, tokenRateResult, fingerprintResult];\n * const report = HealthScoreEngine(results);\n * \n * if (report.health === 'unstable') {\n *   alert('LLM health critical!');\n * }\n */\nexport function HealthScoreEngine(\n  results: EngineResult[],\n  weights?: Record<string, number>\n): HealthReport {\n  const effectiveWeights = { ...DEFAULT_WEIGHTS, ...weights };\n\n  // Calculate weighted score\n  let score = 0;\n  let totalWeight = 0;\n\n  for (const result of results) {\n    const weight = effectiveWeights[result.metric] || 0.1;\n    score += result.value * weight;\n    totalWeight += weight;\n  }\n\n  // Normalize score\n  score = totalWeight > 0 ? Math.min(1, score / totalWeight * Object.keys(effectiveWeights).length / results.length) : 0;\n  score = Math.min(1, score);\n\n  // Determine health status\n  let health: HealthStatus;\n  if (score <= 0.25) {\n    health = 'stable';\n  } else if (score <= 0.5) {\n    health = 'minor_variation';\n  } else if (score <= 0.75) {\n    health = 'degraded';\n  } else {\n    health = 'unstable';\n  }\n\n  // Check for any critical errors that should override\n  const hasError = results.some(r => r.status === 'error');\n  if (hasError && health === 'stable') {\n    health = 'minor_variation';\n  }\n\n  // Generate recommendations\n  const recommendations = generateRecommendations(results);\n\n  return {\n    health,\n    score: Math.round(score * 100) / 100,\n    engineResults: results,\n    timestamp: Date.now(),\n    recommendations: recommendations.length > 0 ? recommendations : undefined\n  };\n}\n\n/**\n * Quick health check - returns true if healthy.\n */\nexport function isHealthy(report: HealthReport): boolean {\n  return report.health === 'stable' || report.health === 'minor_variation';\n}\n\n/**\n * Gets severity level for alerting.\n */\nexport function getAlertLevel(report: HealthReport): 'none' | 'info' | 'warning' | 'critical' {\n  switch (report.health) {\n    case 'stable':\n      return 'none';\n    case 'minor_variation':\n      return 'info';\n    case 'degraded':\n      return 'warning';\n    case 'unstable':\n      return 'critical';\n  }\n}\n"]}