llmverify
Version:
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
157 lines • 15.9 kB
JavaScript
;
/**
* Health Score Engine
*
* Aggregates engine results into a composite health score.
* Provides actionable health status and recommendations.
*
* WHAT THIS DOES:
* ✅ Combines multiple engine results
* ✅ Calculates weighted health score
* ✅ Provides actionable status levels
* ✅ Generates recommendations
*
* WHAT THIS DOES NOT DO:
* ❌ Predict future health
* ❌ Identify root causes
* ❌ Guarantee accuracy of individual engines
*
* @module engines/runtime/health-score
* @author Haiec
* @license MIT
*/
// CommonJS export wiring: flags the module with `__esModule` and exposes the
// three public functions. The assignments run before the function bodies
// appear in the file, which works because function declarations are hoisted.
Object.defineProperty(exports, "__esModule", { value: true });
exports.HealthScoreEngine = HealthScoreEngine;
exports.isHealthy = isHealthy;
exports.getAlertLevel = getAlertLevel;
/**
* Default weights for each metric.
* Consistency and latency are weighted higher as they're most impactful.
*
* The five values sum to 1.0. HealthScoreEngine spreads caller-supplied
* weights over this object, so a key provided by the caller replaces the
* default for that metric and new keys are added alongside the defaults.
*/
const DEFAULT_WEIGHTS = {
consistency: 0.25,
structure: 0.15,
latency: 0.25,
token_rate: 0.15,
fingerprint: 0.20
};
/**
 * Builds the list of human-readable recommendations for a set of engine results.
 *
 * Each result contributes at most one message, selected by its `status`
 * ('error' or 'warn') and its `metric`. Results with any other status, or with
 * a metric that has no message for that status, contribute nothing. Messages
 * are returned in the same order as the results that produced them.
 *
 * @param results - Iterable of engine results; only `status` and `metric` are read
 * @returns Array of recommendation strings (empty when nothing applies)
 */
function generateRecommendations(results) {
    // status -> metric -> message. Maps (rather than plain objects) are used so
    // that only the exact string keys listed here can ever match.
    const messagesByStatus = new Map([
        ['error', new Map([
                ['latency', 'High latency detected. Consider checking provider status or implementing retry logic.'],
                ['token_rate', 'Low token rate detected. Provider may be throttling or experiencing issues.'],
                ['fingerprint', 'Response structure has changed significantly. Verify model behavior.'],
                ['structure', 'Structural anomalies detected. Check for truncation or format issues.'],
                ['consistency', 'Response consistency issues detected. Consider implementing validation.']
            ])],
        ['warn', new Map([
                ['latency', 'Latency slightly elevated. Monitor for trends.'],
                ['token_rate', 'Token rate below normal. May indicate early throttling.'],
                ['fingerprint', 'Minor structural drift detected. May be normal variation.']
            ])]
    ]);
    const collected = [];
    for (const entry of results) {
        const messagesForStatus = messagesByStatus.get(entry.status);
        if (messagesForStatus === undefined) {
            // Neither 'error' nor 'warn': nothing to recommend.
            continue;
        }
        const message = messagesForStatus.get(entry.metric);
        if (message !== undefined) {
            collected.push(message);
        }
    }
    return collected;
}
/**
 * Aggregates engine results into a health report.
 *
 * The score is the weighted mean of each result's `value`, multiplied by the
 * ratio (number of configured weights / number of results) and capped at 1.
 * It is then bucketed into a status:
 *   score <= 0.25 -> 'stable', <= 0.5 -> 'minor_variation',
 *   <= 0.75 -> 'degraded', otherwise 'unstable'.
 * If any result has status 'error', a 'stable' verdict is raised to
 * 'minor_variation'.
 *
 * @param results - Array of engine results to aggregate
 * @param weights - Optional custom weights for each metric, merged over the
 *   defaults. An explicit weight of 0 excludes that metric from the score;
 *   metrics with no configured weight use 0.1.
 * @returns Health report containing `health`, `score` (rounded to two
 *   decimals), the original `engineResults`, a `timestamp` from `Date.now()`,
 *   and `recommendations` (undefined when there are none)
 *
 * @example
 * const results = [latencyResult, tokenRateResult, fingerprintResult];
 * const report = HealthScoreEngine(results);
 *
 * if (report.health === 'unstable') {
 *   alert('LLM health critical!');
 * }
 */
function HealthScoreEngine(results, weights) {
    const effectiveWeights = { ...DEFAULT_WEIGHTS, ...weights };
    // Weight applied to metrics that have no usable configured weight.
    const FALLBACK_WEIGHT = 0.1;
    // Calculate weighted score
    let score = 0;
    let totalWeight = 0;
    for (const result of results) {
        const configured = effectiveWeights[result.metric];
        // An explicit 0 is a deliberate "ignore this metric" and must be kept;
        // a plain `||` would silently replace it with the fallback weight.
        const weight = configured === 0 ? 0 : configured || FALLBACK_WEIGHT;
        score += result.value * weight;
        totalWeight += weight;
    }
    // Normalize score: weighted mean scaled by (configured weights / results).
    // NOTE(review): this scaling inflates the score when fewer results than
    // configured weights are supplied - confirm that this is intended.
    const configuredMetricCount = Object.keys(effectiveWeights).length;
    score = totalWeight > 0
        ? Math.min(1, score / totalWeight * configuredMetricCount / results.length)
        : 0;
    // Determine health status
    let health;
    if (score <= 0.25) {
        health = 'stable';
    }
    else if (score <= 0.5) {
        health = 'minor_variation';
    }
    else if (score <= 0.75) {
        health = 'degraded';
    }
    else {
        health = 'unstable';
    }
    // An engine-level error must never be reported as fully stable.
    const hasError = results.some(r => r.status === 'error');
    if (hasError && health === 'stable') {
        health = 'minor_variation';
    }
    // Generate recommendations
    const recommendations = generateRecommendations(results);
    return {
        health,
        score: Math.round(score * 100) / 100,
        engineResults: results,
        timestamp: Date.now(),
        recommendations: recommendations.length > 0 ? recommendations : undefined
    };
}
/**
 * Quick health check.
 *
 * @param report - Health report; only its `health` field is read
 * @returns true when `health` is 'stable' or 'minor_variation', false otherwise
 */
function isHealthy(report) {
    const status = report.health;
    if (status === 'stable') {
        return true;
    }
    return status === 'minor_variation';
}
/**
 * Maps a report's health status to an alerting severity.
 *
 * 'stable' -> 'none', 'minor_variation' -> 'info',
 * 'degraded' -> 'warning', 'unstable' -> 'critical'.
 *
 * @param report - Health report; only its `health` field is read
 * @returns The severity string, or undefined for an unrecognised status
 */
function getAlertLevel(report) {
    // A Map is used so that only these exact status strings can match.
    const severityByHealth = new Map([
        ['stable', 'none'],
        ['minor_variation', 'info'],
        ['degraded', 'warning'],
        ['unstable', 'critical']
    ]);
    return severityByHealth.get(report.health);
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"health-score.js","sourceRoot":"","sources":["../../../src/engines/runtime/health-score.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;;AA0EH,8CAgDC;AAKD,8BAEC;AAKD,sCAWC;AA7ID;;;GAGG;AACH,MAAM,eAAe,GAA2B;IAC9C,WAAW,EAAE,IAAI;IACjB,SAAS,EAAE,IAAI;IACf,OAAO,EAAE,IAAI;IACb,UAAU,EAAE,IAAI;IAChB,WAAW,EAAE,IAAI;CAClB,CAAC;AAEF;;GAEG;AACH,SAAS,uBAAuB,CAAC,OAAuB;IACtD,MAAM,eAAe,GAAa,EAAE,CAAC;IAErC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;YAC9B,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtB,KAAK,SAAS;oBACZ,eAAe,CAAC,IAAI,CAAC,uFAAuF,CAAC,CAAC;oBAC9G,MAAM;gBACR,KAAK,YAAY;oBACf,eAAe,CAAC,IAAI,CAAC,6EAA6E,CAAC,CAAC;oBACpG,MAAM;gBACR,KAAK,aAAa;oBAChB,eAAe,CAAC,IAAI,CAAC,sEAAsE,CAAC,CAAC;oBAC7F,MAAM;gBACR,KAAK,WAAW;oBACd,eAAe,CAAC,IAAI,CAAC,uEAAuE,CAAC,CAAC;oBAC9F,MAAM;gBACR,KAAK,aAAa;oBAChB,eAAe,CAAC,IAAI,CAAC,yEAAyE,CAAC,CAAC;oBAChG,MAAM;YACV,CAAC;QACH,CAAC;aAAM,IAAI,MAAM,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YACpC,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;gBACtB,KAAK,SAAS;oBACZ,eAAe,CAAC,IAAI,CAAC,gDAAgD,CAAC,CAAC;oBACvE,MAAM;gBACR,KAAK,YAAY;oBACf,eAAe,CAAC,IAAI,CAAC,yDAAyD,CAAC,CAAC;oBAChF,MAAM;gBACR,KAAK,aAAa;oBAChB,eAAe,CAAC,IAAI,CAAC,2DAA2D,CAAC,CAAC;oBAClF,MAAM;YACV,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,SAAgB,iBAAiB,CAC/B,OAAuB,EACvB,OAAgC;IAEhC,MAAM,gBAAgB,GAAG,EAAE,GAAG,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAE5D,2BAA2B;IAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,gBAAgB,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC;QACtD,KAAK,IAAI,MAAM,CAAC,KAAK,GAAG,MAAM,CAAC;QAC/B,WAAW,IAAI,MAAM,CAAC;IACxB,CAAC;IAED,kBAAkB;IAClB,KAAK,GAAG,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,GAAG,WAAW,GAAG,MAAM,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACvH,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IAE3B,0BAA0B;IAC
1B,IAAI,MAAoB,CAAC;IACzB,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QAClB,MAAM,GAAG,QAAQ,CAAC;IACpB,CAAC;SAAM,IAAI,KAAK,IAAI,GAAG,EAAE,CAAC;QACxB,MAAM,GAAG,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;QACzB,MAAM,GAAG,UAAU,CAAC;IACtB,CAAC;SAAM,CAAC;QACN,MAAM,GAAG,UAAU,CAAC;IACtB,CAAC;IAED,qDAAqD;IACrD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,OAAO,CAAC,CAAC;IACzD,IAAI,QAAQ,IAAI,MAAM,KAAK,QAAQ,EAAE,CAAC;QACpC,MAAM,GAAG,iBAAiB,CAAC;IAC7B,CAAC;IAED,2BAA2B;IAC3B,MAAM,eAAe,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;IAEzD,OAAO;QACL,MAAM;QACN,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,GAAG,CAAC,GAAG,GAAG;QACpC,aAAa,EAAE,OAAO;QACtB,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;QACrB,eAAe,EAAE,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,SAAS;KAC1E,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAgB,SAAS,CAAC,MAAoB;IAC5C,OAAO,MAAM,CAAC,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,MAAM,KAAK,iBAAiB,CAAC;AAC3E,CAAC;AAED;;GAEG;AACH,SAAgB,aAAa,CAAC,MAAoB;IAChD,QAAQ,MAAM,CAAC,MAAM,EAAE,CAAC;QACtB,KAAK,QAAQ;YACX,OAAO,MAAM,CAAC;QAChB,KAAK,iBAAiB;YACpB,OAAO,MAAM,CAAC;QAChB,KAAK,UAAU;YACb,OAAO,SAAS,CAAC;QACnB,KAAK,UAAU;YACb,OAAO,UAAU,CAAC;IACtB,CAAC;AACH,CAAC","sourcesContent":["/**\n * Health Score Engine\n * \n * Aggregates engine results into a composite health score.\n * Provides actionable health status and recommendations.\n * \n * WHAT THIS DOES:\n * ✅ Combines multiple engine results\n * ✅ Calculates weighted health score\n * ✅ Provides actionable status levels\n * ✅ Generates recommendations\n * \n * WHAT THIS DOES NOT DO:\n * ❌ Predict future health\n * ❌ Identify root causes\n * ❌ Guarantee accuracy of individual engines\n * \n * @module engines/runtime/health-score\n * @author Haiec\n * @license MIT\n */\n\nimport { EngineResult, HealthReport, HealthStatus } from '../../types/runtime';\n\n/**\n * Default weights for each metric.\n * Consistency and latency are weighted higher as they're most impactful.\n */\nconst DEFAULT_WEIGHTS: Record<string, number> = {\n  consistency: 0.25,\n  
structure: 0.15,\n  latency: 0.25,\n  token_rate: 0.15,\n  fingerprint: 0.20\n};\n\n/**\n * Generates recommendations based on engine results.\n */\nfunction generateRecommendations(results: EngineResult[]): string[] {\n  const recommendations: string[] = [];\n\n  for (const result of results) {\n    if (result.status === 'error') {\n      switch (result.metric) {\n        case 'latency':\n          recommendations.push('High latency detected. Consider checking provider status or implementing retry logic.');\n          break;\n        case 'token_rate':\n          recommendations.push('Low token rate detected. Provider may be throttling or experiencing issues.');\n          break;\n        case 'fingerprint':\n          recommendations.push('Response structure has changed significantly. Verify model behavior.');\n          break;\n        case 'structure':\n          recommendations.push('Structural anomalies detected. Check for truncation or format issues.');\n          break;\n        case 'consistency':\n          recommendations.push('Response consistency issues detected. Consider implementing validation.');\n          break;\n      }\n    } else if (result.status === 'warn') {\n      switch (result.metric) {\n        case 'latency':\n          recommendations.push('Latency slightly elevated. Monitor for trends.');\n          break;\n        case 'token_rate':\n          recommendations.push('Token rate below normal. May indicate early throttling.');\n          break;\n        case 'fingerprint':\n          recommendations.push('Minor structural drift detected. 
May be normal variation.');\n          break;\n      }\n    }\n  }\n\n  return recommendations;\n}\n\n/**\n * Aggregates engine results into a health report.\n * \n * @param results - Array of engine results to aggregate\n * @param weights - Optional custom weights for each metric\n * @returns Comprehensive health report\n * \n * @example\n * const results = [latencyResult, tokenRateResult, fingerprintResult];\n * const report = HealthScoreEngine(results);\n * \n * if (report.health === 'unstable') {\n *   alert('LLM health critical!');\n * }\n */\nexport function HealthScoreEngine(\n  results: EngineResult[],\n  weights?: Record<string, number>\n): HealthReport {\n  const effectiveWeights = { ...DEFAULT_WEIGHTS, ...weights };\n\n  // Calculate weighted score\n  let score = 0;\n  let totalWeight = 0;\n\n  for (const result of results) {\n    const weight = effectiveWeights[result.metric] || 0.1;\n    score += result.value * weight;\n    totalWeight += weight;\n  }\n\n  // Normalize score\n  score = totalWeight > 0 ? Math.min(1, score / totalWeight * Object.keys(effectiveWeights).length / results.length) : 0;\n  score = Math.min(1, score);\n\n  // Determine health status\n  let health: HealthStatus;\n  if (score <= 0.25) {\n    health = 'stable';\n  } else if (score <= 0.5) {\n    health = 'minor_variation';\n  } else if (score <= 0.75) {\n    health = 'degraded';\n  } else {\n    health = 'unstable';\n  }\n\n  // Check for any critical errors that should override\n  const hasError = results.some(r => r.status === 'error');\n  if (hasError && health === 'stable') {\n    health = 'minor_variation';\n  }\n\n  // Generate recommendations\n  const recommendations = generateRecommendations(results);\n\n  return {\n    health,\n    score: Math.round(score * 100) / 100,\n    engineResults: results,\n    timestamp: Date.now(),\n    recommendations: recommendations.length > 0 ? 
recommendations : undefined\n  };\n}\n\n/**\n * Quick health check - returns true if healthy.\n */\nexport function isHealthy(report: HealthReport): boolean {\n  return report.health === 'stable' || report.health === 'minor_variation';\n}\n\n/**\n * Gets severity level for alerting.\n */\nexport function getAlertLevel(report: HealthReport): 'none' | 'info' | 'warning' | 'critical' {\n  switch (report.health) {\n    case 'stable':\n      return 'none';\n    case 'minor_variation':\n      return 'info';\n    case 'degraded':\n      return 'warning';\n    case 'unstable':\n      return 'critical';\n  }\n}\n"]}