devflow-ai
Enterprise-grade AI agent orchestration with swarm management UI dashboard
/**
 * Agent Truth Scorer - Advanced performance evaluation and scoring system
 * 
 * Provides comprehensive agent performance analysis including accuracy,
 * reliability, consistency, efficiency, and adaptability metrics.
 */
import type { ILogger } from '../core/logger.js';
import type {
  TruthMetric,
  AgentTruthScore,
  PerformanceWindow,
  ScoreTrend,
  BenchmarkComparison,
  RiskAssessment,
  RiskFactor,
  TruthTelemetryConfig,
} from './telemetry.js';
export interface AgentScoringConfig {
  windowSizes: {
    recent: number; // minutes
    short: number; // hours
    medium: number; // days
    long: number; // weeks
  };
  weights: {
    accuracy: number;
    reliability: number;
    consistency: number;
    efficiency: number;
    adaptability: number;
  };
  benchmarks: {
    minAccuracy: number;
    minReliability: number;
    minConsistency: number;
    minEfficiency: number;
    targetTasksPerHour: number;
  };
  riskThresholds: {
    low: number;
    medium: number;
    high: number;
  };
}
export interface AgentPerformanceData {
  agentId: string;
  metrics: TruthMetric[];
  recentMetrics: TruthMetric[];
  taskHistory: TaskPerformance[];
  errorHistory: ErrorAnalysis[];
}
export interface TaskPerformance {
  taskId: string;
  timestamp: Date;
  taskType: string;
  complexity: string;
  duration: number;
  accuracy: number;
  success: boolean;
  interventionRequired: boolean;
  errorCount: number;
}
export interface ErrorAnalysis {
  timestamp: Date;
  errorType: string;
  severity: string;
  frequency: number;
  impact: number;
  resolved: boolean;
  pattern: string;
}
export interface TrendAnalysis {
  metric: string;
  timeframe: string;
  direction: 'improving' | 'declining' | 'stable';
  rate: number;
  confidence: number;
  significance: number;
  seasonality?: {
    detected: boolean;
    period: number;
    strength: number;
  };
}
export class AgentTruthScorer {
  private config: AgentScoringConfig;
  private logger: ILogger;
  private telemetryConfig: TruthTelemetryConfig;
  
  // Agent data storage
  private agentData = new Map<string, AgentPerformanceData>();
  private agentScores = new Map<string, AgentTruthScore>();
  
  // Benchmark data
  private benchmarkScores = new Map<string, number>();
  private industryBenchmarks: Map<string, BenchmarkComparison[]> = new Map();
  
  constructor(telemetryConfig: TruthTelemetryConfig, logger: ILogger) {
    this.telemetryConfig = telemetryConfig;
    this.logger = logger;
    
    this.config = {
      windowSizes: {
        recent: 15, // 15 minutes
        short: 4, // 4 hours
        medium: 7, // 7 days
        long: 4, // 4 weeks
      },
      weights: {
        accuracy: 0.30,
        reliability: 0.25,
        consistency: 0.20,
        efficiency: 0.15,
        adaptability: 0.10,
      },
      benchmarks: {
        minAccuracy: 0.90,
        minReliability: 0.85,
        minConsistency: 0.80,
        minEfficiency: 0.75,
        targetTasksPerHour: 10,
      },
      riskThresholds: {
        low: 0.85,
        medium: 0.70, // a component score below this adds a risk factor
        high: 0.50, // a component score below this escalates the factor's severity
      },
    };
    
    this.initializeBenchmarks();
  }
  
  async initialize(): Promise<void> {
    this.logger.info('Initializing Agent Truth Scorer', {
      weights: this.config.weights,
      benchmarks: this.config.benchmarks,
    });
    
    await this.loadHistoricalData();
    
    this.logger.info('Agent Truth Scorer initialized successfully');
  }
  
  async shutdown(): Promise<void> {
    this.logger.info('Shutting down Agent Truth Scorer');
    
    await this.persistScoringData();
    
    this.logger.info('Agent Truth Scorer shutdown complete');
  }
  
  async updateAgentMetric(metric: TruthMetric): Promise<void> {
    const agentId = metric.agentId;
    
    // Get or create agent data
    let agentData = this.agentData.get(agentId);
    if (!agentData) {
      agentData = {
        agentId,
        metrics: [],
        recentMetrics: [],
        taskHistory: [],
        errorHistory: [],
      };
      this.agentData.set(agentId, agentData);
    }
    
    // Add metric to agent data
    agentData.metrics.push(metric);
    
    // Update recent metrics (last 15 minutes)
    const recentCutoff = new Date(Date.now() - this.config.windowSizes.recent * 60 * 1000);
    agentData.recentMetrics = agentData.metrics.filter(m => m.timestamp >= recentCutoff);
    
    // Update task history
    await this.updateTaskHistory(agentData, metric);
    
    // Update error history
    await this.updateErrorHistory(agentData, metric);
    
    // Trigger score recalculation for significant updates
    if (this.shouldRecalculateScore(metric)) {
      await this.calculateAgentScore(agentId);
    }
  }
  
  async calculateAgentScore(agentId: string): Promise<AgentTruthScore | null> {
    const agentData = this.agentData.get(agentId);
    if (!agentData || agentData.metrics.length === 0) {
      return null;
    }
    
    try {
      // Calculate component scores
      const components = await this.calculateComponentScores(agentData);
      
      // Calculate overall score using weighted average
      const overallScore = this.calculateWeightedScore(components);
      
      // Generate performance windows
      const recentPerformance = await this.generatePerformanceWindows(agentData);
      
      // Analyze trends
      const trends = await this.analyzeTrends(agentData);
      
      // Generate benchmark comparisons
      const benchmarks = await this.generateBenchmarkComparisons(agentId, components);
      
      // Assess risk
      const riskAssessment = await this.assessRisk(agentData, components);
      
      const score: AgentTruthScore = {
        agentId,
        timestamp: new Date(),
        overallScore,
        components,
        recentPerformance,
        trends,
        benchmarks,
        riskAssessment,
      };
      
      this.agentScores.set(agentId, score);
      
      this.logger.debug('Calculated agent score', {
        agentId,
        overallScore: overallScore.toFixed(3),
        components,
      });
      
      return score;
      
    } catch (error) {
      this.logger.error('Error calculating agent score', { agentId, error });
      return null;
    }
  }
  
  private async calculateComponentScores(agentData: AgentPerformanceData): Promise<{
    accuracy: number;
    reliability: number;
    consistency: number;
    efficiency: number;
    adaptability: number;
  }> {
    const shortTermMetrics = this.getMetricsInWindow(
      agentData.metrics,
      this.config.windowSizes.short * 60 * 60 * 1000 // hours to ms
    );
    
    return {
      accuracy: await this.calculateAccuracyScore(shortTermMetrics),
      reliability: await this.calculateReliabilityScore(shortTermMetrics),
      consistency: await this.calculateConsistencyScore(shortTermMetrics),
      efficiency: await this.calculateEfficiencyScore(agentData),
      adaptability: await this.calculateAdaptabilityScore(agentData),
    };
  }
  
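  /**
   * Accuracy is a confidence-weighted, time-decayed average of 'accuracy' metrics:
   *
   *   weight_i = exp(-age_i / 24h) * confidence_i
   *   score    = min(1, sum(value_i * weight_i) / sum(weight_i))
   *
   * so, at equal confidence, a metric observed 24 hours ago carries roughly
   * e^-1 ≈ 37% of the weight of a metric observed just now.
   */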
  private async calculateAccuracyScore(metrics: TruthMetric[]): Promise<number> {
    if (metrics.length === 0) return 0;
    
    const accuracyMetrics = metrics.filter(m => m.metricType === 'accuracy');
    if (accuracyMetrics.length === 0) return 0.8; // Default for new agents
    
    // Weighted average with more recent metrics having higher weight
    let totalWeight = 0;
    let weightedSum = 0;
    
    accuracyMetrics.forEach((metric) => {
      const age = Date.now() - metric.timestamp.getTime();
      const weight = Math.exp(-age / (24 * 60 * 60 * 1000)); // Exponential decay over 24 hours
      
      totalWeight += weight * metric.confidence;
      weightedSum += metric.value * weight * metric.confidence;
    });
    
    return totalWeight > 0 ? Math.min(1, weightedSum / totalWeight) : 0;
  }
  
  private async calculateReliabilityScore(metrics: TruthMetric[]): Promise<number> {
    if (metrics.length === 0) return 0;
    
    // Reliability is derived from validation outcomes across all metric types
    const validationScores = metrics.map(m => m.validation.score);
    
    // Calculate task success rate
    const successRate = validationScores.filter(score => score >= 0.8).length / validationScores.length;
    
    // Calculate consistency of performance
    const variance = this.calculateVariance(validationScores);
    const consistencyScore = Math.max(0, 1 - variance);
    
    // Combine factors
    return (successRate * 0.7) + (consistencyScore * 0.3);
  }
  
  private async calculateConsistencyScore(metrics: TruthMetric[]): Promise<number> {
    if (metrics.length < 3) return 0.8; // Default for insufficient data
    
    const values = metrics.map(m => m.value);
    const confidences = metrics.map(m => m.confidence);
    
    // Calculate coefficient of variation for values and confidence
    const valueCV = this.calculateCoefficientOfVariation(values);
    const confidenceCV = this.calculateCoefficientOfVariation(confidences);
    
    // Lower CV means higher consistency
    const valueConsistency = Math.max(0, 1 - valueCV);
    const confidenceConsistency = Math.max(0, 1 - confidenceCV);
    
    return (valueConsistency * 0.6) + (confidenceConsistency * 0.4);
  }
  
  private async calculateEfficiencyScore(agentData: AgentPerformanceData): Promise<number> {
    const recentTasks = agentData.taskHistory.filter(
      task => task.timestamp > new Date(Date.now() - this.config.windowSizes.short * 60 * 60 * 1000)
    );
    
    if (recentTasks.length === 0) return 0.8; // Default
    
    // Calculate tasks per hour
    const hoursSpanned = this.config.windowSizes.short;
    const tasksPerHour = recentTasks.length / hoursSpanned;
    const throughputScore = Math.min(1, tasksPerHour / this.config.benchmarks.targetTasksPerHour);
    
    // Calculate average task duration relative to complexity
    const durationEfficiency = this.calculateDurationEfficiency(recentTasks);
    
    // Calculate human intervention rate (lower is better)
    const interventionRate = recentTasks.filter(t => t.interventionRequired).length / recentTasks.length;
    const interventionScore = Math.max(0, 1 - (interventionRate * 2)); // Penalty for interventions
    
    return (throughputScore * 0.4) + (durationEfficiency * 0.4) + (interventionScore * 0.2);
  }
  
  private async calculateAdaptabilityScore(agentData: AgentPerformanceData): Promise<number> {
    const recentTasks = agentData.taskHistory.filter(
      task => task.timestamp > new Date(Date.now() - this.config.windowSizes.medium * 24 * 60 * 60 * 1000)
    );
    
    if (recentTasks.length < 5) return 0.7; // Default for new agents
    
    // Analyze performance across different task types
    const taskTypes = new Set(recentTasks.map(t => t.taskType));
    const performanceByType = new Map<string, number[]>();
    
    recentTasks.forEach(task => {
      if (!performanceByType.has(task.taskType)) {
        performanceByType.set(task.taskType, []);
      }
      performanceByType.get(task.taskType)!.push(task.accuracy);
    });
    
    // Adaptability reflects how well accuracy holds up across task types
    // (per-type averages, counting only types with at least two samples)
    let adaptabilitySum = 0;
    let typeCount = 0;
    
    for (const [taskType, accuracies] of performanceByType) {
      if (accuracies.length >= 2) {
        const avgAccuracy = accuracies.reduce((sum, acc) => sum + acc, 0) / accuracies.length;
        adaptabilitySum += avgAccuracy;
        typeCount++;
      }
    }
    
    const adaptabilityScore = typeCount > 0 ? adaptabilitySum / typeCount : 0.7;
    
    // Bonus for handling multiple task types
    const diversityBonus = Math.min(0.1, taskTypes.size * 0.02);
    
    return Math.min(1, adaptabilityScore + diversityBonus);
  }
  
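  /**
   * The overall score is a weighted sum of the five component scores,
   * overall = Σ (weight_c × score_c), with default weights of accuracy 0.30,
   * reliability 0.25, consistency 0.20, efficiency 0.15 and adaptability 0.10
   * (summing to 1.0), so the result stays in [0, 1] as long as each component does.
   */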
  private calculateWeightedScore(components: {
    accuracy: number;
    reliability: number;
    consistency: number;
    efficiency: number;
    adaptability: number;
  }): number {
    const weights = this.config.weights;
    
    return (
      components.accuracy * weights.accuracy +
      components.reliability * weights.reliability +
      components.consistency * weights.consistency +
      components.efficiency * weights.efficiency +
      components.adaptability * weights.adaptability
    );
  }
  
  private async generatePerformanceWindows(agentData: AgentPerformanceData): Promise<PerformanceWindow[]> {
    const windows: PerformanceWindow[] = [];
    const now = new Date();
    
    // Recent window (last 15 minutes)
    const recentStart = new Date(now.getTime() - this.config.windowSizes.recent * 60 * 1000);
    windows.push(await this.createPerformanceWindow('recent', recentStart, now, agentData));
    
    // Short window (last 4 hours)
    const shortStart = new Date(now.getTime() - this.config.windowSizes.short * 60 * 60 * 1000);
    windows.push(await this.createPerformanceWindow('short', shortStart, now, agentData));
    
    // Medium window (last 7 days)
    const mediumStart = new Date(now.getTime() - this.config.windowSizes.medium * 24 * 60 * 60 * 1000);
    windows.push(await this.createPerformanceWindow('medium', mediumStart, now, agentData));
    
    // Long window (last 4 weeks)
    const longStart = new Date(now.getTime() - this.config.windowSizes.long * 7 * 24 * 60 * 60 * 1000);
    windows.push(await this.createPerformanceWindow('long', longStart, now, agentData));
    
    return windows;
  }
  
  private async createPerformanceWindow(
    period: string,
    startTime: Date,
    endTime: Date,
    agentData: AgentPerformanceData
  ): Promise<PerformanceWindow> {
    const windowMetrics = agentData.metrics.filter(
      m => m.timestamp >= startTime && m.timestamp <= endTime
    );
    
    const windowTasks = agentData.taskHistory.filter(
      t => t.timestamp >= startTime && t.timestamp <= endTime
    );
    
    const successfulTasks = windowTasks.filter(t => t.success).length;
    const accuracyValues = windowMetrics
      .filter(m => m.metricType === 'accuracy')
      .map(m => m.value);
    const confidenceValues = windowMetrics.map(m => m.confidence);
    const interventions = windowTasks.filter(t => t.interventionRequired).length;
    const criticalErrors = agentData.errorHistory.filter(
      e => e.timestamp >= startTime && e.timestamp <= endTime && e.severity === 'critical'
    ).length;
    
    return {
      period,
      startTime,
      endTime,
      metrics: {
        totalTasks: windowTasks.length,
        successfulTasks,
        averageAccuracy: accuracyValues.length > 0 ? 
          accuracyValues.reduce((sum, val) => sum + val, 0) / accuracyValues.length : 0,
        averageConfidence: confidenceValues.length > 0 ?
          confidenceValues.reduce((sum, val) => sum + val, 0) / confidenceValues.length : 0,
        humanInterventions: interventions,
        criticalErrors,
      },
    };
  }
  
  private async analyzeTrends(agentData: AgentPerformanceData): Promise<ScoreTrend[]> {
    const trends: ScoreTrend[] = [];
    
    const metricTypes = ['accuracy', 'reliability', 'consistency', 'efficiency'];
    
    for (const metricType of metricTypes) {
      const trend = await this.analyzeTrendForMetric(agentData, metricType);
      if (trend) {
        trends.push(trend);
      }
    }
    
    return trends;
  }
  
  private async analyzeTrendForMetric(
    agentData: AgentPerformanceData,
    metricType: string
  ): Promise<ScoreTrend | null> {
    const relevantMetrics = agentData.metrics
      .filter(m => m.metricType === metricType)
      .sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
    
    if (relevantMetrics.length < 5) return null; // Need minimum data points
    
    // Calculate linear trend; express time in days so the slope is "change per
    // day" and is comparable to the 0.001 stability threshold below (a raw
    // epoch-millisecond axis would make every slope effectively zero)
    const values = relevantMetrics.map(m => m.value);
    const baseTime = relevantMetrics[0].timestamp.getTime();
    const times = relevantMetrics.map(
      m => (m.timestamp.getTime() - baseTime) / (24 * 60 * 60 * 1000)
    );
    
    const { slope, correlation } = this.calculateLinearTrend(times, values);
    
    // Determine trend direction and significance
    let direction: 'improving' | 'declining' | 'stable';
    if (Math.abs(slope) < 0.001) {
      direction = 'stable';
    } else {
      direction = slope > 0 ? 'improving' : 'declining';
    }
    
    const confidence = Math.abs(correlation);
    const rate = Math.abs(slope);
    
    return {
      metric: metricType,
      direction,
      rate,
      confidence,
      timespan: 'medium',
    };
  }
  
  private async generateBenchmarkComparisons(
    agentId: string,
    components: any
  ): Promise<BenchmarkComparison[]> {
    const comparisons: BenchmarkComparison[] = [];
    
    // Compare each component score against its configured minimum benchmark
    const benchmarks = this.config.benchmarks;
    const categories = [
      ['accuracy', benchmarks.minAccuracy],
      ['reliability', benchmarks.minReliability],
      ['consistency', benchmarks.minConsistency],
      ['efficiency', benchmarks.minEfficiency],
    ] as const;
    
    for (const [category, benchmarkScore] of categories) {
      const agentScore = components[category];
      comparisons.push({
        category,
        agentScore,
        benchmarkScore,
        percentile: this.calculatePercentile(agentScore, category),
        comparison: agentScore >= benchmarkScore ? 'above' : 'below',
      });
    }
    
    return comparisons;
  }
  
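  /**
   * Builds a risk assessment from component scores and recent error history:
   * a component below riskThresholds.medium adds a risk factor (with higher
   * severity when it is also below riskThresholds.high), more than five errors
   * in the last 24 hours adds a "High Error Rate" factor, and the overall level
   * is derived from the maximum factor severity (≥0.8 critical, ≥0.6 high,
   * ≥0.3 medium, otherwise low).
   */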
  private async assessRisk(
    agentData: AgentPerformanceData,
    components: any
  ): Promise<RiskAssessment> {
    const riskFactors: RiskFactor[] = [];
    
    // Accuracy risk
    if (components.accuracy < this.config.riskThresholds.medium) {
      riskFactors.push({
        name: 'Low Accuracy',
        severity: components.accuracy < this.config.riskThresholds.high ? 0.8 : 0.5,
        probability: 0.9,
        impact: 'High risk of incorrect outputs',
        trend: this.getTrendDirection(agentData, 'accuracy'),
      });
    }
    
    // Reliability risk
    if (components.reliability < this.config.riskThresholds.medium) {
      riskFactors.push({
        name: 'Low Reliability',
        severity: components.reliability < this.config.riskThresholds.high ? 0.7 : 0.4,
        probability: 0.8,
        impact: 'Frequent task failures',
        trend: this.getTrendDirection(agentData, 'reliability'),
      });
    }
    
    // Efficiency risk
    if (components.efficiency < this.config.riskThresholds.medium) {
      riskFactors.push({
        name: 'Low Efficiency',
        severity: 0.4,
        probability: 0.7,
        impact: 'Reduced throughput and increased costs',
        trend: this.getTrendDirection(agentData, 'efficiency'),
      });
    }
    
    // Error pattern risk
    const recentErrors = agentData.errorHistory.filter(
      e => e.timestamp > new Date(Date.now() - 24 * 60 * 60 * 1000) // Last 24 hours
    );
    
    if (recentErrors.length > 5) {
      riskFactors.push({
        name: 'High Error Rate',
        severity: 0.6,
        probability: 0.8,
        impact: 'Increased human intervention required',
        trend: 'increasing',
      });
    }
    
    // Determine overall risk level
    const maxSeverity = riskFactors.length > 0 ? 
      Math.max(...riskFactors.map(f => f.severity)) : 0;
    
    let level: 'low' | 'medium' | 'high' | 'critical';
    if (maxSeverity >= 0.8) level = 'critical';
    else if (maxSeverity >= 0.6) level = 'high';
    else if (maxSeverity >= 0.3) level = 'medium';
    else level = 'low';
    
    return {
      level,
      factors: riskFactors,
      recommendations: this.generateRecommendations(riskFactors),
      mitigationStrategies: this.generateMitigationStrategies(riskFactors),
    };
  }
  
  // ========================================================================================
  // Utility Methods
  // ========================================================================================
  
  private getMetricsInWindow(metrics: TruthMetric[], windowMs: number): TruthMetric[] {
    const cutoff = new Date(Date.now() - windowMs);
    return metrics.filter(m => m.timestamp >= cutoff);
  }
  
  private calculateVariance(values: number[]): number {
    if (values.length === 0) return 0;
    
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    const squaredDiffs = values.map(val => Math.pow(val - mean, 2));
    return squaredDiffs.reduce((sum, diff) => sum + diff, 0) / values.length;
  }
  
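  /**
   * Coefficient of variation: CV = σ / μ (standard deviation over mean).
   * Consistency-style scores in this class use max(0, 1 - CV), so tightly
   * clustered values score near 1 and widely dispersed values score near 0.
   * Returns 0 for an empty input or a zero mean.
   */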
  private calculateCoefficientOfVariation(values: number[]): number {
    if (values.length === 0) return 0;
    
    const mean = values.reduce((sum, val) => sum + val, 0) / values.length;
    if (mean === 0) return 0;
    
    const variance = this.calculateVariance(values);
    const stdDev = Math.sqrt(variance);
    
    return stdDev / mean;
  }
  
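  /**
   * Duration efficiency compares average task duration per complexity tier
   * against an expected duration (low 5 min, medium 15 min, high 45 min,
   * critical 120 min): efficiency = min(1, expected / actual), averaged over
   * the complexity groups present; unknown complexities assume 15 minutes.
   */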
  private calculateDurationEfficiency(tasks: TaskPerformance[]): number {
    if (tasks.length === 0) return 0.8;
    
    // Group tasks by complexity and calculate average duration
    const complexityGroups = new Map<string, number[]>();
    
    tasks.forEach(task => {
      if (!complexityGroups.has(task.complexity)) {
        complexityGroups.set(task.complexity, []);
      }
      complexityGroups.get(task.complexity)!.push(task.duration);
    });
    
    // Expected durations by complexity (in minutes)
    const expectedDurations: Record<string, number> = {
      low: 5,
      medium: 15,
      high: 45,
      critical: 120,
    };
    
    let totalEfficiency = 0;
    let groupCount = 0;
    
    for (const [complexity, durations] of complexityGroups) {
      const avgDuration = durations.reduce((sum, d) => sum + d, 0) / durations.length;
      const expected = expectedDurations[complexity] || 15;
      
      // Efficiency is better when duration is less than expected; tasks with no
      // recorded duration (stored as 0) fall back to a neutral score instead of
      // dividing by zero
      const efficiency = avgDuration > 0 ? Math.min(1, expected / avgDuration) : 0.8;
      totalEfficiency += efficiency;
      groupCount++;
    }
    
    return groupCount > 0 ? totalEfficiency / groupCount : 0.8;
  }
  
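  /**
   * Ordinary least-squares fit of y against x, returning the slope
   *   b = (nΣxy − ΣxΣy) / (nΣx² − (Σx)²)
   * and the Pearson correlation coefficient
   *   r = (nΣxy − ΣxΣy) / √((nΣx² − (Σx)²)(nΣy² − (Σy)²)).
   * |r| is used upstream as the confidence of a detected trend.
   */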
  private calculateLinearTrend(x: number[], y: number[]): { slope: number; correlation: number } {
    const n = x.length;
    if (n < 2) return { slope: 0, correlation: 0 };
    
    const sumX = x.reduce((sum, val) => sum + val, 0);
    const sumY = y.reduce((sum, val) => sum + val, 0);
    const sumXY = x.reduce((sum, val, i) => sum + val * y[i], 0);
    const sumX2 = x.reduce((sum, val) => sum + val * val, 0);
    const sumY2 = y.reduce((sum, val) => sum + val * val, 0);
    
    const denomX = n * sumX2 - sumX * sumX;
    const slope = denomX !== 0 ? (n * sumXY - sumX * sumY) / denomX : 0;
    
    const numerator = n * sumXY - sumX * sumY;
    const denominator = Math.sqrt((n * sumX2 - sumX * sumX) * (n * sumY2 - sumY * sumY));
    const correlation = denominator !== 0 ? numerator / denominator : 0;
    
    return { slope, correlation };
  }
  
  private calculatePercentile(score: number, category: string): number {
    // This would typically compare against a database of historical scores
    // For now, using a simplified calculation
    const allScores = Array.from(this.agentScores.values())
      .map(s => s.components[category as keyof typeof s.components])
      .filter(s => s !== undefined)
      .sort((a, b) => a - b);
    
    if (allScores.length === 0) return 50;
    
    const index = allScores.findIndex(s => s >= score);
    return index === -1 ? 100 : (index / allScores.length) * 100;
  }
  
  private getTrendDirection(agentData: AgentPerformanceData, metricType: string): 'increasing' | 'stable' | 'decreasing' {
    const recentMetrics = agentData.metrics
      .filter(m => m.metricType === metricType)
      .slice(-10); // Last 10 metrics
    
    if (recentMetrics.length < 3) return 'stable';
    
    const values = recentMetrics.map(m => m.value);
    const firstHalf = values.slice(0, Math.floor(values.length / 2));
    const secondHalf = values.slice(Math.floor(values.length / 2));
    
    const firstAvg = firstHalf.reduce((sum, val) => sum + val, 0) / firstHalf.length;
    const secondAvg = secondHalf.reduce((sum, val) => sum + val, 0) / secondHalf.length;
    
    const diff = secondAvg - firstAvg;
    if (Math.abs(diff) < 0.01) return 'stable';
    return diff > 0 ? 'increasing' : 'decreasing';
  }
  
  private generateRecommendations(riskFactors: RiskFactor[]): string[] {
    const recommendations: string[] = [];
    
    riskFactors.forEach(factor => {
      switch (factor.name) {
        case 'Low Accuracy':
          recommendations.push('Implement additional validation steps');
          recommendations.push('Review training data quality');
          recommendations.push('Consider accuracy-focused training sessions');
          break;
        case 'Low Reliability':
          recommendations.push('Increase redundancy in critical tasks');
          recommendations.push('Implement circuit breaker patterns');
          recommendations.push('Add health check monitoring');
          break;
        case 'Low Efficiency':
          recommendations.push('Optimize task distribution algorithms');
          recommendations.push('Review resource allocation');
          recommendations.push('Consider task batching strategies');
          break;
        case 'High Error Rate':
          recommendations.push('Implement better error handling');
          recommendations.push('Add preventive validation checks');
          recommendations.push('Review error patterns for systemic issues');
          break;
      }
    });
    
    return Array.from(new Set(recommendations)); // Remove duplicates
  }
  
  private generateMitigationStrategies(riskFactors: RiskFactor[]): string[] {
    const strategies: string[] = [];
    
    const hasAccuracyRisk = riskFactors.some(f => f.name.includes('Accuracy'));
    const hasReliabilityRisk = riskFactors.some(f => f.name.includes('Reliability'));
    const hasEfficiencyRisk = riskFactors.some(f => f.name.includes('Efficiency'));
    
    if (hasAccuracyRisk) {
      strategies.push('Increase human oversight for critical tasks');
      strategies.push('Implement staged rollback procedures');
      strategies.push('Add confidence-based task routing');
    }
    
    if (hasReliabilityRisk) {
      strategies.push('Implement automatic failover mechanisms');
      strategies.push('Add task retry logic with exponential backoff');
      strategies.push('Create backup processing pipelines');
    }
    
    if (hasEfficiencyRisk) {
      strategies.push('Implement dynamic load balancing');
      strategies.push('Add performance-based task assignment');
      strategies.push('Create efficiency monitoring dashboards');
    }
    
    return strategies;
  }
  
  private shouldRecalculateScore(metric: TruthMetric): boolean {
    // Recalculate for significant changes
    return (
      metric.value < 0.7 || // Low performance
      metric.confidence < 0.5 || // Low confidence
      metric.validation.errors.some(e => e.severity === 'critical') || // Critical errors
      metric.metricType === 'accuracy' // Always recalculate for accuracy metrics
    );
  }
  
  private async updateTaskHistory(agentData: AgentPerformanceData, metric: TruthMetric): Promise<void> {
    // Convert metric to task performance record
    const taskPerformance: TaskPerformance = {
      taskId: metric.taskId,
      timestamp: metric.timestamp,
      taskType: metric.context.taskType,
      complexity: metric.context.complexity,
      duration: 0, // Would be calculated from task start/end times
      accuracy: metric.value,
      success: metric.validation.isValid,
      interventionRequired: metric.context.verificationMethod === 'human',
      errorCount: metric.validation.errors.length,
    };
    
    agentData.taskHistory.push(taskPerformance);
    
    // Keep only recent history (last 1000 tasks)
    if (agentData.taskHistory.length > 1000) {
      agentData.taskHistory = agentData.taskHistory.slice(-1000);
    }
  }
  
  private async updateErrorHistory(agentData: AgentPerformanceData, metric: TruthMetric): Promise<void> {
    // Process validation errors
    metric.validation.errors.forEach(error => {
      const errorAnalysis: ErrorAnalysis = {
        timestamp: metric.timestamp,
        errorType: error.type,
        severity: error.severity,
        frequency: 1,
        impact: error.impact,
        resolved: false,
        pattern: this.identifyErrorPattern(error, agentData.errorHistory),
      };
      
      agentData.errorHistory.push(errorAnalysis);
    });
    
    // Keep only recent history (last 500 errors)
    if (agentData.errorHistory.length > 500) {
      agentData.errorHistory = agentData.errorHistory.slice(-500);
    }
  }
  
  private identifyErrorPattern(error: any, errorHistory: ErrorAnalysis[]): string {
    // Simple pattern identification based on error type frequency
    const recentSimilar = errorHistory.filter(
      e => e.errorType === error.type && 
           e.timestamp > new Date(Date.now() - 24 * 60 * 60 * 1000) // Last 24 hours
    );
    
    if (recentSimilar.length >= 3) return 'recurring';
    if (recentSimilar.length >= 2) return 'intermittent';
    return 'isolated';
  }
  
  private initializeBenchmarks(): void {
    // Initialize industry benchmarks (would typically load from external source)
    this.benchmarkScores.set('accuracy', 0.92);
    this.benchmarkScores.set('reliability', 0.88);
    this.benchmarkScores.set('consistency', 0.85);
    this.benchmarkScores.set('efficiency', 0.80);
    this.benchmarkScores.set('adaptability', 0.75);
  }
  
  private async loadHistoricalData(): Promise<void> {
    // Placeholder for loading historical scoring data
    this.logger.debug('Loading historical agent scoring data');
  }
  
  private async persistScoringData(): Promise<void> {
    // Placeholder for persisting scoring data
    this.logger.debug('Persisting agent scoring data');
  }
  
  // ========================================================================================
  // Public API
  // ========================================================================================
  
  getAgentScore(agentId: string): AgentTruthScore | undefined {
    return this.agentScores.get(agentId);
  }
  
  getAllAgentScores(): AgentTruthScore[] {
    return Array.from(this.agentScores.values());
  }
  
  getTopPerformers(limit: number = 10): AgentTruthScore[] {
    return Array.from(this.agentScores.values())
      .sort((a, b) => b.overallScore - a.overallScore)
      .slice(0, limit);
  }
  
  getAgentsByRiskLevel(riskLevel: 'low' | 'medium' | 'high' | 'critical'): AgentTruthScore[] {
    return Array.from(this.agentScores.values())
      .filter(score => score.riskAssessment.level === riskLevel);
  }
  
  getPerformanceStatistics(): {
    totalAgents: number;
    averageScore: number;
    highPerformers: number;
    atRiskAgents: number;
    improvingAgents: number;
    decliningAgents: number;
  } {
    const scores = Array.from(this.agentScores.values());
    
    return {
      totalAgents: scores.length,
      averageScore: scores.length > 0 ? 
        scores.reduce((sum, s) => sum + s.overallScore, 0) / scores.length : 0,
      highPerformers: scores.filter(s => s.overallScore >= 0.9).length,
      atRiskAgents: scores.filter(s => s.riskAssessment.level === 'high' || s.riskAssessment.level === 'critical').length,
      improvingAgents: scores.filter(s => 
        s.trends.some(t => t.direction === 'improving' && t.confidence > 0.7)
      ).length,
      decliningAgents: scores.filter(s => 
        s.trends.some(t => t.direction === 'declining' && t.confidence > 0.7)
      ).length,
    };
  }
}
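
/*
 * Illustrative usage sketch (not exported, not part of the module API). It
 * assumes you already have concrete `ILogger` and `TruthTelemetryConfig`
 * instances and a stream of `TruthMetric` objects from the telemetry pipeline;
 * `logger`, `telemetryConfig`, and `metric` below are placeholder names.
 *
 *   const scorer = new AgentTruthScorer(telemetryConfig, logger);
 *   await scorer.initialize();
 *
 *   // Feed metrics as they arrive; significant updates trigger rescoring.
 *   await scorer.updateAgentMetric(metric);
 *
 *   // Or force a recalculation and inspect the result.
 *   const score = await scorer.calculateAgentScore(metric.agentId);
 *   if (score) {
 *     logger.info('Agent score', { overall: score.overallScore, risk: score.riskAssessment.level });
 *   }
 *
 *   // Dashboard-style queries.
 *   const top = scorer.getTopPerformers(5);
 *   const atRisk = scorer.getAgentsByRiskLevel('high');
 *   const stats = scorer.getPerformanceStatistics();
 *
 *   await scorer.shutdown();
 */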