UNPKG

devflow-ai

Version:

Enterprise-grade AI agent orchestration with swarm management UI dashboard

746 lines (640 loc) 24.8 kB
/** * TruthScorer - Advanced truth scoring system with configurable thresholds * Provides statistical validation and confidence analysis for agent claims and system states */ import type { ILogger } from '../core/logger.js'; import { logger } from '../core/logger.js'; import { AppError } from '../utils/error-handler.js'; import type { TruthScore, TruthScoreConfig, TruthScoringWeights, TruthValidationChecks, ConfidenceConfig, TruthScoreComponents, ConfidenceInterval, TruthEvidence, AgentClaim, VerificationError, } from './types.js'; import { VERIFICATION_CONSTANTS } from './types.js'; import type { AgentId, AgentState } from '../swarm/types.js'; export interface TruthScorerOptions { config?: Partial<TruthScoreConfig>; logger?: ILogger; } export class TruthScorer { private readonly config: TruthScoreConfig; private readonly logger: ILogger; private readonly agentHistory: Map<string, AgentPerformanceHistory> = new Map(); private readonly validationCache: Map<string, CachedValidation> = new Map(); constructor(options: TruthScorerOptions = {}) { this.logger = options.logger || logger.child({ component: 'TruthScorer' }); this.config = this.mergeConfig(options.config); this.logger.info('TruthScorer initialized', { threshold: this.config.threshold, checks: this.config.checks, weights: this.config.weights, }); } /** * Calculate truth score for an agent claim */ async scoreClaim(claim: AgentClaim, context?: ScoringContext): Promise<TruthScore> { const startTime = Date.now(); this.logger.debug('Starting truth score calculation', { claimId: claim.id, claimType: claim.type, agentId: claim.agentId, }); try { // Initialize score components const components: Partial<TruthScoreComponents> = {}; const evidence: TruthEvidence[] = []; const errors: VerificationError[] = []; // Calculate individual components if (this.config.checks.historicalValidation) { components.agentReliability = await this.calculateAgentReliability(claim, evidence, errors); } if (this.config.checks.crossAgentValidation && context?.peers) { components.crossValidation = await this.calculateCrossValidation(claim, context.peers, evidence, errors); } if (this.config.checks.externalValidation && context?.externalSources) { components.externalVerification = await this.calculateExternalVerification(claim, context.externalSources, evidence, errors); } if (this.config.checks.logicalValidation) { components.logicalCoherence = await this.calculateLogicalCoherence(claim, evidence, errors); } if (this.config.checks.statisticalValidation) { components.factualConsistency = await this.calculateFactualConsistency(claim, evidence, errors); } // Calculate overall score using weighted average const overall = this.calculateWeightedScore(components as TruthScoreComponents); const fullComponents: TruthScoreComponents = { agentReliability: components.agentReliability || 0, crossValidation: components.crossValidation || 0, externalVerification: components.externalVerification || 0, logicalCoherence: components.logicalCoherence || 0, factualConsistency: components.factualConsistency || 0, overall, }; // Calculate confidence interval const confidence = this.calculateConfidenceInterval(fullComponents, evidence.length); const score: TruthScore = { score: overall, components: fullComponents, confidence, evidence, timestamp: new Date(), metadata: { claimId: claim.id, agentId: claim.agentId, calculationTime: Date.now() - startTime, evidenceCount: evidence.length, errorCount: errors.length, config: this.config, }, }; this.logger.info('Truth score calculated', { claimId: claim.id, score: overall, components: fullComponents, confidence: confidence.level, duration: Date.now() - startTime, }); return score; } catch (error) { this.logger.error('Failed to calculate truth score', error); throw new AppError( `Truth score calculation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, 'TRUTH_SCORE_CALCULATION_FAILED', 500 ); } } /** * Validate if a truth score meets the configured threshold */ validateScore(score: TruthScore): boolean { const passes = score.score >= this.config.threshold; this.logger.debug('Truth score validation', { score: score.score, threshold: this.config.threshold, passes, }); return passes; } /** * Update agent performance history */ updateAgentHistory(agentId: AgentId, performance: AgentPerformanceRecord): void { const agentKey = typeof agentId === 'string' ? agentId : agentId.id; if (!this.agentHistory.has(agentKey)) { this.agentHistory.set(agentKey, { agentId: agentKey, records: [], statistics: { averageScore: 0, successRate: 0, totalClaims: 0, recentTrend: 'stable', }, }); } const history = this.agentHistory.get(agentKey)!; history.records.push(performance); // Keep only recent records (last 100) if (history.records.length > 100) { history.records = history.records.slice(-100); } // Update statistics this.updateAgentStatistics(history); this.logger.debug('Agent history updated', { agentId: agentKey, recordCount: history.records.length, averageScore: history.statistics.averageScore, }); } /** * Get agent reliability score */ getAgentReliability(agentId: AgentId): number { const agentKey = typeof agentId === 'string' ? agentId : agentId.id; const history = this.agentHistory.get(agentKey); if (!history || history.records.length === 0) { return 0.5; // Default neutral score for unknown agents } return history.statistics.averageScore; } /** * Clear validation cache */ clearCache(): void { this.validationCache.clear(); this.logger.debug('Validation cache cleared'); } private mergeConfig(partialConfig?: Partial<TruthScoreConfig>): TruthScoreConfig { const defaultWeights: TruthScoringWeights = { agentReliability: 0.3, crossValidation: 0.25, externalVerification: 0.2, factualConsistency: 0.15, logicalCoherence: 0.1, }; const defaultChecks: TruthValidationChecks = { historicalValidation: true, crossAgentValidation: true, externalValidation: false, logicalValidation: true, statisticalValidation: true, }; const defaultConfidence: ConfidenceConfig = { level: VERIFICATION_CONSTANTS.DEFAULT_CONFIDENCE_LEVEL, minSampleSize: VERIFICATION_CONSTANTS.DEFAULT_MIN_SAMPLE_SIZE, maxErrorMargin: VERIFICATION_CONSTANTS.DEFAULT_MAX_ERROR_MARGIN, }; return { threshold: partialConfig?.threshold || VERIFICATION_CONSTANTS.DEFAULT_TRUTH_THRESHOLD, weights: { ...defaultWeights, ...partialConfig?.weights }, checks: { ...defaultChecks, ...partialConfig?.checks }, confidence: { ...defaultConfidence, ...partialConfig?.confidence }, }; } private async calculateAgentReliability( claim: AgentClaim, evidence: TruthEvidence[], errors: VerificationError[] ): Promise<number> { try { const agentKey = typeof claim.agentId === 'string' ? claim.agentId : claim.agentId.id; const history = this.agentHistory.get(agentKey); if (!history || history.records.length < 3) { // Insufficient data - use neutral score evidence.push({ type: 'agent_history', source: 'internal_history', weight: this.config.weights.agentReliability, score: 0.5, details: { reason: 'insufficient_data', recordCount: history?.records.length || 0 }, timestamp: new Date(), }); return 0.5; } // Calculate reliability based on recent performance const recentRecords = history.records.slice(-10); // Last 10 records const avgScore = recentRecords.reduce((sum, record) => sum + record.score, 0) / recentRecords.length; const consistency = 1 - this.calculateVariance(recentRecords.map(r => r.score)); const trendFactor = this.calculateTrendFactor(recentRecords); const reliability = (avgScore * 0.6) + (consistency * 0.3) + (trendFactor * 0.1); evidence.push({ type: 'agent_history', source: 'internal_history', weight: this.config.weights.agentReliability, score: reliability, details: { averageScore: avgScore, consistency, trendFactor, recordCount: recentRecords.length, }, timestamp: new Date(), }); return Math.max(0, Math.min(1, reliability)); } catch (error) { errors.push({ code: 'AGENT_RELIABILITY_CALCULATION_FAILED', message: `Failed to calculate agent reliability: ${error instanceof Error ? error.message : 'Unknown error'}`, severity: 'medium', context: { claimId: claim.id, agentId: claim.agentId }, recoverable: true, timestamp: new Date(), }); return 0.5; // Default score on error } } private async calculateCrossValidation( claim: AgentClaim, peers: AgentState[], evidence: TruthEvidence[], errors: VerificationError[] ): Promise<number> { try { if (peers.length === 0) { evidence.push({ type: 'cross_validation', source: 'peer_agents', weight: this.config.weights.crossValidation, score: 0.5, details: { reason: 'no_peers_available' }, timestamp: new Date(), }); return 0.5; } // Simulate cross-validation with peer agents // In a real implementation, this would involve querying other agents const validationScores: number[] = []; const reliablePeers = peers.filter(peer => this.getAgentReliability(peer.id) > 0.7); for (const peer of reliablePeers.slice(0, 5)) { // Limit to 5 peers const peerReliability = this.getAgentReliability(peer.id); const validationScore = this.simulatePeerValidation(claim, peer); validationScores.push(validationScore * peerReliability); } if (validationScores.length === 0) { evidence.push({ type: 'cross_validation', source: 'peer_agents', weight: this.config.weights.crossValidation, score: 0.5, details: { reason: 'no_reliable_peers' }, timestamp: new Date(), }); return 0.5; } const avgValidation = validationScores.reduce((sum, score) => sum + score, 0) / validationScores.length; const consensus = 1 - this.calculateVariance(validationScores); const crossValidationScore = (avgValidation * 0.8) + (consensus * 0.2); evidence.push({ type: 'cross_validation', source: 'peer_agents', weight: this.config.weights.crossValidation, score: crossValidationScore, details: { peerCount: validationScores.length, averageValidation: avgValidation, consensus, validationScores, }, timestamp: new Date(), }); return Math.max(0, Math.min(1, crossValidationScore)); } catch (error) { errors.push({ code: 'CROSS_VALIDATION_FAILED', message: `Cross validation failed: ${error instanceof Error ? error.message : 'Unknown error'}`, severity: 'medium', context: { claimId: claim.id, peerCount: peers.length }, recoverable: true, timestamp: new Date(), }); return 0.5; } } private async calculateExternalVerification( claim: AgentClaim, externalSources: ExternalSource[], evidence: TruthEvidence[], errors: VerificationError[] ): Promise<number> { try { if (externalSources.length === 0) { evidence.push({ type: 'external_source', source: 'external_verification', weight: this.config.weights.externalVerification, score: 0.5, details: { reason: 'no_external_sources' }, timestamp: new Date(), }); return 0.5; } // Simulate external verification // In a real implementation, this would query external APIs, databases, etc. const verificationResults: number[] = []; for (const source of externalSources.slice(0, 3)) { // Limit to 3 sources const verificationScore = await this.simulateExternalVerification(claim, source); verificationResults.push(verificationScore * source.reliability); } const avgVerification = verificationResults.reduce((sum, score) => sum + score, 0) / verificationResults.length; const sourceAgreement = 1 - this.calculateVariance(verificationResults); const externalScore = (avgVerification * 0.7) + (sourceAgreement * 0.3); evidence.push({ type: 'external_source', source: 'external_verification', weight: this.config.weights.externalVerification, score: externalScore, details: { sourceCount: verificationResults.length, averageVerification: avgVerification, sourceAgreement, verificationResults, }, timestamp: new Date(), }); return Math.max(0, Math.min(1, externalScore)); } catch (error) { errors.push({ code: 'EXTERNAL_VERIFICATION_FAILED', message: `External verification failed: ${error instanceof Error ? error.message : 'Unknown error'}`, severity: 'medium', context: { claimId: claim.id, sourceCount: externalSources.length }, recoverable: true, timestamp: new Date(), }); return 0.5; } } private async calculateLogicalCoherence( claim: AgentClaim, evidence: TruthEvidence[], errors: VerificationError[] ): Promise<number> { try { // Analyze logical consistency of the claim const coherenceChecks = { structuralIntegrity: this.checkStructuralIntegrity(claim), causalConsistency: this.checkCausalConsistency(claim), temporalCoherence: this.checkTemporalCoherence(claim), metricConsistency: this.checkMetricConsistency(claim), }; const coherenceScore = Object.values(coherenceChecks).reduce((sum, score) => sum + score, 0) / 4; evidence.push({ type: 'logical_proof', source: 'logical_analyzer', weight: this.config.weights.logicalCoherence, score: coherenceScore, details: coherenceChecks, timestamp: new Date(), }); return Math.max(0, Math.min(1, coherenceScore)); } catch (error) { errors.push({ code: 'LOGICAL_COHERENCE_FAILED', message: `Logical coherence analysis failed: ${error instanceof Error ? error.message : 'Unknown error'}`, severity: 'medium', context: { claimId: claim.id }, recoverable: true, timestamp: new Date(), }); return 0.5; } } private async calculateFactualConsistency( claim: AgentClaim, evidence: TruthEvidence[], errors: VerificationError[] ): Promise<number> { try { // Perform statistical validation of claim metrics const statisticalTests = { distributionTest: this.performDistributionTest(claim), outlierDetection: this.performOutlierDetection(claim), trendAnalysis: this.performTrendAnalysis(claim), correlationAnalysis: this.performCorrelationAnalysis(claim), }; const consistencyScore = Object.values(statisticalTests).reduce((sum, score) => sum + score, 0) / 4; evidence.push({ type: 'statistical_test', source: 'statistical_analyzer', weight: this.config.weights.factualConsistency, score: consistencyScore, details: statisticalTests, timestamp: new Date(), }); return Math.max(0, Math.min(1, consistencyScore)); } catch (error) { errors.push({ code: 'FACTUAL_CONSISTENCY_FAILED', message: `Factual consistency analysis failed: ${error instanceof Error ? error.message : 'Unknown error'}`, severity: 'medium', context: { claimId: claim.id }, recoverable: true, timestamp: new Date(), }); return 0.5; } } private calculateWeightedScore(components: TruthScoreComponents): number { const weights = this.config.weights; const totalWeight = Object.values(weights).reduce((sum, weight) => sum + weight, 0); const weightedSum = (components.agentReliability * weights.agentReliability) + (components.crossValidation * weights.crossValidation) + (components.externalVerification * weights.externalVerification) + (components.factualConsistency * weights.factualConsistency) + (components.logicalCoherence * weights.logicalCoherence); return weightedSum / totalWeight; } private calculateConfidenceInterval(components: TruthScoreComponents, evidenceCount: number): ConfidenceInterval { const score = components.overall; const sampleSize = Math.max(evidenceCount, 1); const confidenceLevel = this.config.confidence.level; // Calculate standard error (simplified) const variance = this.calculateComponentVariance(components); const standardError = Math.sqrt(variance / sampleSize); // Z-score for confidence level (approximation) const zScore = this.getZScore(confidenceLevel); const margin = zScore * standardError; return { lower: Math.max(0, score - margin), upper: Math.min(1, score + margin), level: confidenceLevel, }; } private calculateComponentVariance(components: TruthScoreComponents): number { const scores = [ components.agentReliability, components.crossValidation, components.externalVerification, components.factualConsistency, components.logicalCoherence, ]; return this.calculateVariance(scores); } private calculateVariance(values: number[]): number { if (values.length === 0) return 0; const mean = values.reduce((sum, val) => sum + val, 0) / values.length; const squaredDiffs = values.map(val => Math.pow(val - mean, 2)); return squaredDiffs.reduce((sum, diff) => sum + diff, 0) / values.length; } private calculateTrendFactor(records: AgentPerformanceRecord[]): number { if (records.length < 2) return 0.5; // Simple linear trend calculation const scores = records.map(r => r.score); const n = scores.length; const x = Array.from({ length: n }, (_, i) => i); const sumX = x.reduce((a, b) => a + b, 0); const sumY = scores.reduce((a, b) => a + b, 0); const sumXY = x.reduce((sum, xi, i) => sum + xi * scores[i], 0); const sumXX = x.reduce((sum, xi) => sum + xi * xi, 0); const slope = (n * sumXY - sumX * sumY) / (n * sumXX - sumX * sumX); // Convert slope to factor (positive trend increases score) return 0.5 + Math.max(-0.5, Math.min(0.5, slope)); } private getZScore(confidenceLevel: number): number { // Simplified Z-score lookup if (confidenceLevel >= 0.99) return 2.576; if (confidenceLevel >= 0.95) return 1.96; if (confidenceLevel >= 0.90) return 1.645; if (confidenceLevel >= 0.80) return 1.282; return 1.0; } private updateAgentStatistics(history: AgentPerformanceHistory): void { const records = history.records; if (records.length === 0) return; const scores = records.map(r => r.score); const successCount = records.filter(r => r.success).length; history.statistics.averageScore = scores.reduce((sum, score) => sum + score, 0) / scores.length; history.statistics.successRate = successCount / records.length; history.statistics.totalClaims = records.length; // Calculate trend if (records.length >= 5) { const recentScores = scores.slice(-5); const earlierScores = scores.slice(-10, -5); if (earlierScores.length > 0) { const recentAvg = recentScores.reduce((sum, score) => sum + score, 0) / recentScores.length; const earlierAvg = earlierScores.reduce((sum, score) => sum + score, 0) / earlierScores.length; if (recentAvg > earlierAvg + 0.05) { history.statistics.recentTrend = 'improving'; } else if (recentAvg < earlierAvg - 0.05) { history.statistics.recentTrend = 'declining'; } else { history.statistics.recentTrend = 'stable'; } } } } // Simulation methods for demo purposes - replace with real implementations private simulatePeerValidation(claim: AgentClaim, peer: AgentState): number { // Simulate peer validation logic const baseScore = 0.7; const randomFactor = (Math.random() - 0.5) * 0.4; return Math.max(0, Math.min(1, baseScore + randomFactor)); } private async simulateExternalVerification(claim: AgentClaim, source: ExternalSource): Promise<number> { // Simulate external source verification const baseScore = source.reliability * 0.8; const randomFactor = (Math.random() - 0.5) * 0.3; return Math.max(0, Math.min(1, baseScore + randomFactor)); } private checkStructuralIntegrity(claim: AgentClaim): number { // Check if claim has required fields and proper structure let score = 0.8; if (!claim.data || typeof claim.data !== 'object') score -= 0.3; if (!claim.evidence || claim.evidence.length === 0) score -= 0.2; if (!claim.metrics) score -= 0.2; return Math.max(0, score); } private checkCausalConsistency(claim: AgentClaim): number { // Check for causal relationships in claim data return 0.8; // Simplified - implement actual causal analysis } private checkTemporalCoherence(claim: AgentClaim): number { // Check temporal consistency of claim const now = new Date(); const claimAge = now.getTime() - claim.submittedAt.getTime(); // Claims should be recent if (claimAge > 24 * 60 * 60 * 1000) { // More than 24 hours return 0.5; } return 0.9; } private checkMetricConsistency(claim: AgentClaim): number { // Check consistency of metrics in claim if (!claim.metrics) return 0.5; // Check for reasonable metric values const metrics = claim.metrics; let score = 0.8; if (metrics.accuracy && (metrics.accuracy < 0 || metrics.accuracy > 1)) score -= 0.3; if (metrics.errorRate && metrics.errorRate < 0) score -= 0.2; if (metrics.executionTime && metrics.executionTime < 0) score -= 0.2; return Math.max(0, score); } private performDistributionTest(claim: AgentClaim): number { // Perform statistical distribution test return 0.8; // Simplified - implement actual statistical tests } private performOutlierDetection(claim: AgentClaim): number { // Detect outliers in claim metrics return 0.8; // Simplified - implement actual outlier detection } private performTrendAnalysis(claim: AgentClaim): number { // Analyze trends in claim data return 0.8; // Simplified - implement actual trend analysis } private performCorrelationAnalysis(claim: AgentClaim): number { // Analyze correlations in claim metrics return 0.8; // Simplified - implement actual correlation analysis } } // Supporting interfaces interface AgentPerformanceHistory { agentId: string; records: AgentPerformanceRecord[]; statistics: AgentStatistics; } interface AgentPerformanceRecord { timestamp: Date; claimId: string; score: number; success: boolean; executionTime: number; resourceUsage: number; metadata: Record<string, unknown>; } interface AgentStatistics { averageScore: number; successRate: number; totalClaims: number; recentTrend: 'improving' | 'stable' | 'declining'; } interface CachedValidation { key: string; result: number; timestamp: Date; expiry: Date; } interface ScoringContext { peers?: AgentState[]; externalSources?: ExternalSource[]; historicalData?: Record<string, unknown>; constraints?: Record<string, unknown>; } interface ExternalSource { id: string; name: string; type: string; endpoint: string; reliability: number; credentials?: Record<string, string>; } export default TruthScorer;