universal-ai-brain
Version:
🧠 UNIVERSAL AI BRAIN 3.3 - The world's most advanced cognitive architecture with 24 specialized systems, MongoDB 8.1 $rankFusion hybrid search, latest Voyage 3.5 embeddings, and framework-agnostic design. Works with Mastra, Vercel AI, LangChain, OpenAI Agents, and other frameworks.
760 lines (664 loc) • 24.4 kB
text/typescript
/**
* @file ConfidenceTrackingEngine - Advanced uncertainty quantification for AI agents
*
* This engine provides comprehensive confidence tracking and calibration using MongoDB's
* statistical aggregation capabilities. Demonstrates MongoDB's advanced analytics for
* multi-dimensional confidence data and uncertainty quantification.
*
* Features:
* - Multi-dimensional confidence tracking with statistical aggregations
* - Uncertainty quantification (epistemic vs aleatoric)
* - Confidence calibration and prediction accuracy analysis
* - Real-time confidence monitoring and alerting
* - Adaptive confidence adjustment based on historical performance
* - Temporal confidence modeling with decay functions
*/
import { Db, ObjectId } from 'mongodb';
import { ConfidenceTrackingCollection, ConfidenceRecord } from '../collections/ConfidenceTrackingCollection';
/**
 * Input describing a single prediction/decision whose confidence should be
 * assessed and persisted by the engine (see `assessConfidence`).
 */
export interface ConfidenceAssessmentRequest {
  /** Agent issuing the prediction. */
  agentId: string;
  /** Optional session this prediction belongs to. */
  sessionId?: string;
  /** Human-readable description of the task being performed. */
  task: string;
  /** Task category; drives task-specific confidence/uncertainty heuristics. */
  taskType: 'prediction' | 'classification' | 'generation' | 'reasoning' | 'decision';
  /** Domain label, used for per-domain historical calibration lookups. */
  domain: string;
  complexity: number; // 0-1
  novelty: number; // 0-1
  /** Consequence level; higher stakes make the risk assessment more conservative. */
  stakes: 'low' | 'medium' | 'high' | 'critical';
  /** The prediction itself, with optional alternatives. */
  prediction: {
    type: 'binary' | 'multiclass' | 'regression' | 'ranking' | 'generation';
    value: any;
    alternatives?: Array<{ value: any; confidence: number; reasoning: string }>;
    /** Model-reported probability in [0,1]; note 0 is a valid value. */
    probability?: number;
  };
  /** Feature names used to produce the prediction (stored as metadata). */
  features: string[];
  /** Time spent computing the prediction — presumably milliseconds, matching
   * `calculatePerformanceMetrics`; confirm with callers. */
  computationTime: number;
  /** Memory used — presumably MB, matching `calculatePerformanceMetrics`. */
  memoryUsage?: number;
}
/**
 * Result of a confidence assessment: an overall score plus per-dimension
 * breakdowns, source attributions, and an actionable risk verdict.
 */
export interface ConfidenceAssessment {
  /** Id of the stored `ConfidenceRecord` backing this assessment. */
  confidenceId: ObjectId;
  /** Weighted overall confidence, clamped to [0.05, 0.95]. */
  overall: number;
  breakdown: {
    epistemic: number; // Knowledge uncertainty
    aleatoric: number; // Data uncertainty
    calibrated: number; // Historically adjusted
  };
  /** Per-aspect confidence estimates derived from the base confidence. */
  aspects: {
    factualAccuracy: number;
    completeness: number;
    relevance: number;
    clarity: number;
    appropriateness: number;
  };
  /** Contribution of each confidence source (all heuristic, in [0,1]). */
  sources: {
    modelIntrinsic: number;
    retrievalQuality: number;
    contextRelevance: number;
    historicalPerformance: number;
    domainExpertise: number;
  };
  /** Human-readable suggestions when confidence is low or uncertainty high. */
  recommendations: string[];
  /** Risk derived from (1 - overall) weighted by stakes. */
  riskLevel: 'low' | 'medium' | 'high' | 'critical';
  /** Whether `overall` clears the risk-level-specific proceed threshold. */
  shouldProceed: boolean;
}
/**
 * Summary of how well an agent's stated confidence matches realized accuracy.
 * Populated by `ConfidenceTrackingEngine.analyzeCalibration`.
 */
export interface ConfidenceCalibration {
  /** True when calibration error is below the engine's configured target. */
  isWellCalibrated: boolean;
  /** Expected Calibration Error (ECE) from the collection's analysis. */
  calibrationError: number;
  /** Fraction of predictions where confidence exceeded realized accuracy. */
  overconfidenceRate: number;
  /** Fraction of predictions where confidence undershot realized accuracy. */
  underconfidenceRate: number;
  /** Brier score (mean squared error of probabilistic predictions). */
  brierScore: number;
  /** Logarithmic loss of probabilistic predictions. */
  logLoss: number;
  /** Mean accuracy across the calibration-curve bins. */
  reliability: number;
  /** NOTE(review): filled from the collection's MCE (max calibration error)
   * in `analyzeCalibration` — naming mismatch worth confirming. */
  resolution: number;
  /** Currently the mean stated confidence (proxy for sharpness). */
  sharpness: number;
  /** Suggested actions to improve calibration. */
  recommendations: string[];
}
/**
 * Time-bucketed confidence/accuracy history plus derived trend directions
 * and human-readable insights. Produced by `getConfidenceTrends`.
 */
export interface ConfidenceTrends {
  /** One entry per time bucket (daily buckets as currently queried). */
  timeline: Array<{
    timestamp: Date;
    avgConfidence: number;
    accuracy: number;
    calibrationError: number;
    predictionCount: number;
  }>;
  /** Recent-vs-older direction for each tracked metric. For calibration,
   * "improving" means the error is going DOWN. */
  trends: {
    confidenceTrend: 'improving' | 'stable' | 'declining';
    accuracyTrend: 'improving' | 'stable' | 'declining';
    calibrationTrend: 'improving' | 'stable' | 'declining';
  };
  /** Narrative observations derived from the timeline and trends. */
  insights: string[];
}
/**
 * ConfidenceTrackingEngine - Advanced uncertainty quantification for AI agents
 *
 * This engine showcases MongoDB's statistical aggregation capabilities:
 * - Complex aggregation pipelines for confidence analytics
 * - Statistical functions for calibration analysis
 * - Time-series optimization for confidence tracking
 * - Multi-dimensional confidence modeling
 * - Real-time confidence monitoring and alerting
 */
export class ConfidenceTrackingEngine {
  private readonly db: Db;
  private readonly confidenceCollection: ConfidenceTrackingCollection;
  private isInitialized: boolean = false;

  // Tunable thresholds for calibration, confidence decay and monitoring alerts.
  private config = {
    calibration: {
      minSamplesForCalibration: 10,
      calibrationWindow: 30, // days
      targetCalibrationError: 0.1,
      overconfidenceThreshold: 0.8,
      underconfidenceThreshold: 0.5
    },
    confidence: {
      defaultDecayRate: 0.05, // per hour
      defaultHalfLife: 24, // hours
      minConfidenceThreshold: 0.3,
      maxConfidenceThreshold: 0.95
    },
    monitoring: {
      alertThresholds: {
        calibrationError: 0.2,
        accuracyDrop: 0.1,
        overconfidenceRate: 0.3
      }
    }
  };

  constructor(db: Db) {
    this.db = db;
    this.confidenceCollection = new ConfidenceTrackingCollection(db);
  }

  /**
   * Initialize the confidence tracking engine (creates collection indexes).
   * Idempotent: subsequent calls are no-ops.
   *
   * @throws rethrows any index-creation failure after logging it.
   */
  async initialize(): Promise<void> {
    if (this.isInitialized) {
      return;
    }
    try {
      // Create collection indexes
      await this.confidenceCollection.createIndexes();
      this.isInitialized = true;
      console.log('🤔 ConfidenceTrackingEngine initialized successfully');
    } catch (error) {
      console.error('❌ Failed to initialize ConfidenceTrackingEngine:', error);
      throw error;
    }
  }

  /**
   * Assess confidence for a prediction or decision, persist the resulting
   * record, and return the multi-dimensional assessment with a risk verdict.
   *
   * @param request description of the prediction and its context
   * @returns the stored assessment, including the new record's id
   * @throws Error if the engine has not been initialized
   */
  async assessConfidence(request: ConfidenceAssessmentRequest): Promise<ConfidenceAssessment> {
    if (!this.isInitialized) {
      throw new Error('ConfidenceTrackingEngine must be initialized first');
    }

    // Get historical performance for calibration
    const historicalPerformance = await this.getHistoricalPerformance(
      request.agentId,
      request.domain,
      request.taskType
    );

    // Calculate multi-dimensional confidence
    const confidence = this.calculateMultiDimensionalConfidence(request, historicalPerformance);

    // Create confidence record
    const confidenceRecord: Omit<ConfidenceRecord, '_id' | 'createdAt' | 'updatedAt'> = {
      agentId: request.agentId,
      sessionId: request.sessionId,
      timestamp: new Date(),
      context: {
        task: request.task,
        taskType: request.taskType,
        domain: request.domain,
        complexity: request.complexity,
        novelty: request.novelty,
        stakes: request.stakes
      },
      confidence: {
        overall: confidence.overall,
        epistemic: confidence.breakdown.epistemic,
        aleatoric: confidence.breakdown.aleatoric,
        calibrated: confidence.breakdown.calibrated,
        aspects: confidence.aspects,
        sources: confidence.sources
      },
      prediction: request.prediction,
      temporal: {
        decayRate: this.config.confidence.defaultDecayRate,
        halfLife: this.config.confidence.defaultHalfLife,
        // Record expires one half-life from now.
        expiresAt: new Date(Date.now() + (this.config.confidence.defaultHalfLife * 60 * 60 * 1000))
      },
      learning: {
        surprisal: 0, // Will be calculated when actual outcome is known
        informationGain: 0,
        modelUpdate: false,
        confidenceAdjustment: 0
      },
      metadata: {
        framework: 'universal-ai-brain',
        model: 'confidence-tracking-v1',
        version: '1.0.0',
        features: request.features,
        computationTime: request.computationTime,
        memoryUsage: request.memoryUsage
      }
    };

    // Store confidence record
    const confidenceId = await this.confidenceCollection.recordConfidence(confidenceRecord);

    // Generate recommendations and risk assessment
    const recommendations = this.generateConfidenceRecommendations(confidence);
    const riskLevel = this.assessRiskLevel(confidence, request.stakes);
    const shouldProceed = this.shouldProceedWithPrediction(confidence, riskLevel);

    return {
      confidenceId,
      overall: confidence.overall,
      breakdown: confidence.breakdown,
      aspects: confidence.aspects,
      sources: confidence.sources,
      recommendations,
      riskLevel,
      shouldProceed
    };
  }

  /**
   * Update a stored confidence record with the actual outcome so future
   * calibration analysis can compare stated confidence to reality.
   *
   * @param confidenceId id returned by `assessConfidence`
   * @param actualValue the realized outcome value
   * @param correct whether the prediction was correct
   * @param accuracy optional fine-grained accuracy in [0,1]
   * @param feedback optional free-form feedback
   * @param verificationSource who/what verified the outcome
   */
  async updateWithActualOutcome(
    confidenceId: ObjectId,
    actualValue: any,
    correct: boolean,
    accuracy?: number,
    feedback?: string,
    verificationSource: 'automatic' | 'human' | 'external_system' = 'automatic'
  ): Promise<void> {
    const actual = {
      value: actualValue,
      correct,
      accuracy,
      feedback,
      verificationTime: new Date(),
      verificationSource
    };
    await this.confidenceCollection.updateWithActual(confidenceId, actual);
  }

  /**
   * Analyze confidence calibration for an agent over a trailing window.
   *
   * @param agentId agent to analyze
   * @param days size of the trailing window (default 30)
   */
  async analyzeCalibration(agentId: string, days: number = 30): Promise<ConfidenceCalibration> {
    const calibrationAnalysis = await this.confidenceCollection.analyzeCalibration(agentId, {
      timeRange: {
        start: new Date(Date.now() - (days * 24 * 60 * 60 * 1000)),
        end: new Date()
      }
    });
    const stats = await this.confidenceCollection.getConfidenceStats(agentId, days);

    const isWellCalibrated = calibrationAnalysis.ece < this.config.calibration.targetCalibrationError;
    const recommendations = this.generateCalibrationRecommendations(calibrationAnalysis, stats);

    return {
      isWellCalibrated,
      calibrationError: calibrationAnalysis.ece,
      overconfidenceRate: stats.overconfidenceRate,
      underconfidenceRate: stats.underconfidenceRate,
      brierScore: calibrationAnalysis.brierScore,
      logLoss: calibrationAnalysis.logLoss,
      // Reliability: mean bin accuracy along the calibration curve.
      reliability: calibrationAnalysis.calibrationCurve.length > 0 ?
        calibrationAnalysis.calibrationCurve.reduce((sum, point) => sum + point.accuracy, 0) / calibrationAnalysis.calibrationCurve.length : 0,
      // NOTE(review): 'resolution' is filled from MCE (max calibration error)
      // and 'sharpness' from mean confidence — both are proxies, not the
      // textbook decomposition terms. Confirm intended semantics.
      resolution: calibrationAnalysis.mce,
      sharpness: stats.avgConfidence,
      recommendations
    };
  }

  /**
   * Get confidence trends over time (daily buckets) plus derived trend
   * directions and narrative insights.
   */
  async getConfidenceTrends(agentId: string, days: number = 30): Promise<ConfidenceTrends> {
    const timeline = await this.confidenceCollection.getConfidenceTrends(agentId, days, 'day');

    // Calculate trends
    const trends = this.calculateTrends(timeline);
    const insights = this.generateTrendInsights(timeline, trends);

    return {
      timeline,
      trends,
      insights
    };
  }

  /**
   * Get confidence statistics for an agent, augmented with (currently
   * simulated) performance metrics.
   */
  async getConfidenceStats(agentId: string, days: number = 7): Promise<{
    totalPredictions: number;
    verifiedPredictions: number;
    avgConfidence: number;
    accuracy: number;
    calibrationError: number;
    overconfidenceRate: number;
    underconfidenceRate: number;
    confidenceByDomain: Array<{ domain: string; avgConfidence: number; accuracy: number }>;
    performanceMetrics: {
      avgComputationTime: number;
      avgMemoryUsage: number;
      efficiency: number;
    };
  }> {
    const stats = await this.confidenceCollection.getConfidenceStats(agentId, days);

    // Calculate performance metrics
    const performanceMetrics = await this.calculatePerformanceMetrics(agentId, days);

    return {
      ...stats,
      performanceMetrics
    };
  }

  /**
   * Monitor confidence over the last 7 days and generate alerts for
   * calibration error, accuracy drops, and overconfidence.
   */
  async monitorConfidence(agentId: string): Promise<{
    alerts: Array<{
      type: 'calibration_error' | 'accuracy_drop' | 'overconfidence' | 'underconfidence';
      severity: 'low' | 'medium' | 'high' | 'critical';
      message: string;
      recommendations: string[];
    }>;
    status: 'healthy' | 'warning' | 'critical';
  }> {
    const stats = await this.getConfidenceStats(agentId, 7);
    const calibration = await this.analyzeCalibration(agentId, 7);

    // Explicitly typed so pushed literals are checked (was an implicit any[]).
    const alerts: Array<{
      type: 'calibration_error' | 'accuracy_drop' | 'overconfidence' | 'underconfidence';
      severity: 'low' | 'medium' | 'high' | 'critical';
      message: string;
      recommendations: string[];
    }> = [];

    // Check calibration error
    if (calibration.calibrationError > this.config.monitoring.alertThresholds.calibrationError) {
      alerts.push({
        type: 'calibration_error',
        severity: calibration.calibrationError > 0.3 ? 'critical' : 'high',
        message: `High calibration error: ${calibration.calibrationError.toFixed(3)}`,
        recommendations: ['Review confidence assessment methods', 'Increase training data', 'Adjust confidence thresholds']
      });
    }

    // Check accuracy drop
    if (stats.accuracy < 0.7) {
      alerts.push({
        type: 'accuracy_drop',
        severity: stats.accuracy < 0.5 ? 'critical' : 'medium',
        message: `Low accuracy: ${stats.accuracy.toFixed(3)}`,
        recommendations: ['Review model performance', 'Update training data', 'Check for domain shift']
      });
    }

    // Check overconfidence
    if (stats.overconfidenceRate > this.config.monitoring.alertThresholds.overconfidenceRate) {
      alerts.push({
        type: 'overconfidence',
        severity: stats.overconfidenceRate > 0.5 ? 'high' : 'medium',
        message: `High overconfidence rate: ${stats.overconfidenceRate.toFixed(3)}`,
        recommendations: ['Lower confidence thresholds', 'Increase uncertainty estimates', 'Add confidence penalties']
      });
    }

    // Determine overall status
    const status = alerts.some(a => a.severity === 'critical') ? 'critical' :
      alerts.some(a => a.severity === 'high') ? 'warning' : 'healthy';

    return { alerts, status };
  }

  /**
   * Calculate multi-dimensional confidence from the request plus historical
   * performance. Returns a provisional assessment (confidenceId is a
   * placeholder replaced once the record is stored).
   */
  private calculateMultiDimensionalConfidence(
    request: ConfidenceAssessmentRequest,
    historicalPerformance: any
  ): ConfidenceAssessment {
    // Base confidence from the reported probability, falling back to
    // heuristics. FIX: use ?? instead of || so a legitimate probability of 0
    // is not silently replaced by the heuristic estimate.
    const baseConfidence = request.prediction.probability ?? this.estimateBaseConfidence(request);

    // Epistemic uncertainty (knowledge-based)
    const epistemic = this.calculateEpistemicUncertainty(request, historicalPerformance);

    // Aleatoric uncertainty (data-based)
    const aleatoric = this.calculateAleatoricUncertainty(request);

    // Calibrated confidence based on historical performance
    const calibrated = this.calibrateConfidence(baseConfidence, historicalPerformance);

    // Aspect-based confidence breakdown (each floored at 0.1).
    const aspects = {
      factualAccuracy: Math.max(0.1, baseConfidence - (request.novelty * 0.2)),
      completeness: Math.max(0.1, baseConfidence - (request.complexity * 0.15)),
      relevance: Math.max(0.1, baseConfidence - (epistemic * 0.3)),
      clarity: Math.max(0.1, baseConfidence - (aleatoric * 0.2)),
      appropriateness: Math.max(0.1, baseConfidence - (request.novelty * 0.1))
    };

    // Confidence sources (heuristic attributions, floored at 0.1).
    const sources = {
      modelIntrinsic: baseConfidence,
      retrievalQuality: Math.max(0.1, 0.8 - (request.novelty * 0.3)),
      contextRelevance: Math.max(0.1, 0.9 - (request.complexity * 0.2)),
      historicalPerformance: historicalPerformance.avgAccuracy || 0.5,
      domainExpertise: Math.max(0.1, 0.8 - (request.novelty * 0.4))
    };

    // Overall confidence: weighted combination, clamped to [0.05, 0.95].
    const overall = Math.min(0.95, Math.max(0.05,
      (baseConfidence * 0.4) +
      (calibrated * 0.3) +
      ((1 - epistemic) * 0.2) +
      ((1 - aleatoric) * 0.1)
    ));

    return {
      confidenceId: new ObjectId(), // Temporary, will be replaced
      overall,
      breakdown: {
        epistemic,
        aleatoric,
        calibrated
      },
      aspects,
      sources,
      recommendations: [],
      riskLevel: 'medium',
      shouldProceed: true
    };
  }

  /**
   * Get historical performance for calibration, preferring per-domain stats
   * and falling back to agent-wide aggregates (0.5 when unknown).
   *
   * NOTE(review): `taskType` is accepted but not used in the lookup — the
   * collection API only filters by agent and window. Confirm whether
   * task-type-specific stats should be queried here.
   */
  private async getHistoricalPerformance(
    agentId: string,
    domain: string,
    taskType: string
  ): Promise<{
    avgAccuracy: number;
    avgConfidence: number;
    calibrationError: number;
    sampleCount: number;
  }> {
    const stats = await this.confidenceCollection.getConfidenceStats(agentId, 30);
    const domainStats = stats.confidenceByDomain.find(d => d.domain === domain);

    return {
      avgAccuracy: domainStats?.accuracy || stats.accuracy || 0.5,
      avgConfidence: domainStats?.avgConfidence || stats.avgConfidence || 0.5,
      calibrationError: stats.calibrationError || 0.2,
      sampleCount: stats.verifiedPredictions || 0
    };
  }

  /**
   * Estimate base confidence from request characteristics when no explicit
   * probability is provided. Result is clamped to [0.05, 0.95].
   */
  private estimateBaseConfidence(request: ConfidenceAssessmentRequest): number {
    // Heuristic-based confidence estimation
    let confidence = 0.7; // Base confidence

    // Adjust for complexity
    confidence -= request.complexity * 0.2;

    // Adjust for novelty
    confidence -= request.novelty * 0.3;

    // Adjust for stakes (higher stakes = more conservative)
    const stakesAdjustment = {
      low: 0.1,
      medium: 0.05,
      high: -0.05,
      critical: -0.1
    };
    confidence += stakesAdjustment[request.stakes];

    // Blend with a per-task-type prior (simple average).
    const taskTypeConfidence = {
      classification: 0.8,
      prediction: 0.7,
      generation: 0.6,
      reasoning: 0.65,
      decision: 0.75
    };
    confidence = (confidence + taskTypeConfidence[request.taskType]) / 2;

    return Math.min(0.95, Math.max(0.05, confidence));
  }

  /**
   * Calculate epistemic (knowledge-based) uncertainty. Grows with novelty and
   * complexity, shrinks with historical sample count. Clamped to [0.05, 0.9].
   */
  private calculateEpistemicUncertainty(
    request: ConfidenceAssessmentRequest,
    historicalPerformance: any
  ): number {
    let uncertainty = 0.2; // Base epistemic uncertainty

    // Higher uncertainty for novel tasks
    uncertainty += request.novelty * 0.3;

    // Higher uncertainty for complex tasks
    uncertainty += request.complexity * 0.2;

    // Lower uncertainty with more historical data (reduction capped at 0.2)
    if (historicalPerformance.sampleCount > 10) {
      uncertainty -= Math.min(0.2, historicalPerformance.sampleCount / 100);
    }

    return Math.min(0.9, Math.max(0.05, uncertainty));
  }

  /**
   * Calculate aleatoric (data-based) uncertainty from the task type's
   * inherent noise plus complexity. Clamped to [0.05, 0.8].
   *
   * FIX: removed the dead `= 0.15` initializer that was immediately
   * overwritten by the task-type lookup in the original.
   */
  private calculateAleatoricUncertainty(request: ConfidenceAssessmentRequest): number {
    // Task type determines the inherent (irreducible) uncertainty floor.
    const taskTypeUncertainty = {
      classification: 0.1,
      prediction: 0.2,
      generation: 0.25,
      reasoning: 0.15,
      decision: 0.18
    };
    let uncertainty = taskTypeUncertainty[request.taskType];

    // Domain complexity affects uncertainty
    uncertainty += request.complexity * 0.1;

    return Math.min(0.8, Math.max(0.05, uncertainty));
  }

  /**
   * Calibrate confidence based on historical performance. With too few
   * verified samples the base confidence is returned unchanged; otherwise it
   * is nudged down when historically overconfident and up when under.
   */
  private calibrateConfidence(baseConfidence: number, historicalPerformance: any): number {
    if (historicalPerformance.sampleCount < this.config.calibration.minSamplesForCalibration) {
      return baseConfidence;
    }

    // Adjust based on historical calibration error
    const adjustment = historicalPerformance.calibrationError *
      (historicalPerformance.avgConfidence > historicalPerformance.avgAccuracy ? -1 : 1);

    return Math.min(0.95, Math.max(0.05, baseConfidence + adjustment));
  }

  /**
   * Generate recommendations for low-confidence or high-uncertainty
   * assessments. Empty when all thresholds are comfortably met.
   */
  private generateConfidenceRecommendations(confidence: ConfidenceAssessment): string[] {
    const recommendations: string[] = [];

    if (confidence.overall < 0.5) {
      recommendations.push('Consider gathering more information before proceeding');
      recommendations.push('Review input data quality and completeness');
    }
    if (confidence.breakdown.epistemic > 0.6) {
      recommendations.push('High knowledge uncertainty - consider domain expert consultation');
      recommendations.push('Increase training data for this domain');
    }
    if (confidence.breakdown.aleatoric > 0.5) {
      recommendations.push('High data uncertainty - verify input data quality');
      recommendations.push('Consider ensemble methods to reduce uncertainty');
    }
    if (confidence.aspects.factualAccuracy < 0.6) {
      recommendations.push('Low factual accuracy confidence - verify facts before proceeding');
    }

    return recommendations;
  }

  /**
   * Assess risk level: (1 - overall confidence) weighted by stakes, bucketed
   * into four levels.
   */
  private assessRiskLevel(
    confidence: ConfidenceAssessment,
    stakes: string
  ): 'low' | 'medium' | 'high' | 'critical' {
    const stakesWeight = { low: 0.25, medium: 0.5, high: 0.75, critical: 1.0 };
    const riskScore = (1 - confidence.overall) * stakesWeight[stakes as keyof typeof stakesWeight];

    if (riskScore > 0.7) return 'critical';
    if (riskScore > 0.5) return 'high';
    if (riskScore > 0.3) return 'medium';
    return 'low';
  }

  /**
   * Determine whether overall confidence clears the proceed threshold for the
   * given risk level (higher risk demands higher confidence).
   */
  private shouldProceedWithPrediction(
    confidence: ConfidenceAssessment,
    riskLevel: string
  ): boolean {
    const thresholds = {
      low: 0.3,
      medium: 0.5,
      high: 0.7,
      critical: 0.9
    };
    return confidence.overall >= thresholds[riskLevel as keyof typeof thresholds];
  }

  /**
   * Generate calibration-improvement recommendations from the analysis and
   * aggregate stats.
   */
  private generateCalibrationRecommendations(
    calibrationAnalysis: any,
    stats: any
  ): string[] {
    const recommendations: string[] = [];

    if (calibrationAnalysis.ece > 0.15) {
      recommendations.push('Improve confidence calibration through temperature scaling');
      recommendations.push('Collect more diverse training data');
    }
    if (stats.overconfidenceRate > 0.3) {
      recommendations.push('Reduce overconfidence by lowering confidence thresholds');
      recommendations.push('Implement confidence penalties in training');
    }
    if (stats.underconfidenceRate > 0.3) {
      recommendations.push('Address underconfidence by improving model training');
      recommendations.push('Review uncertainty estimation methods');
    }

    return recommendations;
  }

  /**
   * Calculate trend directions by comparing the last 7 buckets against the
   * earlier buckets. Fewer than 3 buckets yields 'stable' across the board.
   *
   * NOTE(review): when the timeline has 3-7 buckets, 'recent' (slice(-7))
   * overlaps 'older' (first bucket) — confirm whether disjoint windows were
   * intended.
   */
  private calculateTrends(timeline: any[]): {
    confidenceTrend: 'improving' | 'stable' | 'declining';
    accuracyTrend: 'improving' | 'stable' | 'declining';
    calibrationTrend: 'improving' | 'stable' | 'declining';
  } {
    if (timeline.length < 3) {
      return {
        confidenceTrend: 'stable',
        accuracyTrend: 'stable',
        calibrationTrend: 'stable'
      };
    }

    const recent = timeline.slice(-7);
    const older = timeline.slice(0, Math.max(1, timeline.length - 7));

    const recentAvgConfidence = recent.reduce((sum, t) => sum + t.avgConfidence, 0) / recent.length;
    const olderAvgConfidence = older.reduce((sum, t) => sum + t.avgConfidence, 0) / older.length;
    const recentAvgAccuracy = recent.reduce((sum, t) => sum + (t.accuracy || 0), 0) / recent.length;
    const olderAvgAccuracy = older.reduce((sum, t) => sum + (t.accuracy || 0), 0) / older.length;
    const recentAvgCalibration = recent.reduce((sum, t) => sum + (t.calibrationError || 0), 0) / recent.length;
    const olderAvgCalibration = older.reduce((sum, t) => sum + (t.calibrationError || 0), 0) / older.length;

    return {
      confidenceTrend: this.determineTrend(recentAvgConfidence, olderAvgConfidence),
      accuracyTrend: this.determineTrend(recentAvgAccuracy, olderAvgAccuracy),
      // Arguments deliberately swapped: lower calibration error is better.
      calibrationTrend: this.determineTrend(olderAvgCalibration, recentAvgCalibration)
    };
  }

  /**
   * Determine trend direction from a >5% relative change.
   *
   * FIX: guards against a zero baseline — the original divided by `older`
   * unconditionally, producing NaN/Infinity when the older average was 0.
   */
  private determineTrend(recent: number, older: number): 'improving' | 'stable' | 'declining' {
    if (older === 0) {
      // No baseline to compare against; any positive recent value counts as
      // improvement, otherwise report stable.
      return recent > 0 ? 'improving' : 'stable';
    }
    const change = (recent - older) / older;
    if (change > 0.05) return 'improving';
    if (change < -0.05) return 'declining';
    return 'stable';
  }

  /**
   * Generate narrative insights from the timeline and computed trends.
   */
  private generateTrendInsights(timeline: any[], trends: any): string[] {
    const insights: string[] = [];

    if (trends.confidenceTrend === 'declining') {
      insights.push('Confidence levels are declining - review model performance');
    }
    if (trends.accuracyTrend === 'improving') {
      insights.push('Accuracy is improving - current approach is working well');
    }
    if (trends.calibrationTrend === 'declining') {
      insights.push('Calibration is getting worse - review confidence assessment methods');
    }
    if (timeline.length > 0) {
      const avgPredictions = timeline.reduce((sum, t) => sum + t.predictionCount, 0) / timeline.length;
      if (avgPredictions > 100) {
        insights.push('High prediction volume - ensure quality is maintained');
      }
    }

    return insights;
  }

  /**
   * Calculate performance metrics.
   *
   * NOTE: currently returns fixed placeholder values — the real
   * implementation should aggregate metadata.computationTime/memoryUsage
   * from stored records.
   */
  private async calculatePerformanceMetrics(agentId: string, days: number): Promise<{
    avgComputationTime: number;
    avgMemoryUsage: number;
    efficiency: number;
  }> {
    // This would typically use aggregation to calculate performance metrics
    // For now, return simulated metrics
    return {
      avgComputationTime: 150, // ms
      avgMemoryUsage: 25, // MB
      efficiency: 0.85 // Efficiency score
    };
  }

  /**
   * Cleanup expired confidence records (past their temporal.expiresAt).
   *
   * @returns number of records removed
   */
  async cleanup(): Promise<number> {
    return await this.confidenceCollection.cleanupExpiredRecords();
  }
}