/**
* @file ConfidenceTrackingCollection - MongoDB collection for multi-dimensional confidence tracking
*
* This collection demonstrates MongoDB's statistical aggregation capabilities for
* uncertainty quantification, confidence calibration, and prediction accuracy tracking.
* Showcases MongoDB's advanced analytics for cognitive confidence data.
*
* Features:
* - Multi-dimensional confidence tracking with statistical aggregations
* - Uncertainty quantification and calibration analysis
* - Prediction accuracy tracking with time-series optimization
* - Confidence decay and temporal confidence modeling
* - Real-time confidence monitoring and alerting
*/
import { Db, ObjectId } from 'mongodb';
import { BaseCollection, BaseDocument } from './BaseCollection';
export interface ConfidenceRecord extends BaseDocument {
agentId: string;
sessionId?: string;
timestamp: Date;
// Context of the confidence measurement
context: {
task: string; // What task/decision this confidence relates to
taskType: 'prediction' | 'classification' | 'generation' | 'reasoning' | 'decision';
domain: string; // Domain of expertise (e.g., 'customer_service', 'technical_support')
complexity: number; // 0-1 scale of task complexity
novelty: number; // 0-1 scale of how novel/unfamiliar the task is
stakes: 'low' | 'medium' | 'high' | 'critical'; // Importance of being correct
};
// Multi-dimensional confidence measurements
confidence: {
overall: number; // 0-1 overall confidence score
epistemic: number; // 0-1 knowledge-based uncertainty (what we don't know)
aleatoric: number; // 0-1 data-based uncertainty (inherent randomness)
calibrated: number; // 0-1 calibrated confidence (adjusted for historical accuracy)
// Confidence breakdown by aspect
aspects: {
factualAccuracy: number; // Confidence in factual correctness
completeness: number; // Confidence in response completeness
relevance: number; // Confidence in response relevance
clarity: number; // Confidence in response clarity
appropriateness: number; // Confidence in response appropriateness
};
// Confidence sources
sources: {
modelIntrinsic: number; // Confidence from the AI model itself
retrievalQuality: number; // Confidence from information retrieval
contextRelevance: number; // Confidence from context matching
historicalPerformance: number; // Confidence from past performance
domainExpertise: number; // Confidence from domain knowledge
};
};
// Prediction/decision details
prediction: {
type: 'binary' | 'multiclass' | 'regression' | 'ranking' | 'generation';
value: any; // The actual prediction/decision made
alternatives?: Array<{
value: any;
confidence: number;
reasoning: string;
}>;
probability?: number; // Predicted probability (for probabilistic predictions)
distribution?: Array<{ value: any; probability: number }>; // Full probability distribution
};
// Actual outcome (filled in later for calibration)
actual?: {
value: any; // The actual correct answer/outcome
correct: boolean; // Whether the prediction was correct
accuracy?: number; // Accuracy score (0-1) for continuous predictions
feedback?: string; // Human feedback on the prediction
verificationTime: Date; // When the outcome was verified
verificationSource: 'automatic' | 'human' | 'external_system';
};
// Calibration metrics (computed)
calibration?: {
brier: number; // Brier score for probabilistic predictions
logLoss: number; // Log loss for probabilistic predictions
reliability: number; // Reliability (calibration) score
resolution: number; // Resolution (discrimination) score
sharpness: number; // Sharpness (confidence) score
overconfidence: number; // Measure of overconfidence bias
underconfidence: number; // Measure of underconfidence bias
};
// Temporal aspects
temporal: {
decayRate: number; // How quickly this confidence should decay (per hour)
halfLife: number; // Half-life of confidence relevance (hours)
expiresAt?: Date; // When this confidence measurement expires
seasonality?: string; // Time-based patterns (e.g., 'weekday', 'business_hours')
};
// Learning and adaptation
learning: {
surprisal: number; // How surprising was the actual outcome
informationGain: number; // How much we learned from this instance
modelUpdate: boolean; // Whether this should trigger model updates
confidenceAdjustment: number; // Suggested adjustment to future confidence
};
// Metadata
metadata: {
framework: string;
model: string;
version: string;
features: string[]; // Features used for this prediction
computationTime: number; // Time taken to compute (ms)
memoryUsage?: number; // Memory used (MB)
};
}
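// Illustrative payload (hypothetical values; the agent id, domain, and metadata
// names below are placeholders, not part of this module). This is the shape
// recordConfidence() expects:
//
//   const payload: Omit<ConfidenceRecord, '_id' | 'createdAt' | 'updatedAt'> = {
//     agentId: 'support-agent-1',
//     timestamp: new Date(),
//     context: {
//       task: 'classify_ticket_priority',
//       taskType: 'classification',
//       domain: 'customer_service',
//       complexity: 0.4,
//       novelty: 0.2,
//       stakes: 'medium'
//     },
//     confidence: {
//       overall: 0.82,
//       epistemic: 0.1,
//       aleatoric: 0.08,
//       calibrated: 0.78,
//       aspects: { factualAccuracy: 0.85, completeness: 0.8, relevance: 0.9, clarity: 0.88, appropriateness: 0.86 },
//       sources: { modelIntrinsic: 0.8, retrievalQuality: 0.75, contextRelevance: 0.85, historicalPerformance: 0.8, domainExpertise: 0.7 }
//     },
//     prediction: { type: 'multiclass', value: 'high_priority', probability: 0.82 },
//     temporal: { decayRate: 0.01, halfLife: 72 },
//     learning: { surprisal: 0, informationGain: 0, modelUpdate: false, confidenceAdjustment: 0 },
//     metadata: { framework: 'mastra', model: 'example-model', version: '1.0.0', features: ['ticket_text'], computationTime: 120 }
//   };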
export interface ConfidenceFilter {
agentId?: string;
sessionId?: string;
'context.taskType'?: string;
'context.domain'?: string;
'context.stakes'?: string;
'confidence.overall'?: { $gte?: number; $lte?: number };
'prediction.type'?: string;
timestamp?: { $gte?: Date; $lte?: Date };
'actual.correct'?: boolean;
}
export interface ConfidenceAnalyticsOptions {
timeRange?: { start: Date; end: Date };
groupBy?: 'taskType' | 'domain' | 'stakes' | 'hour' | 'day';
includeUnverified?: boolean;
minConfidence?: number;
maxConfidence?: number;
}
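// Example (illustrative): the dotted paths in these interfaces map directly onto
// MongoDB query syntax, e.g. selecting high-stakes, high-confidence misses:
//
//   const filter: ConfidenceFilter = {
//     agentId: 'support-agent-1',
//     'context.stakes': 'high',
//     'confidence.overall': { $gte: 0.7 },
//     'actual.correct': false
//   };
//   const options: ConfidenceAnalyticsOptions = {
//     timeRange: { start: new Date('2024-01-01'), end: new Date() },
//     groupBy: 'domain',
//     includeUnverified: false
//   };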
/**
* ConfidenceTrackingCollection - Manages multi-dimensional confidence tracking
*
* This collection demonstrates MongoDB's advanced statistical capabilities:
* - Complex aggregation pipelines for confidence analytics
* - Statistical functions for calibration analysis
* - Time-series optimization for confidence tracking
* - Multi-dimensional indexing for confidence queries
*/
export class ConfidenceTrackingCollection extends BaseCollection<ConfidenceRecord> {
protected collectionName = 'agent_confidence_tracking';
constructor(db: Db) {
super(db);
this.collection = db.collection<ConfidenceRecord>(this.collectionName);
}
/**
* Create indexes optimized for confidence tracking and analytics
*/
async createIndexes(): Promise<void> {
try {
// Agent and timestamp index for time-series queries
await this.collection.createIndex({
agentId: 1,
timestamp: -1
}, {
name: 'agent_timestamp_index',
background: true
});
// Confidence analytics index
await this.collection.createIndex({
'confidence.overall': -1,
'context.taskType': 1,
'context.domain': 1
}, {
name: 'confidence_analytics_index',
background: true
});
// Calibration analysis index
await this.collection.createIndex({
'actual.correct': 1,
'prediction.probability': 1,
'confidence.overall': 1
}, {
name: 'calibration_analysis_index',
background: true,
sparse: true
});
// Task type and domain index
await this.collection.createIndex({
'context.taskType': 1,
'context.domain': 1,
'context.stakes': 1,
timestamp: -1
}, {
name: 'task_domain_stakes_index',
background: true
});
// TTL index for confidence expiration
await this.collection.createIndex({
'temporal.expiresAt': 1
}, {
name: 'confidence_expiration_ttl',
expireAfterSeconds: 0,
background: true,
sparse: true
});
// Performance tracking index
await this.collection.createIndex({
'metadata.computationTime': 1,
'confidence.overall': -1
}, {
name: 'performance_tracking_index',
background: true
});
console.log('✅ ConfidenceTrackingCollection indexes created successfully');
} catch (error) {
console.error('❌ Error creating ConfidenceTrackingCollection indexes:', error);
throw error;
}
}
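// Note: these indexes are shaped for the queries issued below; for example,
// agent_timestamp_index covers time-series lookups such as
//
//   this.collection.find({ agentId, timestamp: { $gte: startDate } }).sort({ timestamp: -1 })
//
// as run by getConfidenceStats and getConfidenceTrends.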
/**
* Record a new confidence measurement
*/
async recordConfidence(confidence: Omit<ConfidenceRecord, '_id' | 'createdAt' | 'updatedAt'>): Promise<ObjectId> {
const confidenceWithTimestamp = {
...confidence,
createdAt: new Date(),
updatedAt: new Date()
};
const result = await this.collection.insertOne(confidenceWithTimestamp);
return result.insertedId;
}
/**
* Update confidence record with actual outcome for calibration
*/
async updateWithActual(
confidenceId: ObjectId,
actual: ConfidenceRecord['actual']
): Promise<void> {
if (!actual) return;
// Calculate calibration metrics
const record = await this.collection.findOne({ _id: confidenceId });
if (!record) {
throw new Error('Confidence record not found');
}
const calibration = this.calculateCalibrationMetrics(record, actual);
const learning = this.calculateLearningMetrics(record, actual);
await this.collection.updateOne(
{ _id: confidenceId },
{
$set: {
actual,
calibration,
learning,
updatedAt: new Date()
}
}
);
}
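// Typical round trip (sketch; assumes `tracking` is an instance of this class and
// `payload` is shaped like the ConfidenceRecord example above):
//
//   const id = await tracking.recordConfidence(payload);
//   // ...later, once ground truth is available:
//   await tracking.updateWithActual(id, {
//     value: 'high_priority',
//     correct: true,
//     verificationTime: new Date(),
//     verificationSource: 'human'
//   });
//   // updateWithActual computes and stores calibration + learning metrics as a side effect.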
/**
* Get confidence statistics for an agent
*/
async getConfidenceStats(agentId: string, days: number = 7): Promise<{
totalPredictions: number;
verifiedPredictions: number;
avgConfidence: number;
accuracy: number;
calibrationError: number;
overconfidenceRate: number;
underconfidenceRate: number;
confidenceByDomain: Array<{ domain: string; avgConfidence: number; accuracy: number }>;
}> {
const startDate = new Date(Date.now() - (days * 24 * 60 * 60 * 1000));
const stats = await this.collection.aggregate([
{
$match: {
agentId,
timestamp: { $gte: startDate }
}
},
{
$group: {
_id: null,
totalPredictions: { $sum: 1 },
verifiedPredictions: {
$sum: { $cond: [{ $ne: ['$actual', null] }, 1, 0] }
},
avgConfidence: { $avg: '$confidence.overall' },
correctPredictions: {
$sum: { $cond: [{ $eq: ['$actual.correct', true] }, 1, 0] }
},
// Sum the stored per-record calibration error (|outcome - predicted|),
// skipping records that have not been verified yet
totalCalibrationError: {
$sum: { $ifNull: ['$calibration.reliability', 0] }
},
overconfidentCount: {
$sum: {
$cond: [
{
$and: [
{ $ne: ['$actual.correct', null] },
{ $gt: ['$confidence.overall', 0.8] },
{ $eq: ['$actual.correct', false] }
]
},
1,
0
]
}
},
underconfidentCount: {
$sum: {
$cond: [
{
$and: [
{ $ne: ['$actual.correct', null] },
{ $lt: ['$confidence.overall', 0.5] },
{ $eq: ['$actual.correct', true] }
]
},
1,
0
]
}
}
}
}
]).toArray();
// Get confidence by domain
const domainStats = await this.collection.aggregate([
{
$match: {
agentId,
timestamp: { $gte: startDate }
}
},
{
$group: {
_id: '$context.domain',
avgConfidence: { $avg: '$confidence.overall' },
// Accuracy is computed over verified predictions only, matching the
// overall accuracy calculation below
verifiedPredictions: {
$sum: { $cond: [{ $ne: ['$actual.correct', null] }, 1, 0] }
},
correctPredictions: {
$sum: { $cond: [{ $eq: ['$actual.correct', true] }, 1, 0] }
}
}
},
{
$project: {
domain: '$_id',
avgConfidence: { $round: ['$avgConfidence', 3] },
accuracy: {
$cond: [
{ $gt: ['$verifiedPredictions', 0] },
{ $round: [{ $divide: ['$correctPredictions', '$verifiedPredictions'] }, 3] },
0
]
},
_id: 0
}
}
]).toArray();
const result = stats[0] || {
totalPredictions: 0,
verifiedPredictions: 0,
avgConfidence: 0,
correctPredictions: 0,
totalCalibrationError: 0,
overconfidentCount: 0,
underconfidentCount: 0
};
return {
totalPredictions: result.totalPredictions,
verifiedPredictions: result.verifiedPredictions,
avgConfidence: result.avgConfidence || 0,
accuracy: result.verifiedPredictions > 0 ?
(result.correctPredictions / result.verifiedPredictions) : 0,
calibrationError: result.verifiedPredictions > 0 ?
(result.totalCalibrationError / result.verifiedPredictions) : 0,
overconfidenceRate: result.verifiedPredictions > 0 ?
(result.overconfidentCount / result.verifiedPredictions) : 0,
underconfidenceRate: result.verifiedPredictions > 0 ?
(result.underconfidentCount / result.verifiedPredictions) : 0,
confidenceByDomain: domainStats as Array<{ domain: string; avgConfidence: number; accuracy: number }>
};
}
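// Example (illustrative):
//
//   const stats = await tracking.getConfidenceStats('support-agent-1', 14);
//   // e.g. stats.avgConfidence = 0.81 with stats.accuracy = 0.76 would indicate
//   // mild overconfidence; calibrationError quantifies the average gap.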
/**
* Analyze confidence calibration using MongoDB aggregation
*/
async analyzeCalibration(agentId: string, options: ConfidenceAnalyticsOptions = {}): Promise<{
calibrationCurve: Array<{ confidenceBin: number; avgConfidence: number; accuracy: number; count: number }>;
reliabilityDiagram: Array<{ predicted: number; observed: number; count: number }>;
brierScore: number;
logLoss: number;
ece: number; // Expected Calibration Error
mce: number; // Maximum Calibration Error
}> {
const filter: any = { agentId, 'actual.correct': { $ne: null } };
if (options.timeRange) {
filter.timestamp = {
$gte: options.timeRange.start,
$lte: options.timeRange.end
};
}
// Calibration curve analysis
const calibrationCurve = await this.collection.aggregate([
{ $match: filter },
{
$addFields: {
confidenceBin: {
$multiply: [
{ $floor: { $multiply: ['$confidence.overall', 10] } },
0.1
]
}
}
},
{
$group: {
_id: '$confidenceBin',
accuracy: { $avg: { $cond: ['$actual.correct', 1, 0] } },
count: { $sum: 1 },
avgConfidence: { $avg: '$confidence.overall' }
}
},
{
$project: {
confidenceBin: '$_id',
accuracy: { $round: ['$accuracy', 3] },
// Keep each bin's mean confidence so ECE/MCE can be computed from the curve
avgConfidence: { $round: ['$avgConfidence', 3] },
count: 1,
_id: 0
}
},
{ $sort: { confidenceBin: 1 } }
]).toArray();
// Reliability diagram (predicted vs observed)
const reliabilityDiagram = await this.collection.aggregate([
{ $match: filter },
{
$addFields: {
predictedBin: {
$cond: [
{ $ne: ['$prediction.probability', null] },
{
$multiply: [
{ $floor: { $multiply: ['$prediction.probability', 10] } },
0.1
]
},
{
$multiply: [
{ $floor: { $multiply: ['$confidence.overall', 10] } },
0.1
]
}
]
}
}
},
{
$group: {
_id: '$predictedBin',
observed: { $avg: { $cond: ['$actual.correct', 1, 0] } },
// Average the same quantity used for binning (probability when present)
predicted: { $avg: { $ifNull: ['$prediction.probability', '$confidence.overall'] } },
count: { $sum: 1 }
}
},
{
$project: {
predicted: { $round: ['$predicted', 3] },
observed: { $round: ['$observed', 3] },
count: 1,
_id: 0
}
},
{ $sort: { predicted: 1 } }
]).toArray();
// Calculate overall metrics
const overallMetrics = await this.collection.aggregate([
{ $match: filter },
{
$group: {
_id: null,
brierScore: {
$avg: {
$pow: [
{
$subtract: [
{ $cond: ['$actual.correct', 1, 0] },
'$confidence.overall'
]
},
2
]
}
},
logLoss: {
$avg: {
$cond: [
'$actual.correct',
// Clamp away from zero, mirroring the 1e-15 guard in calculateCalibrationMetrics
{ $multiply: [-1, { $ln: { $max: ['$confidence.overall', 1e-15] } }] },
{ $multiply: [-1, { $ln: { $max: [{ $subtract: [1, '$confidence.overall'] }, 1e-15] } }] }
]
}
}
}
}
]).toArray();
const metrics = overallMetrics[0] || {
brierScore: 0,
logLoss: 0
};
// Calculate ECE and MCE from the binned curve: ECE is the count-weighted mean of
// |accuracy - avgConfidence| across confidence bins; MCE is the largest per-bin gap
const totalCount = calibrationCurve.reduce((sum: number, bin: any) => sum + bin.count, 0);
const binGaps = calibrationCurve.map((bin: any) => Math.abs(bin.accuracy - bin.avgConfidence));
const ece = totalCount > 0 ?
calibrationCurve.reduce((sum: number, bin: any, i: number) => sum + (bin.count / totalCount) * binGaps[i], 0) : 0;
const mce = binGaps.length > 0 ? Math.max(...binGaps) : 0;
return {
calibrationCurve: calibrationCurve as Array<{ confidenceBin: number; avgConfidence: number; accuracy: number; count: number }>,
reliabilityDiagram: reliabilityDiagram as Array<{ predicted: number; observed: number; count: number }>,
brierScore: metrics.brierScore || 0,
logLoss: metrics.logLoss || 0,
ece,
mce
};
}
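// Example (illustrative): a well-calibrated agent produces a reliability diagram
// close to the diagonal (predicted == observed) and an ECE near 0.
//
//   const cal = await tracking.analyzeCalibration('support-agent-1', {
//     timeRange: { start: new Date('2024-01-01'), end: new Date() }
//   });
//   // cal.ece: count-weighted mean |accuracy - avgConfidence| across bins
//   // cal.mce: the single worst bin, useful for spotting localized miscalibration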
/**
* Get confidence trends over time
*/
async getConfidenceTrends(
agentId: string,
days: number = 30,
granularity: 'hour' | 'day' = 'day'
): Promise<Array<{
timestamp: Date;
avgConfidence: number;
accuracy: number;
predictionCount: number;
calibrationError: number;
}>> {
const startDate = new Date(Date.now() - (days * 24 * 60 * 60 * 1000));
// Use ISO-8601-compatible strings so $dateFromString can re-parse them in $project
const dateFormat = granularity === 'hour' ?
{ $dateToString: { format: '%Y-%m-%dT%H:00:00', date: '$timestamp' } } :
{ $dateToString: { format: '%Y-%m-%d', date: '$timestamp' } };
return await this.collection.aggregate([
{
$match: {
agentId,
timestamp: { $gte: startDate }
}
},
{
$group: {
_id: dateFormat,
avgConfidence: { $avg: '$confidence.overall' },
predictionCount: { $sum: 1 },
correctPredictions: {
$sum: { $cond: [{ $eq: ['$actual.correct', true] }, 1, 0] }
},
verifiedPredictions: {
$sum: { $cond: [{ $ne: ['$actual.correct', null] }, 1, 0] }
},
calibrationError: {
$avg: {
$cond: [
{ $ne: ['$actual.correct', null] },
{
$abs: {
$subtract: [
{ $cond: ['$actual.correct', 1, 0] },
'$confidence.overall'
]
}
},
null
]
}
}
}
},
{
$project: {
timestamp: { $dateFromString: { dateString: '$_id' } },
avgConfidence: { $round: ['$avgConfidence', 3] },
accuracy: {
$cond: [
{ $gt: ['$verifiedPredictions', 0] },
{ $round: [{ $divide: ['$correctPredictions', '$verifiedPredictions'] }, 3] },
null
]
},
predictionCount: 1,
calibrationError: { $round: ['$calibrationError', 3] },
_id: 0
}
},
{ $sort: { timestamp: 1 } }
]).toArray() as Array<{ timestamp: Date; avgConfidence: number; accuracy: number; predictionCount: number; calibrationError: number }>;
}
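// Example (illustrative): daily trend points suitable for dashboard charting.
//
//   const trend = await tracking.getConfidenceTrends('support-agent-1', 30, 'day');
//   // -> [{ timestamp, avgConfidence, accuracy, predictionCount, calibrationError }, ...]
//   // A widening gap between avgConfidence and accuracy over time signals drift.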
/**
* Calculate calibration metrics for a prediction
*/
private calculateCalibrationMetrics(
record: ConfidenceRecord,
actual: ConfidenceRecord['actual']
): ConfidenceRecord['calibration'] {
if (!actual) return undefined;
// Use ?? so a legitimate probability of 0 is not discarded
const predicted = record.prediction.probability ?? record.confidence.overall;
const correct = actual.correct ? 1 : 0;
// Brier score
const brier = Math.pow(correct - predicted, 2);
// Log loss
const logLoss = correct === 1 ?
-Math.log(Math.max(predicted, 1e-15)) :
-Math.log(Math.max(1 - predicted, 1e-15));
// Reliability (calibration error)
const reliability = Math.abs(correct - predicted);
// Resolution (discrimination ability) - per-record proxy: distance from a 0.5 base rate
const resolution = Math.pow(predicted - 0.5, 2);
// Sharpness - how far the prediction commits away from 0.5
const sharpness = Math.abs(predicted - 0.5);
// Overconfidence/underconfidence
const overconfidence = predicted > 0.8 && correct === 0 ? predicted - 0.8 : 0;
const underconfidence = predicted < 0.5 && correct === 1 ? 0.5 - predicted : 0;
return {
brier,
logLoss,
reliability,
resolution,
sharpness,
overconfidence,
underconfidence
};
}
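// Worked example (hypothetical): predicted = 0.9, but the prediction was wrong:
//   brier           = (0 - 0.9)^2   = 0.81
//   logLoss         = -ln(1 - 0.9)  ≈ 2.303
//   reliability     = |0 - 0.9|     = 0.9
//   resolution      = (0.9 - 0.5)^2 = 0.16
//   sharpness       = |0.9 - 0.5|   = 0.4
//   overconfidence  = 0.9 - 0.8     = 0.1 (confident and wrong)
//   underconfidence = 0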
/**
* Calculate learning metrics from prediction outcome
*/
private calculateLearningMetrics(
record: ConfidenceRecord,
actual: ConfidenceRecord['actual']
): ConfidenceRecord['learning'] {
if (!actual) {
return {
surprisal: 0,
informationGain: 0,
modelUpdate: false,
confidenceAdjustment: 0
};
}
// Use ?? so a legitimate probability of 0 is not discarded
const predicted = record.prediction.probability ?? record.confidence.overall;
const correct = actual.correct ? 1 : 0;
// Surprisal (negative log probability of actual outcome)
const surprisal = correct === 1 ?
-Math.log2(Math.max(predicted, 1e-15)) :
-Math.log2(Math.max(1 - predicted, 1e-15));
// Information gain heuristic: only surprising outcomes (> 2 bits) carry signal,
// scaled down by a factor of 10
const informationGain = surprisal > 2 ? surprisal / 10 : 0;
// Whether this should trigger model updates
const modelUpdate = surprisal > 3 || Math.abs(predicted - correct) > 0.5;
// Suggested confidence adjustment
const confidenceAdjustment = correct === 1 ?
Math.max(0, 0.8 - predicted) :
Math.min(0, 0.2 - predicted);
return {
surprisal,
informationGain,
modelUpdate,
confidenceAdjustment
};
}
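// Worked example (hypothetical), continuing the case above (predicted = 0.9, wrong):
//   surprisal            = -log2(1 - 0.9)    ≈ 3.32 bits
//   informationGain      = 3.32 / 10         ≈ 0.332 (above the 2-bit threshold)
//   modelUpdate          = true              (surprisal > 3)
//   confidenceAdjustment = min(0, 0.2 - 0.9) = -0.7 (push future confidence down)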
/**
* Clean up expired confidence records immediately (a manual sweep; the TTL index
* on 'temporal.expiresAt' also removes them automatically in the background)
*/
async cleanupExpiredRecords(): Promise<number> {
const result = await this.collection.deleteMany({
'temporal.expiresAt': { $lte: new Date() }
});
return result.deletedCount;
}
}
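// End-to-end sketch (illustrative; assumes a reachable MongoDB deployment, a
// hypothetical database name, and the `payload` example shown with ConfidenceRecord):
//
//   import { MongoClient } from 'mongodb';
//
//   const client = await MongoClient.connect('mongodb://localhost:27017');
//   const tracking = new ConfidenceTrackingCollection(client.db('ai_brain'));
//   await tracking.createIndexes();
//
//   const id = await tracking.recordConfidence(payload);
//   await tracking.updateWithActual(id, {
//     value: 'high_priority',
//     correct: true,
//     verificationTime: new Date(),
//     verificationSource: 'automatic'
//   });
//
//   const stats = await tracking.getConfidenceStats('support-agent-1', 7);
//   const calibration = await tracking.analyzeCalibration('support-agent-1');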