UNPKG

universal-ai-brain

Version:

🧠 UNIVERSAL AI BRAIN 3.3 - The world's most advanced cognitive architecture with 24 specialized systems, MongoDB 8.1 $rankFusion hybrid search, latest Voyage 3.5 embeddings, and framework-agnostic design. Works with Mastra, Vercel AI, LangChain, OpenAI A

617 lines (539 loc) • 21 kB
/** * @file HallucinationDetector - Advanced hallucination detection and factual accuracy validation * * This system compares AI responses against provided context, detects factual inconsistencies, * identifies unsupported claims, and provides confidence scores for response accuracy using * MongoDB for analytics and pattern recognition. * * Features: * - Context-grounded response validation * - Factual consistency checking * - Unsupported claim detection * - Confidence scoring and uncertainty quantification * - Real-time hallucination analytics with MongoDB * - Framework-agnostic accuracy assessment * - Automated fact-checking pipeline */ import { TracingCollection } from '../collections/TracingCollection'; import { MemoryCollection } from '../collections/MemoryCollection'; import { MongoVectorStore } from '../vector/MongoVectorStore'; export interface HallucinationAnalysis { isGrounded: boolean; confidenceScore: number; // 0-1 scale factualAccuracy: number; // 0-1 scale contextAlignment: number; // 0-1 scale detectedIssues: HallucinationIssue[]; supportingEvidence: ContextEvidence[]; unsupportedClaims: UnsupportedClaim[]; overallRisk: 'low' | 'medium' | 'high' | 'critical'; recommendations: string[]; } export interface HallucinationIssue { type: 'factual_error' | 'unsupported_claim' | 'context_contradiction' | 'fabricated_detail' | 'temporal_inconsistency'; severity: 'low' | 'medium' | 'high' | 'critical'; description: string; location: { startIndex: number; endIndex: number; text: string; }; confidence: number; suggestedCorrection?: string; } export interface ContextEvidence { claim: string; supportingContext: string; relevanceScore: number; sourceId: string; sourceType: 'document' | 'memory' | 'knowledge_base' | 'external'; } export interface UnsupportedClaim { claim: string; location: { startIndex: number; endIndex: number; }; confidence: number; reason: 'no_context_support' | 'contradicts_context' | 'fabricated_information' | 'speculative_statement'; suggestedAction: 'remove' | 'qualify' | 'verify' | 'flag'; } export interface HallucinationMetrics { timeRange: { start: Date; end: Date; }; totalAnalyses: number; hallucinationRate: number; averageConfidence: number; issuesByType: Record<string, number>; issuesBySeverity: Record<string, number>; frameworkComparison: { framework: string; hallucinationRate: number; averageConfidence: number; }[]; improvementTrends: { date: Date; hallucinationRate: number; confidence: number; }[]; } export interface FactCheckResult { claim: string; isSupported: boolean; confidence: number; supportingEvidence: string[]; contradictingEvidence: string[]; uncertainty: number; } /** * HallucinationDetector - Advanced hallucination detection and factual accuracy validation * * Uses sophisticated algorithms and MongoDB analytics to detect hallucinations, * validate factual accuracy, and ensure AI responses are grounded in provided context. */ export class HallucinationDetector { private tracingCollection: TracingCollection; private memoryCollection: MemoryCollection; private vectorStore: MongoVectorStore; constructor( tracingCollection: TracingCollection, memoryCollection: MemoryCollection, vectorStore: MongoVectorStore ) { this.tracingCollection = tracingCollection; this.memoryCollection = memoryCollection; this.vectorStore = vectorStore; } /** * Analyze response for hallucinations and factual accuracy */ async analyzeResponse( response: string, context: { providedContext: string[]; originalQuery: string; framework: string; sessionId: string; traceId?: string; } ): Promise<HallucinationAnalysis> { // Extract claims from the response const claims = await this.extractClaims(response); // Analyze each claim against provided context const claimAnalyses = await Promise.all( claims.map(claim => this.analyzeClaim(claim, context.providedContext)) ); // Detect factual inconsistencies const factualIssues = await this.detectFactualInconsistencies(response, context.providedContext); // Check for fabricated details const fabricatedDetails = await this.detectFabricatedDetails(response, context.providedContext); // Analyze temporal consistency const temporalIssues = await this.analyzeTemporalConsistency(response); // Combine all detected issues const allIssues: HallucinationIssue[] = [ ...factualIssues, ...fabricatedDetails, ...temporalIssues ]; // Calculate confidence scores const contextAlignment = this.calculateContextAlignment(response, context.providedContext); const factualAccuracy = this.calculateFactualAccuracy(claimAnalyses); const confidenceScore = (contextAlignment + factualAccuracy) / 2; // Identify unsupported claims const unsupportedClaims = claimAnalyses .filter(analysis => !analysis.isSupported) .map(analysis => this.createUnsupportedClaim(analysis, response)); // Determine overall risk const overallRisk = this.calculateOverallRisk(allIssues, confidenceScore); // Generate supporting evidence const supportingEvidence = claimAnalyses .filter(analysis => analysis.isSupported) .map(analysis => this.createContextEvidence(analysis)); // Generate recommendations const recommendations = this.generateRecommendations(allIssues, unsupportedClaims, overallRisk); // Log analysis for learning await this.logHallucinationAnalysis({ response, context, confidenceScore, factualAccuracy, contextAlignment, issueCount: allIssues.length, overallRisk }); return { isGrounded: confidenceScore > 0.7 && allIssues.filter(i => i.severity === 'critical' || i.severity === 'high').length === 0, confidenceScore, factualAccuracy, contextAlignment, detectedIssues: allIssues, supportingEvidence, unsupportedClaims, overallRisk, recommendations }; } /** * Generate hallucination metrics using MongoDB aggregation */ async generateHallucinationMetrics(timeRange: { start: Date; end: Date }): Promise<HallucinationMetrics> { // Use MongoDB aggregation for comprehensive hallucination analytics const metricsPipeline = [ { $match: { timestamp: { $gte: timeRange.start, $lte: timeRange.end }, 'metadata.type': 'hallucination_analysis' } }, { $facet: { // Total analyses and hallucination rate overallStats: [ { $group: { _id: null, totalAnalyses: { $sum: 1 }, hallucinationCount: { $sum: { $cond: [{ $lt: ['$metadata.confidenceScore', 0.7] }, 1, 0] } }, avgConfidence: { $avg: '$metadata.confidenceScore' }, avgFactualAccuracy: { $avg: '$metadata.factualAccuracy' } } } ], // Issues by type issuesByType: [ { $unwind: '$metadata.detectedIssues' }, { $group: { _id: '$metadata.detectedIssues.type', count: { $sum: 1 } } } ], // Issues by severity issuesBySeverity: [ { $unwind: '$metadata.detectedIssues' }, { $group: { _id: '$metadata.detectedIssues.severity', count: { $sum: 1 } } } ], // Framework comparison frameworkStats: [ { $group: { _id: '$metadata.framework', totalAnalyses: { $sum: 1 }, hallucinationCount: { $sum: { $cond: [{ $lt: ['$metadata.confidenceScore', 0.7] }, 1, 0] } }, avgConfidence: { $avg: '$metadata.confidenceScore' } } } ], // Daily trends dailyTrends: [ { $group: { _id: { $dateToString: { format: '%Y-%m-%d', date: '$timestamp' } }, hallucinationCount: { $sum: { $cond: [{ $lt: ['$metadata.confidenceScore', 0.7] }, 1, 0] } }, totalCount: { $sum: 1 }, avgConfidence: { $avg: '$metadata.confidenceScore' } } }, { $sort: { '_id': 1 } } ] } } ]; const results = await this.memoryCollection.aggregate(metricsPipeline); const facetResults = results[0]; const overallStats = facetResults.overallStats[0] || { totalAnalyses: 0, hallucinationCount: 0, avgConfidence: 0 }; const hallucinationRate = overallStats.totalAnalyses > 0 ? (overallStats.hallucinationCount / overallStats.totalAnalyses) * 100 : 0; return { timeRange, totalAnalyses: overallStats.totalAnalyses, hallucinationRate: Math.round(hallucinationRate * 100) / 100, averageConfidence: Math.round(overallStats.avgConfidence * 100) / 100, issuesByType: this.arrayToRecord(facetResults.issuesByType), issuesBySeverity: this.arrayToRecord(facetResults.issuesBySeverity), frameworkComparison: facetResults.frameworkStats.map((stat: any) => ({ framework: stat._id, hallucinationRate: stat.totalAnalyses > 0 ? (stat.hallucinationCount / stat.totalAnalyses) * 100 : 0, averageConfidence: Math.round(stat.avgConfidence * 100) / 100 })), improvementTrends: facetResults.dailyTrends.map((trend: any) => ({ date: new Date(trend._id), hallucinationRate: trend.totalCount > 0 ? (trend.hallucinationCount / trend.totalCount) * 100 : 0, confidence: Math.round(trend.avgConfidence * 100) / 100 })) }; } /** * Fact-check specific claims against knowledge base */ async factCheckClaim(claim: string, context: string[]): Promise<FactCheckResult> { // Search for supporting evidence in context const supportingEvidence: string[] = []; const contradictingEvidence: string[] = []; for (const contextItem of context) { const similarity = await this.calculateSemanticSimilarity(claim, contextItem); if (similarity > 0.8) { // High similarity suggests support supportingEvidence.push(contextItem); } else if (similarity > 0.6) { // Check for contradiction const isContradiction = await this.detectContradiction(claim, contextItem); if (isContradiction) { contradictingEvidence.push(contextItem); } } } // Calculate confidence and uncertainty const confidence = supportingEvidence.length > 0 ? Math.min(0.9, supportingEvidence.length * 0.3) : Math.max(0.1, 1 - contradictingEvidence.length * 0.4); const uncertainty = 1 - confidence; const isSupported = supportingEvidence.length > 0 && contradictingEvidence.length === 0; return { claim, isSupported, confidence, supportingEvidence, contradictingEvidence, uncertainty }; } // Private helper methods private async extractClaims(response: string): Promise<string[]> { // Extract factual claims from response // This would use NLP techniques to identify statements that can be fact-checked const sentences = response.split(/[.!?]+/).filter(s => s.trim().length > 10); // Filter for factual claims (statements that can be verified) const claims = sentences.filter(sentence => { const trimmed = sentence.trim(); // Look for statements that contain verifiable information return trimmed.length > 20 && !trimmed.toLowerCase().includes('i think') && !trimmed.toLowerCase().includes('maybe') && !trimmed.toLowerCase().includes('possibly'); }); return claims; } private async analyzeClaim(claim: string, context: string[]): Promise<FactCheckResult> { return this.factCheckClaim(claim, context); } private async detectFactualInconsistencies(response: string, context: string[]): Promise<HallucinationIssue[]> { const issues: HallucinationIssue[] = []; // Check for numerical inconsistencies const numbers = response.match(/\d+/g) || []; for (const number of numbers) { const contextHasNumber = context.some(ctx => ctx.includes(number)); if (!contextHasNumber && parseInt(number) > 1000) { // Large numbers without context support are suspicious const index = response.indexOf(number); issues.push({ type: 'factual_error', severity: 'medium', description: `Large number "${number}" not supported by provided context`, location: { startIndex: index, endIndex: index + number.length, text: number }, confidence: 0.7 }); } } return issues; } private async detectFabricatedDetails(response: string, context: string[]): Promise<HallucinationIssue[]> { const issues: HallucinationIssue[] = []; // Check for specific details that aren't in context const specificPatterns = [ /\b\d{4}-\d{2}-\d{2}\b/, // Dates /\b[A-Z][a-z]+ [A-Z][a-z]+\b/, // Proper names /\$\d+(?:,\d{3})*(?:\.\d{2})?\b/ // Money amounts ]; for (const pattern of specificPatterns) { const matches = response.match(pattern); if (matches) { for (const match of matches) { const isInContext = context.some(ctx => ctx.includes(match)); if (!isInContext) { const index = response.indexOf(match); issues.push({ type: 'fabricated_detail', severity: 'high', description: `Specific detail "${match}" not found in provided context`, location: { startIndex: index, endIndex: index + match.length, text: match }, confidence: 0.8 }); } } } } return issues; } private async analyzeTemporalConsistency(response: string): Promise<HallucinationIssue[]> { const issues: HallucinationIssue[] = []; // Check for temporal inconsistencies const timeReferences = response.match(/\b(yesterday|today|tomorrow|last week|next month|in \d+ years?)\b/gi) || []; if (timeReferences.length > 0) { // For now, flag any specific temporal references as potentially problematic // In a real implementation, this would check against current date and context for (const timeRef of timeReferences) { const index = response.indexOf(timeRef); issues.push({ type: 'temporal_inconsistency', severity: 'low', description: `Temporal reference "${timeRef}" may not be accurate without current date context`, location: { startIndex: index, endIndex: index + timeRef.length, text: timeRef }, confidence: 0.6 }); } } return issues; } private calculateContextAlignment(response: string, context: string[]): number { if (context.length === 0) return 0.5; // Neutral if no context // Calculate how well the response aligns with provided context let totalAlignment = 0; let alignmentCount = 0; const responseWords = response.toLowerCase().split(/\s+/); for (const contextItem of context) { const contextWords = contextItem.toLowerCase().split(/\s+/); const commonWords = responseWords.filter(word => contextWords.includes(word) && word.length > 3 ); const alignment = commonWords.length / Math.max(responseWords.length, contextWords.length); totalAlignment += alignment; alignmentCount++; } return alignmentCount > 0 ? totalAlignment / alignmentCount : 0.5; } private calculateFactualAccuracy(claimAnalyses: FactCheckResult[]): number { if (claimAnalyses.length === 0) return 1.0; // No claims to verify const supportedClaims = claimAnalyses.filter(analysis => analysis.isSupported).length; return supportedClaims / claimAnalyses.length; } private calculateOverallRisk(issues: HallucinationIssue[], confidenceScore: number): 'low' | 'medium' | 'high' | 'critical' { const criticalIssues = issues.filter(i => i.severity === 'critical').length; const highIssues = issues.filter(i => i.severity === 'high').length; if (criticalIssues > 0 || confidenceScore < 0.3) return 'critical'; if (highIssues > 2 || confidenceScore < 0.5) return 'high'; if (issues.length > 3 || confidenceScore < 0.7) return 'medium'; return 'low'; } private createUnsupportedClaim(analysis: FactCheckResult, response: string): UnsupportedClaim { const index = response.indexOf(analysis.claim); return { claim: analysis.claim, location: { startIndex: index, endIndex: index + analysis.claim.length }, confidence: 1 - analysis.confidence, reason: analysis.contradictingEvidence.length > 0 ? 'contradicts_context' : 'no_context_support', suggestedAction: analysis.contradictingEvidence.length > 0 ? 'remove' : 'qualify' }; } private createContextEvidence(analysis: FactCheckResult): ContextEvidence { return { claim: analysis.claim, supportingContext: analysis.supportingEvidence[0] || '', relevanceScore: analysis.confidence, sourceId: 'context_0', sourceType: 'document' }; } private generateRecommendations( issues: HallucinationIssue[], unsupportedClaims: UnsupportedClaim[], overallRisk: string ): string[] { const recommendations: string[] = []; if (overallRisk === 'critical' || overallRisk === 'high') { recommendations.push('Consider regenerating the response with more specific context'); } if (unsupportedClaims.length > 0) { recommendations.push(`Verify or qualify ${unsupportedClaims.length} unsupported claims`); } const fabricatedDetails = issues.filter(i => i.type === 'fabricated_detail'); if (fabricatedDetails.length > 0) { recommendations.push('Remove or verify specific details not found in source material'); } if (issues.length === 0) { recommendations.push('Response appears well-grounded in provided context'); } return recommendations; } private async calculateSemanticSimilarity(text1: string, text2: string): Promise<number> { // Simplified similarity calculation // In production, this would use embeddings and cosine similarity const words1 = text1.toLowerCase().split(/\s+/); const words2 = text2.toLowerCase().split(/\s+/); const commonWords = words1.filter(word => words2.includes(word) && word.length > 3); return commonWords.length / Math.max(words1.length, words2.length); } private async detectContradiction(claim: string, context: string): Promise<boolean> { // Simplified contradiction detection // Look for negation patterns const negationWords = ['not', 'no', 'never', 'none', 'neither', 'cannot', 'isn\'t', 'aren\'t', 'won\'t']; const claimWords = claim.toLowerCase().split(/\s+/); const contextWords = context.toLowerCase().split(/\s+/); // Check if one has negation and the other doesn't for similar content const claimHasNegation = claimWords.some(word => negationWords.includes(word)); const contextHasNegation = contextWords.some(word => negationWords.includes(word)); return claimHasNegation !== contextHasNegation; } private async logHallucinationAnalysis(analysisData: any): Promise<void> { await this.memoryCollection.storeDocument( JSON.stringify(analysisData), { type: 'hallucination_analysis', framework: analysisData.context.framework, confidenceScore: analysisData.confidenceScore, factualAccuracy: analysisData.factualAccuracy, contextAlignment: analysisData.contextAlignment, issueCount: analysisData.issueCount, overallRisk: analysisData.overallRisk, detectedIssues: analysisData.detectedIssues || [] } ); } private arrayToRecord(array: any[]): Record<string, number> { const record: Record<string, number> = {}; array.forEach(item => { record[item._id] = item.count; }); return record; } }