UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

315 lines (309 loc) 13.3 kB
/**
 * Confidence Scoring System for Consensus Pipeline
 * Implements mathematical confidence calculation based on stage agreement and content quality
 */

/** Reliability score per provider name (lower-cased); `default` is the fallback. */
const PROVIDER_SCORES = Object.freeze({
    'openai': 0.9,
    'anthropic': 0.9,
    'google': 0.85,
    'mistral': 0.8,
    'groq': 0.8,
    'default': 0.7
});

/** Progressive stage weights: Generator, Refiner, Validator, Curator. */
const STAGE_WEIGHTS = Object.freeze([0.15, 0.25, 0.30, 0.30]);

/** Column/row labels used when rendering the agreement matrix. */
const STAGE_LABELS = Object.freeze(['Gen', 'Ref', 'Val', 'Cur']);

/** Terms whose presence raises the technical-depth score. */
const TECHNICAL_TERMS = Object.freeze([
    'algorithm', 'implementation', 'architecture', 'framework', 'methodology',
    'optimization', 'scalability', 'performance', 'efficiency', 'complexity'
]);

/** Words ignored by keyword extraction. */
const STOP_WORDS = new Set([
    'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by',
    'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did',
    'will', 'would', 'could', 'should'
]);

// Word boundaries keep "um"/"like" from matching inside "summary"/"likely".
// Only used with String.prototype.match, which resets lastIndex for /g regexes.
const FILLER_WORDS = /\b(?:um|uh|like|you know|actually|basically)\b/gi;

/**
 * Count whitespace-separated words, ignoring the empty tokens that leading or
 * trailing whitespace would otherwise produce.
 * @param {string} text
 * @returns {number}
 */
function countWords(text) {
    return text.split(/\s+/).filter(Boolean).length;
}

export class ConfidenceCalculator {
    /**
     * Calculate confidence for an individual stage.
     * @param {{output: string}} stageResult - Stage result; only `output` is read here.
     * @param {number} [contextQuality=0.5] - Scaled by 0.2 and capped at 0.2.
     * @param {number} [providerReliability=0.8] - Scaled by 0.3 and capped at 0.3.
     * @returns {number} Sum of all factors, capped at 0.98.
     */
    calculateStageConfidence(stageResult, contextQuality = 0.5, providerReliability = 0.8) {
        const output = stageResult.output;
        // Base confidence from content analysis
        const contentScore = this.analyzeContentQuality(output);
        // Context utilization bonus (0-0.2)
        const contextBonus = Math.min(contextQuality * 0.2, 0.2);
        // Provider reliability factor (0-0.3)
        const reliabilityFactor = Math.min(providerReliability * 0.3, 0.3);
        // Length and detail factor (0-0.1)
        const detailFactor = Math.min((output.length / 500) * 0.1, 0.1);
        // Technical depth bonus (0-0.1)
        const technicalBonus = this.calculateTechnicalDepth(output) * 0.1;
        const totalScore = contentScore + contextBonus + reliabilityFactor + detailFactor + technicalBonus;
        // Cap at 98% to avoid overconfidence
        return Math.min(totalScore, 0.98);
    }

    /**
     * Calculate overall consensus confidence from all stages.
     * @param {Array<{output: string, provider: string, confidence?: number}>} stageResults
     *   Exactly four stage results.
     * @returns {{content_quality: number, stage_agreement: number, provider_reliability: number,
     *   context_utilization: number, final_confidence: number, agreement_matrix: number[][]}}
     * @throws {Error} When `stageResults` does not contain exactly 4 entries.
     */
    calculateConsensusConfidence(stageResults) {
        if (stageResults.length !== 4) {
            throw new Error('Consensus requires exactly 4 stage results');
        }
        // NOTE(review): `||` means a reported confidence of 0 is treated as missing
        // and recomputed - confirm that is intended.
        const stageConfidences = stageResults.map(
            (stage) => stage.confidence || this.calculateStageConfidence(stage)
        );
        const agreementMatrix = this.calculateAgreementMatrix(stageResults);
        // Weighted average with progressive weighting
        const weightedConfidence = this.calculateWeightedConfidence(stageConfidences);
        // NOTE(review): this is a multiplier (0.9-1.1), not a 0-1 score, yet it is
        // exposed as `stage_agreement` and rendered as a percentage in the report.
        const agreementScore = this.calculateOverallAgreement(agreementMatrix);
        const providerReliability = this.calculateProviderReliability(stageResults);
        const contentQuality = this.calculateContentQualityProgression(stageResults);
        const contextUtilization = this.calculateContextUtilization(stageResults);
        // Final confidence calculation with agreement weighting
        const finalConfidence = Math.min(
            weightedConfidence * agreementScore * 1.1, // Agreement bonus
            0.98 // Maximum confidence cap
        );
        return {
            content_quality: contentQuality,
            stage_agreement: agreementScore,
            provider_reliability: providerReliability,
            context_utilization: contextUtilization,
            final_confidence: finalConfidence,
            agreement_matrix: agreementMatrix
        };
    }

    /**
     * Analyze content quality based on length, structure, technical content,
     * completeness and clarity.
     * @param {string} output
     * @returns {number} Score starting at 0.5, capped at 0.9.
     */
    analyzeContentQuality(output) {
        let score = 0.5; // Base score
        // Length factor (optimal range 100-800 words)
        const wordCount = countWords(output);
        if (wordCount >= 100 && wordCount <= 800) {
            score += 0.1;
        } else if (wordCount < 50) {
            score -= 0.1; // Penalty for too short
        }
        if (this.hasGoodStructure(output)) score += 0.1;
        if (this.hasTechnicalContent(output)) score += 0.1;
        if (this.isComplete(output)) score += 0.1;
        if (this.hasGoodClarity(output)) score += 0.1;
        return Math.min(score, 0.9);
    }

    /**
     * Calculate the pairwise agreement matrix between all stages.
     * @param {Array<{output: string}>} stageResults
     * @returns {number[][]} Square matrix; the diagonal is 1.0.
     */
    calculateAgreementMatrix(stageResults) {
        return stageResults.map((rowStage, i) =>
            stageResults.map((colStage, j) =>
                i === j
                    ? 1.0 // Perfect agreement with self
                    : this.calculateSemanticSimilarity(rowStage.output, colStage.output)
            )
        );
    }

    /**
     * Calculate similarity between two outputs: 70% Jaccard overlap of their
     * distinct keywords plus 30% similarity of their character lengths.
     * @param {string} output1
     * @param {string} output2
     * @returns {number} 0 when neither output yields a keyword, otherwise a value in [0, 1].
     */
    calculateSemanticSimilarity(output1, output2) {
        // Simple keyword overlap approach (can be enhanced with embeddings).
        // Both sides are de-duplicated so repeated keywords cannot push the
        // intersection above the union.
        const keywords1 = new Set(this.extractKeywords(output1.toLowerCase()));
        const keywords2 = new Set(this.extractKeywords(output2.toLowerCase()));
        const union = new Set([...keywords1, ...keywords2]);
        if (union.size === 0) return 0;
        let sharedCount = 0;
        for (const word of keywords1) {
            if (keywords2.has(word)) sharedCount++;
        }
        const jaccardSimilarity = sharedCount / union.size;
        // Length similarity factor; a non-empty union implies a non-zero max length.
        const length1 = output1.length;
        const length2 = output2.length;
        const lengthSimilarity = 1 - Math.abs(length1 - length2) / Math.max(length1, length2);
        return (jaccardSimilarity * 0.7) + (lengthSimilarity * 0.3);
    }

    /**
     * Calculate weighted confidence with progressive stage weighting.
     * @param {number[]} confidences - One confidence per stage.
     * @returns {number} Weighted average, or 0 unless exactly 4 values are given.
     */
    calculateWeightedConfidence(confidences) {
        if (confidences.length !== 4) return 0;
        // Progressive weighting: later stages have more influence
        let weightedSum = 0;
        let totalWeight = 0;
        confidences.forEach((confidence, i) => {
            weightedSum += confidence * STAGE_WEIGHTS[i];
            totalWeight += STAGE_WEIGHTS[i];
        });
        return weightedSum / totalWeight;
    }

    /**
     * Turn the agreement matrix into a confidence multiplier.
     * @param {number[][]} matrix
     * @returns {number} 1.1, 1.05, 1.0 or 0.9 depending on the average of the
     *   upper-triangle entries; 0.5 when there are no pairs to compare.
     */
    calculateOverallAgreement(matrix) {
        let totalAgreement = 0;
        let comparisons = 0;
        // Average agreement over the upper triangle (excludes self-agreements)
        for (let i = 0; i < matrix.length; i++) {
            for (let j = i + 1; j < matrix[i].length; j++) {
                totalAgreement += matrix[i][j];
                comparisons++;
            }
        }
        if (comparisons === 0) return 0.5;
        const averageAgreement = totalAgreement / comparisons;
        if (averageAgreement > 0.8) return 1.1; // 10% bonus
        if (averageAgreement > 0.6) return 1.05; // 5% bonus
        if (averageAgreement < 0.3) return 0.9; // 10% penalty
        return 1.0; // Neutral
    }

    /**
     * Calculate the average provider reliability score.
     * @param {Array<{provider?: string}>} stageResults
     * @returns {number} Mean of the per-provider scores; unknown, missing or
     *   non-string providers use the default score, as does an empty list.
     */
    calculateProviderReliability(stageResults) {
        // Provider reliability mapping (can be enhanced with real performance data)
        const fallback = PROVIDER_SCORES['default'];
        if (stageResults.length === 0) return fallback;
        let totalScore = 0;
        for (const stage of stageResults) {
            const provider = typeof stage.provider === 'string' ? stage.provider.toLowerCase() : 'default';
            // Own-property check so names like "constructor" do not resolve
            // through Object.prototype.
            const isKnown = Object.prototype.hasOwnProperty.call(PROVIDER_SCORES, provider);
            totalScore += isKnown ? PROVIDER_SCORES[provider] : fallback;
        }
        return totalScore / stageResults.length;
    }

    /**
     * Calculate content quality progression through stages.
     * @param {Array<{output: string}>} stageResults
     * @returns {number} 0.5 plus up to 0.3 for the fraction of stages that score
     *   higher than their predecessor; 0.5 when there are fewer than two stages.
     */
    calculateContentQualityProgression(stageResults) {
        const baseScore = 0.5;
        // With fewer than two stages there is nothing to compare (avoids 0/0).
        if (stageResults.length < 2) return baseScore;
        const qualityScores = stageResults.map((stage) => this.analyzeContentQuality(stage.output));
        let improvements = 0;
        for (let i = 1; i < qualityScores.length; i++) {
            if (qualityScores[i] > qualityScores[i - 1]) improvements++;
        }
        const progressionScore = baseScore + (improvements / (qualityScores.length - 1)) * 0.3;
        return Math.min(progressionScore, 1.0);
    }

    /**
     * Calculate context utilization score.
     * @param {Array<{output: string}>} stageResults
     * @returns {number} (0.5 + 0.1 * sum of technical depths) / stage count, capped at 1.0.
     */
    calculateContextUtilization(stageResults) {
        // This would be enhanced with actual context tracking.
        // For now, estimate based on technical depth.
        // NOTE(review): the 0.5 base is divided by the stage count together with
        // the depth contributions, so four stages cap this at 0.225 - confirm
        // whether the base was meant to stay outside the average.
        let utilizationScore = 0.5;
        for (const stage of stageResults) {
            utilizationScore += this.calculateTechnicalDepth(stage.output) * 0.1;
        }
        return Math.min(utilizationScore / stageResults.length, 1.0);
    }

    /**
     * Helper methods for content analysis
     */

    /**
     * @param {string} output
     * @returns {boolean} True when the text contains a blank line, a markdown
     *   header, a `*` bullet or a numbered list item after a newline.
     */
    hasGoodStructure(output) {
        return /(?:\n\n|\n#{1,6}\s|\n\*\s|\n\d+\.)/.test(output);
    }

    /**
     * @param {string} output
     * @returns {boolean} True when any technical indicator word appears (case-insensitive).
     */
    hasTechnicalContent(output) {
        const technicalIndicators = /(?:algorithm|function|class|method|API|database|framework|library|implementation)/i;
        return technicalIndicators.test(output);
    }

    /**
     * @param {string} output
     * @returns {boolean} True when the trimmed text ends with "." or "!", or
     *   mentions a conclusion/summary word.
     */
    isComplete(output) {
        const trimmed = output.trim();
        return trimmed.endsWith('.') ||
            trimmed.endsWith('!') ||
            /(?:conclusion|summary|finally|in summary)/i.test(output);
    }

    /**
     * @param {string} output
     * @returns {boolean} True when filler words make up less than 2% of the
     *   words (text without any words counts as clear).
     */
    hasGoodClarity(output) {
        const wordCount = countWords(output);
        if (wordCount === 0) return true;
        const fillerCount = (output.match(FILLER_WORDS) || []).length;
        return (fillerCount / wordCount) < 0.02; // Less than 2% filler words
    }

    /**
     * Analyze technical depth based on terminology.
     * @param {string} output
     * @returns {number} 0.1 per distinct technical term present, capped at 1.0.
     */
    calculateTechnicalDepth(output) {
        // Split on non-alphanumerics so "algorithm," and "performance." still match.
        const tokens = new Set(output.toLowerCase().split(/[^a-z0-9]+/));
        const matched = TECHNICAL_TERMS.filter((term) => tokens.has(term)).length;
        return Math.min(matched / 10, 1.0);
    }

    /**
     * Simple keyword extraction (can be enhanced).
     * @param {string} text
     * @returns {string[]} Up to the first 20 lower-cased tokens longer than two
     *   characters that are not stop words; duplicates are kept.
     */
    extractKeywords(text) {
        return text
            .toLowerCase()
            .replace(/[^\w\s]/g, ' ')
            .split(/\s+/)
            .filter((word) => word.length > 2 && !STOP_WORDS.has(word))
            .slice(0, 20);
    }

    /**
     * Generate confidence report for debugging and transparency.
     * @param {object} metrics - Result of `calculateConsensusConfidence`.
     * @param {Array<{stage_name: string, provider: string, model: string, confidence?: number}>} stageResults
     * @returns {string} Multi-line, human-readable report.
     */
    generateConfidenceReport(metrics, stageResults) {
        const percent = (value) => (value * 100).toFixed(1);
        const stageLines = stageResults
            .map((stage) => `• ${stage.stage_name}: ${percent(stage.confidence || 0.5)}% (${stage.provider}:${stage.model})`)
            .join('\n');
        let verdict;
        if (metrics.final_confidence > 0.8) {
            verdict = '✅ HIGH CONFIDENCE - Result is highly reliable';
        } else if (metrics.final_confidence > 0.6) {
            verdict = '⚠️ MODERATE CONFIDENCE - Result is reasonably reliable';
        } else {
            verdict = '❌ LOW CONFIDENCE - Result should be verified';
        }
        // Built line by line so the report keeps its line breaks.
        const report = [
            '',
            '🎯 CONSENSUS CONFIDENCE REPORT',
            '═══════════════════════════════════════',
            '',
            `📊 OVERALL CONFIDENCE: ${percent(metrics.final_confidence)}%`,
            '',
            '📈 COMPONENT SCORES:',
            `• Content Quality: ${percent(metrics.content_quality)}%`,
            `• Stage Agreement: ${percent(metrics.stage_agreement)}%`,
            `• Provider Reliability: ${percent(metrics.provider_reliability)}%`,
            `• Context Utilization: ${percent(metrics.context_utilization)}%`,
            '',
            '🔄 STAGE ANALYSIS:',
            stageLines,
            '',
            '🤝 AGREEMENT MATRIX:',
            this.formatAgreementMatrix(metrics.agreement_matrix),
            '💡 CONFIDENCE FACTORS:',
            '• High agreement between stages boosts confidence',
            '• Technical depth and structure improve quality scores',
            '• Provider diversity enhances reliability',
            '• Progressive improvement through stages is positive',
            '',
            verdict,
            ''
        ].join('\n');
        return report;
    }

    /**
     * Render the agreement matrix as a fixed-width text table of percentages.
     * @param {number[][]} matrix
     * @returns {string} Header line plus one line per row, each ending in a newline.
     */
    formatAgreementMatrix(matrix) {
        // Rows/columns beyond the four named stages get generated labels.
        const labels = matrix.map((_, i) => (i < STAGE_LABELS.length ? STAGE_LABELS[i] : `S${i + 1}`).padEnd(3));
        // Row labels occupy 4 characters ("Gen "), so the header is offset by 4
        // to line up with the 4-character-wide cells.
        let formatted = ' '.repeat(4) + labels.join(' ') + '\n';
        matrix.forEach((row, i) => {
            const cells = row.map((value) => (value * 100).toFixed(0).padStart(3) + ' ').join('');
            formatted += labels[i] + ' ' + cells + '\n';
        });
        return formatted;
    }
}

// Singleton instance
export const confidenceCalculator = new ConfidenceCalculator();
//# sourceMappingURL=confidence-calculator.js.map