UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

152 lines 4.31 kB
/**
 * Consensus Effectiveness Analyzer
 *
 * Professional-grade system for measuring the effectiveness of 4-stage consensus
 * vs single model performance. Provides comprehensive A/B testing, quality metrics,
 * and cost-effectiveness analysis for optimization recommendations.
 */

/** One recorded A/B comparison between a single-model run and a 4-stage consensus run. */
export interface ConsensusComparison {
    id: string;
    question: string;
    questionCategory: string;
    questionComplexity: 'simple' | 'moderate' | 'complex';
    timestamp: string;
    /** Baseline result produced by a single model. */
    singleModel: {
        model: string;
        answer: string;
        duration: number;
        tokenCount: number;
        cost: number;
        quality?: QualityMetrics;
    };
    /** Result produced by the generator → refiner → validator → curator pipeline. */
    consensus: {
        models: {
            generator: string;
            refiner: string;
            validator: string;
            curator: string;
        };
        answer: string;
        duration: number;
        tokenCount: number;
        cost: number;
        stageBreakdown: {
            generator: StageMetrics;
            refiner: StageMetrics;
            validator: StageMetrics;
            curator: StageMetrics;
        };
        quality?: QualityMetrics;
    };
    /** Derived comparison outcome and recommendation. */
    analysis: {
        improvementScore: number;
        costEffectiveness: number;
        timeEfficiency: number;
        qualityDelta: QualityDelta;
        recommendation: 'consensus' | 'single_model' | 'depends';
        reasoning: string;
    };
}

/** Per-stage timing, token, and cost measurements for one consensus stage. */
export interface StageMetrics {
    duration: number;
    tokenCount: number;
    cost: number;
    tokensPerSecond: number;
}

/** Quality scores for a single answer. */
export interface QualityMetrics {
    completeness: number;
    accuracy: number;
    clarity: number;
    usefulness: number;
    codeQuality?: number;
    overallScore: number;
}

/** Difference in quality scores (consensus minus single model). */
export interface QualityDelta {
    completeness: number;
    accuracy: number;
    clarity: number;
    usefulness: number;
    codeQuality?: number;
    overallImprovement: number;
}

/** Aggregate report over a set of stored comparisons. */
export interface EffectivenessReport {
    totalComparisons: number;
    consensusWins: number;
    singleModelWins: number;
    averageImprovementScore: number;
    averageCostEffectiveness: number;
    categoryBreakdown: Record<string, CategoryStats>;
    complexityBreakdown: Record<string, CategoryStats>;
    recommendations: string[];
}

/** Statistics for one question category or complexity bucket. */
export interface CategoryStats {
    comparisons: number;
    consensusWinRate: number;
    averageImprovement: number;
    averageCostRatio: number;
    timeRatio: number;
}

export declare class ConsensusEffectivenessAnalyzer {
    /**
     * Run A/B comparison: single model vs 4-stage consensus
     */
    runABComparison(question: string, baselineModel: string, options?: {
        skipConsensus?: boolean;
        skipSingleModel?: boolean;
        autoAnalyze?: boolean;
    }): Promise<ConsensusComparison>;
    /**
     * Run single model baseline for comparison
     */
    private runSingleModelBaseline;
    /**
     * Run consensus for comparison
     */
    private runConsensusComparison;
    /**
     * Analyze quality difference between single model and consensus
     */
    analyzeQualityDifference(comparison: ConsensusComparison): Promise<void>;
    /**
     * Assess answer quality using AI
     */
    private assessAnswerQuality;
    /**
     * Build quality assessment prompt
     */
    private buildQualityAssessmentPrompt;
    /**
     * Parse quality scores from AI response
     */
    private parseQualityScores;
    /**
     * Generate recommendation based on metrics
     */
    private generateRecommendation;
    /**
     * Generate effectiveness report from stored comparisons
     */
    generateEffectivenessReport(options?: {
        dateFrom?: string;
        dateTo?: string;
        category?: string;
        complexity?: string;
    }): Promise<EffectivenessReport>;
    /**
     * Utility functions
     */
    private categorizeQuestion;
    private assessComplexity;
    private estimateTokenCount;
    private estimateSingleModelCost;
    private extractStageBreakdown;
    private getConsensusModels;
    private saveComparison;
    private generateCategoryBreakdown;
    private generateRecommendations;
}

/**
 * Factory function to create consensus effectiveness analyzer
 */
export declare function createConsensusAnalyzer(): ConsensusEffectivenessAnalyzer;
//# sourceMappingURL=consensus-effectiveness-analyzer.d.ts.map