// Package: @hivetechs/hive-ai
// Version:
// Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API
// 152 lines • 4.31 kB
// TypeScript
/**
* Consensus Effectiveness Analyzer
*
* Professional-grade system for measuring how effective the 4-stage consensus
* is compared with single-model performance. Provides comprehensive A/B testing,
* quality metrics, and cost-effectiveness analysis to support optimization
* recommendations.
*/
/**
 * Record of one A/B comparison: the same question answered by a single model
 * and by the 4-stage consensus, plus the analysis of the two results.
 *
 * NOTE(review): units and value ranges of the numeric fields (durations,
 * costs, scores) are not stated in this declaration file — confirm them
 * against the implementation before relying on them.
 */
export interface ConsensusComparison {
  /** Identifier of this comparison record. */
  id: string;
  /** The question both sides were asked. */
  question: string;
  /** Category label assigned to the question. */
  questionCategory: string;
  /** Complexity level assigned to the question. */
  questionComplexity: 'simple' | 'moderate' | 'complex';
  /** When the comparison was made. NOTE(review): string format not specified here — confirm. */
  timestamp: string;

  /** Result of the single-model side of the comparison. */
  singleModel: {
    /** Name/identifier of the model that answered. */
    model: string;
    /** Answer text produced by the model. */
    answer: string;
    /** Time taken. Presumably milliseconds — confirm. */
    duration: number;
    /** Token count for the answer. May be an estimate — confirm. */
    tokenCount: number;
    /** Monetary cost. Currency not stated here — confirm. */
    cost: number;
    /** Quality scores for this answer; optional, so may be absent. */
    quality?: QualityMetrics;
  };

  /** Result of the 4-stage consensus side of the comparison. */
  consensus: {
    /** Model name/identifier used for each of the four stages. */
    models: {
      generator: string;
      refiner: string;
      validator: string;
      curator: string;
    };
    /** Final answer text produced by the consensus. */
    answer: string;
    /** Time taken. Presumably milliseconds and covering all stages — confirm. */
    duration: number;
    /** Token count. Presumably summed over all stages — confirm. */
    tokenCount: number;
    /** Monetary cost. Presumably summed over all stages — confirm. */
    cost: number;
    /** Per-stage measurements, one entry per stage. */
    stageBreakdown: {
      generator: StageMetrics;
      refiner: StageMetrics;
      validator: StageMetrics;
      curator: StageMetrics;
    };
    /** Quality scores for the consensus answer; optional, so may be absent. */
    quality?: QualityMetrics;
  };

  /** Outcome of comparing the two sides. */
  analysis: {
    /** Score expressing how much the consensus improved on the single model. Scale not stated here. */
    improvementScore: number;
    /** Cost-effectiveness figure. NOTE(review): formula and scale not visible here — confirm. */
    costEffectiveness: number;
    /** Time-efficiency figure. NOTE(review): formula and scale not visible here — confirm. */
    timeEfficiency: number;
    /** Per-dimension quality differences between the two answers. */
    qualityDelta: QualityDelta;
    /** Which approach is recommended for this question. */
    recommendation: 'consensus' | 'single_model' | 'depends';
    /** Free-text explanation accompanying the recommendation. */
    reasoning: string;
  };
}
/**
 * Measurements recorded for a single stage of the consensus
 * (generator, refiner, validator or curator).
 *
 * NOTE(review): units are not stated in this declaration file — `duration`
 * is presumably milliseconds and `cost` a currency amount; confirm against
 * the implementation.
 */
export interface StageMetrics {
  /** Time spent in the stage. */
  duration: number;
  /** Number of tokens attributed to the stage. */
  tokenCount: number;
  /** Monetary cost attributed to the stage. */
  cost: number;
  /** Throughput of the stage in tokens per second. */
  tokensPerSecond: number;
}
/**
 * Quality scores for one answer, broken down by dimension.
 *
 * NOTE(review): the scoring scale (e.g. 0-10 or 0-1) is not stated in this
 * declaration file — confirm against the implementation.
 */
export interface QualityMetrics {
  /** Score for how complete the answer is. */
  completeness: number;
  /** Score for how accurate the answer is. */
  accuracy: number;
  /** Score for how clear the answer is. */
  clarity: number;
  /** Score for how useful the answer is. */
  usefulness: number;
  /** Score for code quality; optional, presumably only set when the answer contains code — confirm. */
  codeQuality?: number;
  /** Overall score. NOTE(review): how it is derived from the other scores is not visible here. */
  overallScore: number;
}
/**
 * Per-dimension quality difference between the single-model answer and the
 * consensus answer.
 *
 * NOTE(review): the sign convention (presumably consensus minus single model,
 * so positive means consensus scored higher) is not stated in this
 * declaration file — confirm against the implementation.
 */
export interface QualityDelta {
  /** Difference in the completeness score. */
  completeness: number;
  /** Difference in the accuracy score. */
  accuracy: number;
  /** Difference in the clarity score. */
  clarity: number;
  /** Difference in the usefulness score. */
  usefulness: number;
  /** Difference in the code-quality score; optional, so may be absent. */
  codeQuality?: number;
  /** Overall improvement figure across the dimensions. */
  overallImprovement: number;
}
/**
 * Aggregate report summarising a set of comparisons between single-model and
 * consensus answers.
 */
export interface EffectivenessReport {
  /** Number of comparisons included in the report. */
  totalComparisons: number;
  /** Number of comparisons counted as a win for consensus. */
  consensusWins: number;
  /** Number of comparisons counted as a win for the single model. */
  singleModelWins: number;
  /** Average improvement score over the included comparisons. */
  averageImprovementScore: number;
  /** Average cost-effectiveness over the included comparisons. */
  averageCostEffectiveness: number;
  /** Statistics per group. Keys are presumably question categories — confirm. */
  categoryBreakdown: Record<string, CategoryStats>;
  /** Statistics per group. Keys are presumably complexity levels — confirm. */
  complexityBreakdown: Record<string, CategoryStats>;
  /** Recommendation messages derived from the report. */
  recommendations: string[];
}
/**
 * Statistics for one group of comparisons within an effectiveness report.
 *
 * NOTE(review): scales and ratio directions are not stated in this
 * declaration file — e.g. whether `consensusWinRate` is a 0-1 fraction or a
 * percentage, and whether the ratios are consensus/single — confirm against
 * the implementation.
 */
export interface CategoryStats {
  /** Number of comparisons in the group. */
  comparisons: number;
  /** Rate at which consensus won within the group. */
  consensusWinRate: number;
  /** Average improvement within the group. */
  averageImprovement: number;
  /** Average cost ratio between the two approaches within the group. */
  averageCostRatio: number;
  /** Time ratio between the two approaches within the group. */
  timeRatio: number;
}
/**
 * Analyzer that compares single-model answers with 4-stage consensus answers
 * and reports on the results.
 *
 * Only the type surface is declared here; private members are listed by name
 * without signatures, so their behaviour cannot be read from this file.
 */
export declare class ConsensusEffectivenessAnalyzer {
  /**
   * Runs an A/B comparison of a single model against the 4-stage consensus.
   *
   * @param question - The question to run the comparison on.
   * @param baselineModel - Model for the single-model side (presumably the
   *   baseline the consensus is measured against — confirm).
   * @param options - Optional flags. NOTE(review): their meaning is inferred
   *   from the names only — `skipConsensus` / `skipSingleModel` presumably
   *   omit that side of the run and `autoAnalyze` presumably triggers quality
   *   analysis; defaults are not visible here.
   * @returns A promise resolving to the comparison record.
   */
  runABComparison(
    question: string,
    baselineModel: string,
    options?: {
      skipConsensus?: boolean;
      skipSingleModel?: boolean;
      autoAnalyze?: boolean;
    }
  ): Promise<ConsensusComparison>;

  /** Runs the single-model baseline used in a comparison. */
  private runSingleModelBaseline;

  /** Runs the consensus side of a comparison. */
  private runConsensusComparison;

  /**
   * Analyzes the quality difference between the single-model answer and the
   * consensus answer of a comparison.
   *
   * @param comparison - The comparison to analyze. NOTE(review): the promise
   *   resolves to nothing, so results are presumably written onto this object
   *   — confirm.
   * @returns A promise that resolves when the analysis has finished.
   */
  analyzeQualityDifference(comparison: ConsensusComparison): Promise<void>;

  /** Assesses the quality of an answer using AI. */
  private assessAnswerQuality;

  /** Builds the prompt used for quality assessment. */
  private buildQualityAssessmentPrompt;

  /** Parses quality scores out of the AI response. */
  private parseQualityScores;

  /** Produces a recommendation from the collected metrics. */
  private generateRecommendation;

  /**
   * Generates an effectiveness report from stored comparisons.
   *
   * @param options - Optional criteria. NOTE(review): presumably filters on
   *   date range, question category and complexity; the expected date string
   *   format and matching rules are not visible here — confirm.
   * @returns A promise resolving to the aggregated report.
   */
  generateEffectivenessReport(options?: {
    dateFrom?: string;
    dateTo?: string;
    category?: string;
    complexity?: string;
  }): Promise<EffectivenessReport>;

  /*
   * Utility members. Signatures are not part of this declaration file.
   */
  private categorizeQuestion;
  private assessComplexity;
  private estimateTokenCount;
  private estimateSingleModelCost;
  private extractStageBreakdown;
  private getConsensusModels;
  private saveComparison;
  private generateCategoryBreakdown;
  private generateRecommendations;
}
/**
 * Factory function that creates a consensus effectiveness analyzer.
 *
 * Takes no arguments; any configuration the analyzer uses is not visible in
 * this declaration file.
 *
 * @returns A ConsensusEffectivenessAnalyzer instance.
 */
export declare function createConsensusAnalyzer(): ConsensusEffectivenessAnalyzer;
//# sourceMappingURL=consensus-effectiveness-analyzer.d.ts.map