UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

425 lines (424 loc) 19 kB
/**
 * Cost Intelligence Engine
 *
 * Advanced cost optimization and budget management system that makes
 * consensus pipelines cost-aware and budget-friendly without sacrificing quality.
 *
 * Features:
 * - Real-time cost estimation and tracking
 * - Budget-aware model selection
 * - Cost optimization strategies
 * - ROI analysis for consensus vs single models
 * - Dynamic routing to cost-effective variants
 * - Smart budget allocation across stages
 */
import { getDatabase } from '../storage/unified-database.js';

// ===== INTERNAL HELPERS =====

/**
 * Return `table[key]` when `key` is an own property of `table`, otherwise `fallback`.
 * The own-property check keeps inherited names (e.g. "constructor") from matching.
 *
 * @param {Object} table - Lookup table with a fixed set of keys.
 * @param {string} key - Key supplied by the caller.
 * @param {*} fallback - Value returned when the key is not present.
 * @returns {*} The table entry or the fallback.
 */
function lookupOwn(table, key, fallback) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : fallback;
}

// ===== COST INTELLIGENCE CLASS =====

export class CostIntelligence {
    /**
     * Estimate cost for a complete consensus pipeline.
     *
     * @param {string} question - The user question; drives the token estimate.
     * @param {Object} modelLineup - Map of stage name -> model object. Each model must
     *   expose `estimatedCost`; `openrouterId` is also read when routing optimizations
     *   are requested.
     * @param {Object} [options]
     * @param {boolean} [options.routingOptimizations] - Also collect optimization
     *   suggestions for every stage.
     * @param {boolean} [options.includeFallbackBuffer] - Inflate the total by 15%.
     *   Only `estimatedCost` is inflated; the `breakdown` figures stay un-buffered.
     * @returns {Promise<Object>} `{ estimatedCost, confidence, breakdown, routingOptimizations }`
     *   where `routingOptimizations` is `undefined` when none were found.
     */
    async estimateConsensusCost(question, modelLineup, options = {}) {
        console.log('💰 Estimating consensus pipeline cost...');
        // Estimate token usage based on question complexity
        const tokenEstimate = this.estimateTokenUsage(question);
        let totalCost = 0;
        let totalInputTokens = 0;
        let totalOutputTokens = 0;
        let totalInputCost = 0;
        let totalOutputCost = 0;
        const optimizations = [];
        // Calculate cost for each stage
        for (const [stage, model] of Object.entries(modelLineup)) {
            const stageTokens = this.getStageTokenMultiplier(stage, tokenEstimate);
            const stageCost = this.calculateModelCost(model, stageTokens);
            totalCost += stageCost.total;
            totalInputTokens += stageTokens.input;
            totalOutputTokens += stageTokens.output;
            totalInputCost += stageCost.inputCost;
            totalOutputCost += stageCost.outputCost;
            // Check for optimization opportunities
            if (options.routingOptimizations) {
                const stageOptimizations = await this.findCostOptimizations(model, stageTokens, stage);
                optimizations.push(...stageOptimizations);
            }
        }
        // Add fallback buffer if requested
        if (options.includeFallbackBuffer) {
            totalCost *= 1.15; // 15% buffer for potential fallbacks
        }
        const estimate = {
            estimatedCost: totalCost,
            confidence: 0.85, // Fixed confidence value reported with every estimate
            breakdown: {
                inputTokens: totalInputTokens,
                outputTokens: totalOutputTokens,
                inputCost: totalInputCost,
                outputCost: totalOutputCost
            },
            routingOptimizations: optimizations.length > 0 ? optimizations : undefined
        };
        console.log(`💰 Estimated total cost: $${totalCost.toFixed(4)} (${totalInputTokens + totalOutputTokens} tokens)`);
        return estimate;
    }

    /**
     * Create a budget allocation across the four pipeline stages.
     *
     * 10% of the total is always held back as a reserve; the remaining 90% is split
     * between generator / refiner / validator / curator according to `strategy`.
     * `questionComplexity` then adjusts the split:
     * - 'production': 20% of the validator share is moved to the curator.
     * - 'minimal': every stage is cut by 20% and the saving is added to the reserve.
     *
     * In every case `reserveFund` plus the stage allocations sums to `totalBudget`.
     *
     * @param {number} totalBudget - Total budget in dollars.
     * @param {string} questionComplexity - 'minimal', 'production', or anything else
     *   (no adjustment).
     * @param {string} [strategy='balanced'] - 'front_loaded', 'quality_focused',
     *   'cost_optimized' or 'balanced'. Unknown values use the balanced split.
     * @returns {Promise<Object>} `{ totalBudget, stageAllocations, reserveFund, allocationStrategy }`
     */
    async createBudgetAllocation(totalBudget, questionComplexity, strategy = 'balanced') {
        console.log(`💰 Creating budget allocation: $${totalBudget} (${strategy} strategy)`);
        // Reserve 10% for fallbacks and overruns
        let reserveFund = totalBudget * 0.1;
        const allocatableBudget = totalBudget - reserveFund;
        let stageAllocations;
        switch (strategy) {
            case 'front_loaded':
                // Put more budget into generator for strong foundation
                stageAllocations = {
                    generator: allocatableBudget * 0.4,
                    refiner: allocatableBudget * 0.25,
                    validator: allocatableBudget * 0.15,
                    curator: allocatableBudget * 0.2
                };
                break;
            case 'quality_focused':
                // Put more budget into curator for high-quality final output
                stageAllocations = {
                    generator: allocatableBudget * 0.3,
                    refiner: allocatableBudget * 0.25,
                    validator: allocatableBudget * 0.15,
                    curator: allocatableBudget * 0.3
                };
                break;
            case 'cost_optimized': // Same split as balanced; savings come from model choice
            default: // balanced
                stageAllocations = {
                    generator: allocatableBudget * 0.28,
                    refiner: allocatableBudget * 0.24,
                    validator: allocatableBudget * 0.24,
                    curator: allocatableBudget * 0.24
                };
        }
        // Adjust for question complexity
        if (questionComplexity === 'production') {
            // Production questions need more budget for curator
            stageAllocations.curator += stageAllocations.validator * 0.2;
            stageAllocations.validator *= 0.8;
        }
        else if (questionComplexity === 'minimal') {
            // Minimal questions can use cheaper models across the board
            Object.keys(stageAllocations).forEach(stage => {
                stageAllocations[stage] *= 0.8;
            });
            // Put saved budget back into reserve. Every strategy's shares sum to 1.0,
            // so the 20% cut above equals exactly 20% of the allocatable budget.
            reserveFund += allocatableBudget * 0.2;
        }
        return {
            totalBudget,
            stageAllocations,
            reserveFund,
            allocationStrategy: strategy
        };
    }

    /**
     * Find cost optimization opportunities for a model/stage.
     *
     * Three strategies are evaluated independently; each that applies contributes
     * one entry to the result:
     * 1. ':floor' routing variant (when the model id does not already contain it).
     * 2. Substitution by a cheaper model found via `findCostEffectiveAlternative`.
     * 3. Output-token reduction, only for the 'validator' stage above 500 output tokens.
     *
     * @param {Object} model - Model object; reads `openrouterId`, `estimatedCost`,
     *   `suitabilityScore`.
     * @param {{input: number, output: number}} tokenUsage - Token counts for the stage.
     * @param {string} stage - Stage name.
     * @returns {Promise<Array<Object>>} Optimization descriptors (possibly empty).
     */
    async findCostOptimizations(model, tokenUsage, stage) {
        const optimizations = [];
        // Strategy 1: Use :floor routing variant for cost savings
        if (!model.openrouterId.includes(':floor')) {
            const floorSavings = model.estimatedCost * 0.3; // Estimated 30% savings
            optimizations.push({
                strategy: 'floor_routing',
                potentialSavings: floorSavings,
                qualityImpact: -0.1, // Slight quality reduction
                description: `Use ${model.openrouterId}:floor for ${stage} stage (30% cost reduction)`,
                routingVariant: ':floor'
            });
        }
        // Strategy 2: Alternative model selection
        const alternativeModel = await this.findCostEffectiveAlternative(model, stage);
        if (alternativeModel) {
            const savings = model.estimatedCost - alternativeModel.estimatedCost;
            if (savings > 0) {
                optimizations.push({
                    strategy: 'model_substitution',
                    potentialSavings: savings,
                    qualityImpact: alternativeModel.suitabilityScore - model.suitabilityScore,
                    description: `Use ${alternativeModel.openrouterId} instead of ${model.openrouterId} for ${stage} stage`
                });
            }
        }
        // Strategy 3: Token optimization for non-critical stages
        if (stage === 'validator' && tokenUsage.output > 500) {
            // Average cost per token for this stage times the number of tokens trimmed
            const tokenSavings = (tokenUsage.output - 300) * (model.estimatedCost / (tokenUsage.input + tokenUsage.output));
            optimizations.push({
                strategy: 'token_optimization',
                potentialSavings: tokenSavings,
                qualityImpact: 0, // No quality impact for validation
                description: `Reduce validator output tokens from ${tokenUsage.output} to 300 (validation tasks need shorter responses)`
            });
        }
        return optimizations;
    }

    /**
     * Find a cost-effective alternative model for a stage.
     *
     * Queries `openrouter_models` (left-joined with `model_rankings`) for an active
     * model from the same provider whose prices pass the filter below and whose rank
     * is within 15 positions of the current model. The cheapest match is returned,
     * with latency, success rate and suitability derived from the current model by
     * fixed factors. Any failure is logged and reported as "no alternative".
     *
     * NOTE(review): the price filter compares `pricing_input` / `pricing_output`
     * (treated as per-1M-token prices in the `estimatedCost` formula below) against
     * `currentModel.estimatedCost * 0.8`, which looks like a per-request dollar
     * figure - the units appear inconsistent; confirm against the schema.
     *
     * @param {Object} currentModel - Model currently selected for the stage.
     * @param {string} stage - Stage name (not used by the query).
     * @returns {Promise<Object|null>} Alternative model descriptor, or `null`.
     */
    async findCostEffectiveAlternative(currentModel, stage) {
        try {
            const db = await getDatabase();
            // Find models that are:
            // 1. Cheaper than current model
            // 2. Still have decent ranking (within 15 positions)
            // 3. Same provider family (for consistency)
            const query = `
        SELECT DISTINCT
          om.internal_id,
          om.openrouter_id,
          om.provider_name,
          om.pricing_input,
          om.pricing_output,
          COALESCE(mr.rank_position, 999) as rank_position,
          COALESCE(mr.relative_score, 0.1) as relative_score
        FROM openrouter_models om
        LEFT JOIN model_rankings mr ON om.internal_id = mr.model_internal_id
          AND mr.ranking_source = 'openrouter_programming_weekly'
        WHERE om.is_active = 1
          AND om.provider_name = ?
          AND om.pricing_input < ?
          AND om.pricing_output < ?
          AND om.openrouter_id != ?
          AND COALESCE(mr.rank_position, 999) <= ?
        ORDER BY (om.pricing_input + om.pricing_output) ASC, COALESCE(mr.rank_position, 999) ASC
        LIMIT 3
      `;
            const alternatives = await db.all(query, [
                currentModel.provider,
                currentModel.estimatedCost * 0.8, // Must be at least 20% cheaper
                currentModel.estimatedCost * 0.8,
                currentModel.openrouterId,
                (currentModel.rankPosition || 50) + 15 // Within 15 positions
            ]);
            if (alternatives.length > 0) {
                const alt = alternatives[0];
                return {
                    internalId: alt.internal_id,
                    openrouterId: alt.openrouter_id,
                    provider: alt.provider_name,
                    // 999 is the COALESCE placeholder for "unranked"
                    rankPosition: alt.rank_position !== 999 ? alt.rank_position : undefined,
                    relativeScore: alt.relative_score,
                    // Cost of a 500-input / 1000-output token request at per-1M prices
                    estimatedCost: (alt.pricing_input * 500 + alt.pricing_output * 1000) / 1000000,
                    estimatedLatency: currentModel.estimatedLatency * 1.1, // Assume slightly slower
                    successRate: currentModel.successRate * 0.95, // Assume slightly less reliable
                    features: currentModel.features,
                    contextWindow: currentModel.contextWindow,
                    suitabilityScore: currentModel.suitabilityScore * 0.9 // Assume slightly lower suitability
                };
            }
        }
        catch (error) {
            console.warn('Failed to find cost-effective alternative:', error);
        }
        return null;
    }

    /**
     * Calculate efficiency metrics for consensus vs single model.
     *
     * @param {number} consensusCost - Cost of the consensus run.
     * @param {number} consensusQuality - Quality score of the consensus run.
     * @param {number} singleModelCost - Cost of the single-model run.
     * @param {number} singleModelQuality - Quality score of the single-model run.
     * @param {number} consensusTime - Returned unchanged as `timeToValue`.
     * @param {number} singleModelTime - Accepted for interface compatibility; unused.
     * @returns {Promise<Object>} `{ costPerToken, qualityPerDollar, timeToValue, totalValueScore }`.
     *   `qualityPerDollar` is not finite when `consensusCost` is 0.
     */
    async calculateEfficiencyMetrics(consensusCost, consensusQuality, singleModelCost, singleModelQuality, consensusTime, singleModelTime) {
        const consensusCostPerToken = consensusCost / 2000; // Assume ~2000 tokens average
        const consensusQualityPerDollar = consensusQuality / consensusCost;
        // Calculate overall value score (quality improvement vs cost increase)
        const qualityImprovement = consensusQuality - singleModelQuality;
        const costIncrease = consensusCost - singleModelCost;
        // The 0.001 offset keeps the equal-cost case from dividing by zero
        const totalValueScore = qualityImprovement / (costIncrease + 0.001);
        return {
            costPerToken: consensusCostPerToken,
            qualityPerDollar: consensusQualityPerDollar,
            timeToValue: consensusTime,
            totalValueScore
        };
    }

    /**
     * Estimate token usage based on question analysis.
     *
     * Input tokens are ~1.3 per whitespace-separated word. Output tokens start at 800
     * and are overridden by keyword checks applied in order (later checks win):
     * implement/build -> 1200, simple/basic -> 500, production/enterprise -> 1500.
     * Questions longer than 500 characters scale input by 1.5 and output by 1.3.
     *
     * An empty, whitespace-only, null or undefined question counts as zero words.
     *
     * @param {string} question - The question text.
     * @returns {{input: number, output: number}} Rounded token estimates.
     */
    estimateTokenUsage(question) {
        const text = question == null ? '' : String(question);
        const questionLength = text.length;
        // Trim first: ''.split(/\s+/) yields [''] (length 1), and leading/trailing
        // whitespace would otherwise be counted as extra words.
        const trimmed = text.trim();
        const questionWords = trimmed === '' ? 0 : trimmed.split(/\s+/).length;
        const lowered = text.toLowerCase();
        // Base estimates
        let inputTokens = questionWords * 1.3; // ~1.3 tokens per word average
        let outputTokens = 800; // Default output length
        // Adjust based on question characteristics
        if (lowered.includes('implement') || lowered.includes('build')) {
            outputTokens = 1200; // Implementation questions need more output
        }
        if (lowered.includes('simple') || lowered.includes('basic')) {
            outputTokens = 500; // Simple questions need less output
        }
        if (lowered.includes('production') || lowered.includes('enterprise')) {
            outputTokens = 1500; // Production questions need comprehensive output
        }
        if (questionLength > 500) {
            inputTokens *= 1.5; // Complex questions have more context
            outputTokens *= 1.3;
        }
        return {
            input: Math.round(inputTokens),
            output: Math.round(outputTokens)
        };
    }

    /**
     * Scale the base token estimate for a given pipeline stage.
     *
     * @param {string} stage - 'generator', 'refiner', 'validator' or 'curator'.
     *   Any other stage name is left unscaled (1.0 / 1.0) instead of throwing.
     * @param {{input: number, output: number}} baseTokens - Base token estimate.
     * @returns {{input: number, output: number}} Rounded, stage-adjusted token counts.
     */
    getStageTokenMultiplier(stage, baseTokens) {
        const multipliers = {
            generator: { input: 1.0, output: 1.0 }, // Generator uses base amounts
            refiner: { input: 1.5, output: 1.2 }, // Refiner sees original + generator output
            validator: { input: 1.8, output: 0.6 }, // Validator sees more context but outputs less
            curator: { input: 2.0, output: 1.1 } // Curator sees all previous work
        };
        const mult = lookupOwn(multipliers, stage, multipliers.generator);
        return {
            input: Math.round(baseTokens.input * mult.input),
            output: Math.round(baseTokens.output * mult.output)
        };
    }

    /**
     * Calculate cost for a specific model and token usage.
     *
     * Per-1M-token prices are derived from `model.estimatedCost` by treating it as the
     * cost of 1500 tokens; output tokens are priced at twice the input rate.
     *
     * @param {Object} model - Model object; reads `estimatedCost`.
     * @param {{input: number, output: number}} tokens - Token counts.
     * @returns {{total: number, inputCost: number, outputCost: number}}
     */
    calculateModelCost(model, tokens) {
        // Extract pricing from model (prices are per 1M tokens)
        const inputCostPer1M = model.estimatedCost / 1500 * 1000000; // Rough estimate, will be refined
        const outputCostPer1M = inputCostPer1M * 2; // Output typically 2x input cost
        const inputCost = (tokens.input * inputCostPer1M) / 1000000;
        const outputCost = (tokens.output * outputCostPer1M) / 1000000;
        return {
            total: inputCost + outputCost,
            inputCost,
            outputCost
        };
    }

    /**
     * Record cost analytics for learning and optimization.
     *
     * Inserts one row into `cost_analytics`. Best-effort: failures are logged with
     * `console.warn` and never thrown. A missing `costBreakdown` or
     * `optimizationsApplied` is treated as empty so the row is still written.
     *
     * @param {string} conversationId - Conversation identifier.
     * @param {Object} costBreakdown - May carry `stageBreakdown` and `tokenBreakdown`.
     * @param {Array<Object>} optimizationsApplied - Entries may carry `potentialSavings`.
     * @param {number} actualCost - Actual total cost.
     * @returns {Promise<void>}
     */
    async recordCostAnalytics(conversationId, costBreakdown, optimizationsApplied, actualCost) {
        try {
            const db = await getDatabase();
            const breakdown = costBreakdown || {};
            const applied = Array.isArray(optimizationsApplied) ? optimizationsApplied : [];
            const costEfficiencyScore = this.calculateCostEfficiencyScore(breakdown, actualCost);
            await db.run(`
        INSERT INTO cost_analytics
        (conversation_id, total_cost, cost_per_stage, tokens_per_stage, routing_optimizations, cost_efficiency_score, savings_achieved, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
      `, [
                conversationId,
                actualCost,
                JSON.stringify(breakdown.stageBreakdown || {}),
                JSON.stringify(breakdown.tokenBreakdown || {}),
                JSON.stringify(applied),
                costEfficiencyScore,
                // A missing potentialSavings counts as 0 rather than poisoning the sum with NaN
                applied.reduce((sum, opt) => sum + (opt.potentialSavings || 0), 0),
                new Date().toISOString()
            ]);
        }
        catch (error) {
            console.warn('Failed to record cost analytics:', error);
        }
    }

    /**
     * Update cost analytics with actual costs after consensus completion.
     *
     * Updates the `cost_analytics` row and the `conversations` row for the
     * conversation. Best-effort: failures are logged with `console.warn`.
     *
     * @param {string} conversationId - Conversation identifier.
     * @param {Object} actualCostData - Reads `totalCost`, `stageBreakdown`,
     *   `tokenBreakdown` and optional `qualityScore`.
     * @returns {Promise<void>}
     */
    async updateActualCosts(conversationId, actualCostData) {
        try {
            console.log(`💰 [UPDATEACTUALCOSTS] Received totalCost: ${actualCostData.totalCost} (type: ${typeof actualCostData.totalCost})`);
            console.log(`💰 [UPDATEACTUALCOSTS] Received stageBreakdown:`, actualCostData.stageBreakdown);
            const db = await getDatabase();
            const costEfficiencyScore = actualCostData.qualityScore ?
                actualCostData.qualityScore / Math.max(0.001, actualCostData.totalCost * 100) // Quality per cost
                : this.calculateCostEfficiencyScore(actualCostData, actualCostData.totalCost);
            // Update existing cost analytics record with actual data
            await db.run(`
        UPDATE cost_analytics
        SET total_cost = ?, cost_per_stage = ?, tokens_per_stage = ?, cost_efficiency_score = ?
        WHERE conversation_id = ?
      `, [
                actualCostData.totalCost,
                JSON.stringify(actualCostData.stageBreakdown),
                JSON.stringify(actualCostData.tokenBreakdown),
                costEfficiencyScore,
                conversationId
            ]);
            // Also store/update in conversations table for budget reporting
            await db.run(`
        UPDATE conversations
        SET total_cost = ?, updated_at = ?
        WHERE id = ?
      `, [
                actualCostData.totalCost,
                new Date().toISOString(),
                conversationId
            ]);
            // Coerce only for display: a string totalCost or non-string id must not make a
            // successful update look like a failure by throwing in this log line.
            console.log(`💰 Updated actual costs for conversation ${String(conversationId).substring(0, 8)}: $${Number(actualCostData.totalCost).toFixed(6)}`);
        }
        catch (error) {
            console.warn('Failed to update actual cost analytics:', error);
        }
    }

    /**
     * Calculate cost efficiency score (0.1-1.0).
     *
     * 1.0 at or below the $0.01 target, 0.1 at or above five times the target, and a
     * linear interpolation (floored at 0.1) in between.
     *
     * @param {Object} costBreakdown - Accepted for interface compatibility; unused.
     * @param {number} actualCost - Actual cost in dollars.
     * @returns {number} Efficiency score.
     */
    calculateCostEfficiencyScore(costBreakdown, actualCost) {
        // Simple efficiency calculation
        // Better scores for lower costs with maintained quality
        const targetCost = 0.01; // $0.01 target for good efficiency
        if (actualCost <= targetCost)
            return 1.0;
        if (actualCost >= targetCost * 5)
            return 0.1;
        return Math.max(0.1, 1.0 - ((actualCost - targetCost) / (targetCost * 4)));
    }
}

// ===== CONVENIENCE FUNCTIONS =====

/**
 * Quick cost estimate for a question.
 *
 * Sums fixed average per-stage costs for the given complexity tier. The question
 * text itself is not inspected.
 *
 * @param {string} question - Accepted for interface compatibility; unused.
 * @param {string} [complexity='basic'] - 'minimal', 'basic' or 'production'.
 *   Unknown values use the 'basic' tier instead of throwing.
 * @returns {Promise<number>} Estimated total cost in dollars.
 */
export async function estimateQuestionCost(question, complexity = 'basic') {
    // Use estimated average model costs for quick estimation
    const avgModelCosts = {
        minimal: { generator: 0.001, refiner: 0.0008, validator: 0.0005, curator: 0.001 },
        basic: { generator: 0.002, refiner: 0.0015, validator: 0.0008, curator: 0.002 },
        production: { generator: 0.004, refiner: 0.003, validator: 0.0015, curator: 0.004 }
    };
    const costs = lookupOwn(avgModelCosts, complexity, avgModelCosts.basic);
    return Object.values(costs).reduce((sum, cost) => sum + cost, 0);
}

/**
 * Get budget recommendation for a question type.
 *
 * @param {string} questionComplexity - 'minimal', 'basic' or 'production'.
 *   Unknown values use the 'basic' tier instead of throwing.
 * @param {string} [qualityLevel='standard'] - 'standard', 'high' or 'premium'.
 *   Unknown values use 'standard' instead of returning `undefined`.
 * @returns {Promise<number>} Recommended budget in dollars.
 */
export async function getBudgetRecommendation(questionComplexity, qualityLevel = 'standard') {
    const baseBudgets = {
        minimal: { standard: 0.003, high: 0.005, premium: 0.008 },
        basic: { standard: 0.008, high: 0.012, premium: 0.020 },
        production: { standard: 0.015, high: 0.025, premium: 0.040 }
    };
    const tier = lookupOwn(baseBudgets, questionComplexity, baseBudgets.basic);
    return lookupOwn(tier, qualityLevel, tier.standard);
}

export default CostIntelligence;