@hivetechs/hive-ai
Version:
Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API
425 lines • 19 kB
JavaScript
/**
* Cost Intelligence Engine
*
* Advanced cost optimization and budget management system that makes
* consensus pipelines cost-aware and budget-friendly without sacrificing quality.
*
* Features:
* - Real-time cost estimation and tracking
* - Budget-aware model selection
* - Cost optimization strategies
* - ROI analysis for consensus vs single models
* - Dynamic routing to cost-effective variants
* - Smart budget allocation across stages
*/
import { getDatabase } from '../storage/unified-database.js';
// ===== COST INTELLIGENCE CLASS =====
/**
 * Cost estimation, budget allocation, optimization discovery and cost-analytics
 * persistence for the consensus pipeline stages (generator, refiner, validator,
 * curator).
 */
export class CostIntelligence {
  /**
   * Estimate cost for a complete consensus pipeline.
   *
   * @param {string} question - The user question; used only to estimate token counts.
   * @param {Object} modelLineup - Map of stage name -> model object. Each model is
   *   read for `estimatedCost` (and, when optimizations are requested, for
   *   `openrouterId`, `provider`, etc.).
   * @param {Object} [options]
   * @param {boolean} [options.routingOptimizations] - When truthy, collect cost
   *   optimization suggestions per stage (this path queries the database).
   * @param {boolean} [options.includeFallbackBuffer] - When truthy, inflate the
   *   total by 15%.
   * @returns {Promise<Object>} `{ estimatedCost, confidence, breakdown, routingOptimizations }`.
   *   `breakdown` holds the raw per-direction sums and is NOT inflated by the
   *   fallback buffer; `routingOptimizations` is `undefined` when none were found.
   */
  async estimateConsensusCost(question, modelLineup, options = {}) {
    // The default parameter only covers `undefined`; tolerate an explicit null too.
    const opts = options || {};
    console.log('💰 Estimating consensus pipeline cost...');
    // Estimate token usage based on question complexity
    const tokenEstimate = this.estimateTokenUsage(question);
    let totalCost = 0;
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let totalInputCost = 0;
    let totalOutputCost = 0;
    const optimizations = [];
    // Calculate cost for each stage
    for (const [stage, model] of Object.entries(modelLineup)) {
      const stageTokens = this.getStageTokenMultiplier(stage, tokenEstimate);
      const stageCost = this.calculateModelCost(model, stageTokens);
      totalCost += stageCost.total;
      totalInputTokens += stageTokens.input;
      totalOutputTokens += stageTokens.output;
      totalInputCost += stageCost.inputCost;
      totalOutputCost += stageCost.outputCost;
      // Check for optimization opportunities
      if (opts.routingOptimizations) {
        const stageOptimizations = await this.findCostOptimizations(model, stageTokens, stage);
        optimizations.push(...stageOptimizations);
      }
    }
    // Add fallback buffer if requested
    if (opts.includeFallbackBuffer) {
      totalCost *= 1.15; // 15% buffer for potential fallbacks
    }
    const estimate = {
      estimatedCost: totalCost,
      confidence: 0.85, // Fixed confidence value; not derived from the inputs
      breakdown: {
        inputTokens: totalInputTokens,
        outputTokens: totalOutputTokens,
        inputCost: totalInputCost,
        outputCost: totalOutputCost
      },
      routingOptimizations: optimizations.length > 0 ? optimizations : undefined
    };
    console.log(`💰 Estimated total cost: $${totalCost.toFixed(4)} (${totalInputTokens + totalOutputTokens} tokens)`);
    return estimate;
  }

  /**
   * Create a per-stage budget allocation for a given total budget.
   *
   * 10% of the total is always set aside as `reserveFund`; the remaining 90%
   * is split across the four stages according to `strategy`, then adjusted
   * for `questionComplexity`.
   *
   * @param {number} totalBudget - Total budget in dollars.
   * @param {string} questionComplexity - `'production'` and `'minimal'` trigger
   *   adjustments; any other value leaves the strategy split untouched.
   * @param {string} [strategy='balanced'] - `'front_loaded'`, `'quality_focused'`,
   *   `'cost_optimized'` or `'balanced'`. Unrecognized values use the balanced
   *   split but are echoed back unchanged in `allocationStrategy`.
   * @returns {Promise<Object>} `{ totalBudget, stageAllocations, reserveFund, allocationStrategy }`.
   */
  async createBudgetAllocation(totalBudget, questionComplexity, strategy = 'balanced') {
    console.log(`💰 Creating budget allocation: $${totalBudget} (${strategy} strategy)`);
    // Reserve 10% for fallbacks and overruns
    const reserveFund = totalBudget * 0.1;
    const allocatableBudget = totalBudget - reserveFund;
    // Fraction of the allocatable budget each stage receives, per strategy.
    const balancedShares = { generator: 0.28, refiner: 0.24, validator: 0.24, curator: 0.24 };
    const strategyShares = {
      // Put more budget into generator for strong foundation
      front_loaded: { generator: 0.4, refiner: 0.25, validator: 0.15, curator: 0.2 },
      // Put more budget into curator for high-quality final output
      quality_focused: { generator: 0.3, refiner: 0.25, validator: 0.15, curator: 0.3 },
      // Same split as balanced; the strategy name is only reported back to the caller
      cost_optimized: balancedShares,
      balanced: balancedShares
    };
    const shares = Object.prototype.hasOwnProperty.call(strategyShares, strategy)
      ? strategyShares[strategy]
      : balancedShares;
    const stageAllocations = {
      generator: allocatableBudget * shares.generator,
      refiner: allocatableBudget * shares.refiner,
      validator: allocatableBudget * shares.validator,
      curator: allocatableBudget * shares.curator
    };
    // Adjust for question complexity
    if (questionComplexity === 'production') {
      // Move 20% of the validator's allocation over to the curator
      stageAllocations.curator += stageAllocations.validator * 0.2;
      stageAllocations.validator *= 0.8;
    }
    else if (questionComplexity === 'minimal') {
      // Scale every stage to 80% and hand the freed 20% back in four equal
      // parts. The stage total is unchanged (reserveFund is NOT increased);
      // the net effect is to flatten the split toward an even distribution.
      const savedBudget = allocatableBudget * 0.2;
      Object.keys(stageAllocations).forEach(stage => {
        stageAllocations[stage] = stageAllocations[stage] * 0.8 + savedBudget / 4;
      });
    }
    return {
      totalBudget,
      stageAllocations,
      reserveFund,
      allocationStrategy: strategy
    };
  }

  /**
   * Find cost optimization opportunities for a model/stage.
   *
   * @param {Object} model - Model currently assigned to the stage; reads
   *   `openrouterId`, `estimatedCost` and `suitabilityScore`.
   * @param {{input: number, output: number}} tokenUsage - Estimated tokens for the stage.
   * @param {string} stage - Stage name.
   * @returns {Promise<Array<Object>>} Suggestions, each with `strategy`,
   *   `potentialSavings`, `qualityImpact` and `description`.
   */
  async findCostOptimizations(model, tokenUsage, stage) {
    const optimizations = [];
    // Strategy 1: Use :floor routing variant for cost savings
    if (!model.openrouterId.includes(':floor')) {
      const floorSavings = model.estimatedCost * 0.3; // Estimated 30% savings
      optimizations.push({
        strategy: 'floor_routing',
        potentialSavings: floorSavings,
        qualityImpact: -0.1, // Slight quality reduction
        description: `Use ${model.openrouterId}:floor for ${stage} stage (30% cost reduction)`,
        routingVariant: ':floor'
      });
    }
    // Strategy 2: Alternative model selection
    const alternativeModel = await this.findCostEffectiveAlternative(model, stage);
    if (alternativeModel) {
      const savings = model.estimatedCost - alternativeModel.estimatedCost;
      if (savings > 0) {
        optimizations.push({
          strategy: 'model_substitution',
          potentialSavings: savings,
          qualityImpact: alternativeModel.suitabilityScore - model.suitabilityScore,
          description: `Use ${alternativeModel.openrouterId} instead of ${model.openrouterId} for ${stage} stage`
        });
      }
    }
    // Strategy 3: Token optimization for non-critical stages
    if (stage === 'validator' && tokenUsage.output > 500) {
      // Average cost per token for this stage times the tokens trimmed
      // by capping validator output at 300.
      const tokenSavings = (tokenUsage.output - 300) * (model.estimatedCost / (tokenUsage.input + tokenUsage.output));
      optimizations.push({
        strategy: 'token_optimization',
        potentialSavings: tokenSavings,
        qualityImpact: 0, // No quality impact for validation
        description: `Reduce validator output tokens from ${tokenUsage.output} to 300 (validation tasks need shorter responses)`
      });
    }
    return optimizations;
  }

  /**
   * Find a cost-effective alternative model for a stage.
   *
   * Queries active models from the same provider that are priced below a
   * threshold derived from the current model and rank within 15 positions of
   * it, and returns the cheapest match. Database errors are logged and
   * treated as "no alternative".
   *
   * @param {Object} currentModel - Model to replace; reads `provider`,
   *   `estimatedCost`, `openrouterId`, `rankPosition`, `estimatedLatency`,
   *   `successRate`, `features`, `contextWindow` and `suitabilityScore`.
   * @param {string} stage - Stage name (currently unused by the lookup).
   * @returns {Promise<Object|null>} A model-shaped object, or `null`.
   */
  async findCostEffectiveAlternative(currentModel, stage) {
    try {
      const db = await getDatabase();
      // Find models that are:
      // 1. Cheaper than current model
      // 2. Still have decent ranking (within 15 positions)
      // 3. Same provider family (for consistency)
      //
      // NOTE(review): pricing_input / pricing_output are treated as per-1M-token
      // prices when estimatedCost is derived below, but the WHERE clause compares
      // them against a threshold built from the per-request estimatedCost.
      // Confirm the units agree; if not, this filter is far stricter than intended.
      const query = `
        SELECT DISTINCT
          om.internal_id,
          om.openrouter_id,
          om.provider_name,
          om.pricing_input,
          om.pricing_output,
          COALESCE(mr.rank_position, 999) as rank_position,
          COALESCE(mr.relative_score, 0.1) as relative_score
        FROM openrouter_models om
        LEFT JOIN model_rankings mr ON om.internal_id = mr.model_internal_id
          AND mr.ranking_source = 'openrouter_programming_weekly'
        WHERE om.is_active = 1
          AND om.provider_name = ?
          AND om.pricing_input < ?
          AND om.pricing_output < ?
          AND om.openrouter_id != ?
          AND COALESCE(mr.rank_position, 999) <= ?
        ORDER BY
          (om.pricing_input + om.pricing_output) ASC,
          COALESCE(mr.rank_position, 999) ASC
        LIMIT 3
      `;
      const alternatives = await db.all(query, [
        currentModel.provider,
        currentModel.estimatedCost * 0.8, // Must be at least 20% cheaper
        currentModel.estimatedCost * 0.8,
        currentModel.openrouterId,
        (currentModel.rankPosition || 50) + 15 // Within 15 positions; unranked models are treated as rank 50
      ]);
      if (alternatives.length > 0) {
        // Rows are ordered cheapest-first, so the first one is the best candidate.
        const alt = alternatives[0];
        return {
          internalId: alt.internal_id,
          openrouterId: alt.openrouter_id,
          provider: alt.provider_name,
          // 999 is the COALESCE sentinel for "no ranking row"
          rankPosition: alt.rank_position !== 999 ? alt.rank_position : undefined,
          relativeScore: alt.relative_score,
          // Cost of a nominal 500-input / 1000-output token request
          estimatedCost: (alt.pricing_input * 500 + alt.pricing_output * 1000) / 1000000,
          estimatedLatency: currentModel.estimatedLatency * 1.1, // Assume slightly slower
          successRate: currentModel.successRate * 0.95, // Assume slightly less reliable
          features: currentModel.features,
          contextWindow: currentModel.contextWindow,
          suitabilityScore: currentModel.suitabilityScore * 0.9 // Assume slightly lower suitability
        };
      }
    }
    catch (error) {
      console.warn('Failed to find cost-effective alternative:', error);
    }
    return null;
  }

  /**
   * Calculate efficiency metrics for consensus vs single model.
   *
   * @param {number} consensusCost - Cost of the consensus run.
   * @param {number} consensusQuality - Quality score of the consensus run.
   * @param {number} singleModelCost - Cost of the single-model baseline.
   * @param {number} singleModelQuality - Quality score of the baseline.
   * @param {number} consensusTime - Reported back as `timeToValue`.
   * @param {number} singleModelTime - Accepted for signature compatibility; not used.
   * @returns {Promise<Object>} `{ costPerToken, qualityPerDollar, timeToValue, totalValueScore }`,
   *   all describing the consensus run.
   */
  async calculateEfficiencyMetrics(consensusCost, consensusQuality, singleModelCost, singleModelQuality, consensusTime, singleModelTime) {
    const consensusCostPerToken = consensusCost / 2000; // Assume ~2000 tokens average
    const consensusQualityPerDollar = consensusQuality / consensusCost;
    // Calculate overall value score (quality improvement vs cost increase)
    const qualityImprovement = consensusQuality - singleModelQuality;
    const costIncrease = consensusCost - singleModelCost;
    // The small epsilon keeps the ratio finite when both costs are equal.
    const totalValueScore = qualityImprovement / (costIncrease + 0.001);
    return {
      costPerToken: consensusCostPerToken,
      qualityPerDollar: consensusQualityPerDollar,
      timeToValue: consensusTime,
      totalValueScore
    };
  }

  /**
   * Estimate token usage based on question analysis.
   *
   * Input tokens are ~1.3 per whitespace-separated word. Output tokens start
   * at 800 and are overridden by keyword matches; when several keyword groups
   * match, the last matching group wins (production/enterprise > simple/basic
   * > implement/build). Questions longer than 500 characters scale input by
   * 1.5 and output by 1.3.
   *
   * @param {string} question - Question text; null/undefined is treated as empty.
   * @returns {{input: number, output: number}} Rounded token estimates.
   */
  estimateTokenUsage(question) {
    const text = typeof question === 'string' ? question : String(question == null ? '' : question);
    // Trim before splitting: split(/\s+/) would otherwise count the empty
    // strings produced by leading/trailing whitespace (and by '') as words.
    const trimmed = text.trim();
    const questionWords = trimmed === '' ? 0 : trimmed.split(/\s+/).length;
    const lowered = text.toLowerCase();
    const mentionsAny = (...terms) => terms.some(term => lowered.includes(term));
    // Base estimates
    let inputTokens = questionWords * 1.3; // ~1.3 tokens per word average
    let outputTokens = 800; // Default output length
    // Adjust based on question characteristics
    if (mentionsAny('implement', 'build')) {
      outputTokens = 1200; // Implementation questions need more output
    }
    if (mentionsAny('simple', 'basic')) {
      outputTokens = 500; // Simple questions need less output
    }
    if (mentionsAny('production', 'enterprise')) {
      outputTokens = 1500; // Production questions need comprehensive output
    }
    if (text.length > 500) {
      inputTokens *= 1.5; // Complex questions have more context
      outputTokens *= 1.3;
    }
    return {
      input: Math.round(inputTokens),
      output: Math.round(outputTokens)
    };
  }

  /**
   * Scale base token estimates for a specific pipeline stage.
   *
   * @param {string} stage - Stage name. Stages other than generator, refiner,
   *   validator and curator use the base amounts unchanged.
   * @param {{input: number, output: number}} baseTokens - Base estimates.
   * @returns {{input: number, output: number}} Rounded per-stage estimates.
   */
  getStageTokenMultiplier(stage, baseTokens) {
    const multipliers = {
      generator: { input: 1.0, output: 1.0 }, // Generator uses base amounts
      refiner: { input: 1.5, output: 1.2 }, // Refiner sees original + generator output
      validator: { input: 1.8, output: 0.6 }, // Validator sees more context but outputs less
      curator: { input: 2.0, output: 1.1 } // Curator sees all previous work
    };
    // Own-property check so unknown stages (or names such as "constructor")
    // fall back to the base amounts instead of throwing or producing NaN.
    const mult = Object.prototype.hasOwnProperty.call(multipliers, stage)
      ? multipliers[stage]
      : { input: 1.0, output: 1.0 };
    return {
      input: Math.round(baseTokens.input * mult.input),
      output: Math.round(baseTokens.output * mult.output)
    };
  }

  /**
   * Calculate cost for a specific model and token usage.
   *
   * Per-token prices are derived from `model.estimatedCost`: the input price
   * is `estimatedCost / 1500` per token, and the output price is twice that.
   *
   * @param {{estimatedCost: number}} model
   * @param {{input: number, output: number}} tokens
   * @returns {{total: number, inputCost: number, outputCost: number}}
   */
  calculateModelCost(model, tokens) {
    // Derive per-1M-token prices from the model's per-request estimate
    const inputCostPer1M = model.estimatedCost / 1500 * 1000000; // Rough estimate, will be refined
    const outputCostPer1M = inputCostPer1M * 2; // Output typically 2x input cost
    const inputCost = (tokens.input * inputCostPer1M) / 1000000;
    const outputCost = (tokens.output * outputCostPer1M) / 1000000;
    return {
      total: inputCost + outputCost,
      inputCost,
      outputCost
    };
  }

  /**
   * Record cost analytics for learning and optimization.
   *
   * Inserts one row into `cost_analytics`. Failures are logged with
   * `console.warn` and not rethrown.
   *
   * @param {string} conversationId
   * @param {Object} costBreakdown - Reads `stageBreakdown` and `tokenBreakdown`
   *   (each defaults to `{}` when absent).
   * @param {Array<Object>} optimizationsApplied - Each entry's `potentialSavings`
   *   is summed into `savings_achieved`; a non-array is treated as empty.
   * @param {number} actualCost
   * @returns {Promise<void>}
   */
  async recordCostAnalytics(conversationId, costBreakdown, optimizationsApplied, actualCost) {
    try {
      const db = await getDatabase();
      // Missing inputs should not prevent the cost row from being written.
      const breakdown = costBreakdown || {};
      const applied = Array.isArray(optimizationsApplied) ? optimizationsApplied : [];
      const costEfficiencyScore = this.calculateCostEfficiencyScore(breakdown, actualCost);
      await db.run(`
        INSERT INTO cost_analytics
        (conversation_id, total_cost, cost_per_stage, tokens_per_stage,
        routing_optimizations, cost_efficiency_score, savings_achieved, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
      `, [
        conversationId,
        actualCost,
        JSON.stringify(breakdown.stageBreakdown || {}),
        JSON.stringify(breakdown.tokenBreakdown || {}),
        JSON.stringify(applied),
        costEfficiencyScore,
        applied.reduce((sum, opt) => sum + opt.potentialSavings, 0),
        new Date().toISOString()
      ]);
    }
    catch (error) {
      console.warn('Failed to record cost analytics:', error);
    }
  }

  /**
   * Update cost analytics with actual costs after consensus completion.
   *
   * Updates the matching `cost_analytics` row and the `total_cost` /
   * `updated_at` columns of the `conversations` row. Failures are logged with
   * `console.warn` and not rethrown.
   *
   * @param {string} conversationId
   * @param {Object} actualCostData - Reads `totalCost`, `stageBreakdown`,
   *   `tokenBreakdown` and the optional `qualityScore`.
   * @returns {Promise<void>}
   */
  async updateActualCosts(conversationId, actualCostData) {
    try {
      console.log(`💰 [UPDATEACTUALCOSTS] Received totalCost: ${actualCostData.totalCost} (type: ${typeof actualCostData.totalCost})`);
      console.log(`💰 [UPDATEACTUALCOSTS] Received stageBreakdown:`, actualCostData.stageBreakdown);
      const db = await getDatabase();
      // With a quality score, efficiency is quality per (cost * 100), with the
      // divisor floored at 0.001; otherwise fall back to the cost-only score.
      const costEfficiencyScore = actualCostData.qualityScore
        ? actualCostData.qualityScore / Math.max(0.001, actualCostData.totalCost * 100) // Quality per cost
        : this.calculateCostEfficiencyScore(actualCostData, actualCostData.totalCost);
      // Update existing cost analytics record with actual data
      await db.run(`
        UPDATE cost_analytics
        SET
          total_cost = ?,
          cost_per_stage = ?,
          tokens_per_stage = ?,
          cost_efficiency_score = ?
        WHERE conversation_id = ?
      `, [
        actualCostData.totalCost,
        // Default to {} so a missing breakdown is stored the same way as on insert
        JSON.stringify(actualCostData.stageBreakdown || {}),
        JSON.stringify(actualCostData.tokenBreakdown || {}),
        costEfficiencyScore,
        conversationId
      ]);
      // Also store/update in conversations table for budget reporting
      await db.run(`
        UPDATE conversations
        SET total_cost = ?, updated_at = ?
        WHERE id = ?
      `, [
        actualCostData.totalCost,
        new Date().toISOString(),
        conversationId
      ]);
      // Coerce before formatting: a non-number totalCost or non-string id must
      // not throw here, after both writes have already succeeded, and be
      // reported as a failed update.
      console.log(`💰 Updated actual costs for conversation ${String(conversationId).substring(0, 8)}: $${Number(actualCostData.totalCost).toFixed(6)}`);
    }
    catch (error) {
      console.warn('Failed to update actual cost analytics:', error);
    }
  }

  /**
   * Calculate cost efficiency score.
   *
   * Returns 1.0 at or below the $0.01 target, 0.1 at or above five times the
   * target, and a linear interpolation (floored at 0.1) in between.
   *
   * @param {Object} costBreakdown - Accepted for signature compatibility; not used.
   * @param {number} actualCost - Actual cost in dollars.
   * @returns {number} Efficiency score.
   */
  calculateCostEfficiencyScore(costBreakdown, actualCost) {
    const targetCost = 0.01; // $0.01 target for good efficiency
    if (actualCost <= targetCost) {
      return 1.0;
    }
    if (actualCost >= targetCost * 5) {
      return 0.1;
    }
    return Math.max(0.1, 1.0 - ((actualCost - targetCost) / (targetCost * 4)));
  }
}
// ===== CONVENIENCE FUNCTIONS =====
/**
* Quick cost estimate for a question
*/
export async function estimateQuestionCost(question, complexity = 'basic') {
  // Static per-stage average costs (in dollars) for each complexity tier.
  // `question` is accepted for API compatibility but does not influence the
  // result: the estimate depends on `complexity` only.
  const avgModelCosts = {
    minimal: { generator: 0.001, refiner: 0.0008, validator: 0.0005, curator: 0.001 },
    basic: { generator: 0.002, refiner: 0.0015, validator: 0.0008, curator: 0.002 },
    production: { generator: 0.004, refiner: 0.003, validator: 0.0015, curator: 0.004 }
  };
  // Unknown tiers fall back to 'basic' (the parameter default) instead of
  // throwing from Object.values(undefined).
  const tier = Object.prototype.hasOwnProperty.call(avgModelCosts, complexity) ? complexity : 'basic';
  // Total estimate is the sum of the four stage costs.
  return Object.values(avgModelCosts[tier]).reduce((sum, cost) => sum + cost, 0);
}
/**
* Get budget recommendation for a question type
*/
export async function getBudgetRecommendation(questionComplexity, qualityLevel = 'standard') {
  // Recommended total budget (in dollars) per complexity tier and quality level.
  const baseBudgets = {
    minimal: { standard: 0.003, high: 0.005, premium: 0.008 },
    basic: { standard: 0.008, high: 0.012, premium: 0.020 },
    production: { standard: 0.015, high: 0.025, premium: 0.040 }
  };
  const owns = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
  // Unknown inputs fall back to the 'basic' tier / 'standard' level rather
  // than throwing a TypeError or returning undefined.
  const tierBudgets = baseBudgets[owns(baseBudgets, questionComplexity) ? questionComplexity : 'basic'];
  return tierBudgets[owns(tierBudgets, qualityLevel) ? qualityLevel : 'standard'];
}
// The class is also exposed as the module's default export.
export default CostIntelligence;
//# sourceMappingURL=cost-intelligence.js.map