UNPKG

ultimate-mcp-server

Version:

The definitive all-in-one Model Context Protocol server for AI-assisted coding across 30+ platforms

465 lines 18.9 kB
/**
 * Cost Optimization Engine
 *
 * Intelligently manages model selection and token usage
 * to minimize costs while maintaining quality
 */
import { MODELS } from '../config/models.js';

/** Numeric rank of each quality tier; higher is better. */
const QUALITY_RANK = { high: 3, medium: 2, low: 1 };

/** Reporting windows understood by getCostReport, in milliseconds. */
const PERIOD_MS = new Map([
    ['hour', 3600000],
    ['day', 86400000],
    ['week', 604800000],
    ['month', 2592000000],
]);

/** Longest reporting window; usage events older than this are discarded. */
const MAX_PERIOD_MS = 2592000000;

/** Marker appended to a prompt that had to be cut short. */
const ELLIPSIS = '...';

/**
 * Politeness / verbosity phrases and their shorter replacements.
 * Order matters: "Could you please" must run before the bare "Please" rule,
 * otherwise "Please " is stripped first and the longer phrase never matches.
 * The regexes are global and shared; String.prototype.replace resets
 * lastIndex for global regexes, so reuse across calls is safe.
 */
const PHRASE_REWRITES = [
    [/Could you please\s+/gi, ''],
    [/Please\s+/gi, ''],
    [/I would like you to\s+/gi, ''],
    [/Can you\s+/gi, ''],
    [/Make sure to\s+/gi, 'Ensure '],
    [/It is important that\s+/gi, 'Important: '],
    [/In order to\s+/gi, 'To '],
    [/\s+very\s+/gi, ' '],
    [/\s+really\s+/gi, ' '],
];

/** Whole-word, case-insensitive term -> abbreviation rewrites. */
const ABBREVIATION_REWRITES = [
    ['artificial intelligence', 'AI'],
    ['machine learning', 'ML'],
    ['natural language processing', 'NLP'],
    ['application programming interface', 'API'],
    ['user interface', 'UI'],
    ['user experience', 'UX'],
    ['typescript', 'TS'],
    ['javascript', 'JS'],
].map(([phrase, abbreviation]) => [new RegExp(`\\b${phrase}\\b`, 'gi'), abbreviation]);

/** Filler words removed outright, together with any whitespace that follows. */
const FILLER_PATTERNS = [
    'basically',
    'actually',
    'obviously',
    'clearly',
    'simply',
    'just',
    'really',
    'very',
    'quite',
].map((word) => new RegExp(`\\b${word}\\b\\s*`, 'gi'));

/** Map a tier label to a 0..1 score: `best` -> 1, 'medium' -> 0.7, anything else -> 0.4. */
function tierScore(value, best) {
    if (value === best) {
        return 1;
    }
    return value === 'medium' ? 0.7 : 0.4;
}

/**
 * Compute the ranking score of one model for one task.
 *
 * The cost term is 40 / (estimatedCost + 0.01). It is NOT normalized to the
 * 0..40 range, so for cheap requests it can be far larger than the quality,
 * speed and fitness terms combined.
 */
function scoreModel(model, task, estimatedCost) {
    let score = 0;
    // Lower cost -> higher score; the 0.01 keeps the division finite at zero cost.
    const costScore = 1 / (estimatedCost + 0.01);
    score += costScore * 40;
    score += tierScore(model.quality, 'high') * 30;
    score += tierScore(model.speed, 'fast') * 20;

    // Task fitness: how well the model matches the kind of work requested.
    let fitnessScore = 0.5;
    if (task.type === 'coding' && model.name?.includes('coder')) {
        fitnessScore = 1;
    }
    if (task.type === 'debugging' &&
        (model.capabilities?.includes('debugging') || model.name?.includes('coder'))) {
        fitnessScore = 1;
    }
    if (task.type === 'vision' && model.capabilities?.includes('vision')) {
        fitnessScore = 0.9;
    }

    if (task.complexity === 'complex') {
        // Complex work: strongly favor high quality, penalize everything below it.
        if (model.quality === 'high') {
            fitnessScore = 1;
            score += 50;
        }
        if (model.quality === 'medium') {
            score -= 30;
        }
        if (model.quality === 'low') {
            score -= 60;
        }
    }
    else if (task.complexity === 'simple') {
        // Simple work: favor fast and cheap models, penalize premium ones.
        if (model.speed === 'fast') {
            fitnessScore = 0.9;
        }
        score += costScore * 20;
        if (model.category === 'premium') {
            score -= 20;
        }
    }
    score += fitnessScore * 10;
    return score;
}

/** Sum the usage events recorded at or after `since` into a Map of model -> { tokens, cost }. */
function aggregateEventsSince(events, since) {
    const totals = new Map();
    for (const event of events) {
        if (event.timestamp < since) {
            continue;
        }
        let entry = totals.get(event.model);
        if (!entry) {
            entry = { tokens: 0, cost: 0 };
            totals.set(event.model, entry);
        }
        entry.tokens += event.tokens;
        entry.cost += event.cost;
    }
    return totals;
}

export class CostOptimizer {
    /** Model id -> static description (provider, pricing, speed, quality, capabilities). */
    modelInfo = new Map();
    /** Model id -> all-time { tokens, cost } totals accumulated by trackUsage. */
    usageHistory = new Map();
    /** Not read or written anywhere in this file. */
    modelPerformance = new Map();
    /** Stored by setConstraints; not consulted by any method in this file. */
    budgetConstraints = {};
    /**
     * Timestamped usage records ({ model, tokens, cost, timestamp }) appended by
     * trackUsage and used for windowed cost reports. Entries older than the
     * longest reporting window are pruned on every trackUsage call.
     */
    usageEvents = [];

    constructor() {
        this.initializeModelInfo();
    }

    /**
     * Initialize model information with costs and capabilities
     */
    initializeModelInfo() {
        const catalogue = [
            // OpenAI models
            {
                name: MODELS.GPT_4O,
                provider: 'openai',
                category: 'premium',
                contextWindow: 128000,
                cost: { inputPer1k: 0.0025, outputPer1k: 0.01 },
                speed: 'medium',
                quality: 'high',
                capabilities: ['coding', 'reasoning', 'vision', 'function-calling']
            },
            {
                name: MODELS.GPT_4O_MINI,
                provider: 'openai',
                category: 'efficient',
                contextWindow: 128000,
                cost: { inputPer1k: 0.00015, outputPer1k: 0.0006 },
                speed: 'fast',
                quality: 'medium',
                capabilities: ['coding', 'reasoning', 'function-calling']
            },
            // Anthropic models
            {
                name: MODELS.CLAUDE_3_OPUS,
                provider: 'anthropic',
                category: 'premium',
                contextWindow: 200000,
                cost: { inputPer1k: 0.015, outputPer1k: 0.075 },
                speed: 'slow',
                quality: 'high',
                capabilities: ['coding', 'reasoning', 'vision', 'long-context']
            },
            {
                name: MODELS.CLAUDE_3_HAIKU,
                provider: 'anthropic',
                category: 'efficient',
                contextWindow: 200000,
                cost: { inputPer1k: 0.00025, outputPer1k: 0.00125 },
                speed: 'fast',
                quality: 'medium',
                capabilities: ['coding', 'reasoning', 'long-context']
            },
            // Google models
            {
                name: MODELS.GEMINI_2_FLASH,
                provider: 'google',
                category: 'efficient',
                contextWindow: 1000000,
                cost: { inputPer1k: 0.00001, outputPer1k: 0.00003 },
                speed: 'fast',
                quality: 'medium',
                capabilities: ['coding', 'reasoning', 'vision', 'massive-context']
            },
            // DeepSeek models
            {
                name: MODELS.DEEPSEEK_CODER_V2,
                provider: 'deepseek',
                category: 'specialized',
                contextWindow: 128000,
                cost: { inputPer1k: 0.00014, outputPer1k: 0.00028 },
                speed: 'medium',
                quality: 'high',
                capabilities: ['coding', 'code-completion', 'debugging']
            }
            // Add more models as needed...
        ];
        // Registration order is significant: it is the tie-break order for the
        // stable sorts used elsewhere in this class.
        for (const entry of catalogue) {
            this.modelInfo.set(entry.name, entry);
        }
    }

    /**
     * Select the most cost-effective model for a task
     *
     * @param {object} task - Reads `type`, `complexity` and `estimatedTokens`.
     *   A missing `estimatedTokens` is treated as 0 so that it neither filters
     *   out every model nor produces a NaN cost.
     * @param {object} [constraints] - Optional `minQuality`, `requiredCapabilities`
     *   (array) and `maxCost`.
     * @returns {{model: object, reason: string, score: number, estimatedCost: number, alternatives: string[]}}
     *   The chosen model, why it was chosen, and up to three runner-up model names.
     *   If `maxCost` is given but no candidate fits it, the best-scoring candidate
     *   is returned regardless of budget. If no candidate survives the filters,
     *   the default model is returned with score 0.
     */
    selectOptimalModel(task, constraints = {}) {
        const tokenEstimate = task.estimatedTokens ?? 0;
        let candidates = Array.from(this.modelInfo.values());

        // Filter by required capabilities
        if (task.type === 'vision') {
            candidates = candidates.filter((m) => m.capabilities.includes('vision'));
        }
        else if (task.type === 'coding') {
            candidates = candidates.filter((m) => m.capabilities.includes('coding') ||
                m.capabilities.includes('code-completion'));
        }

        // Filter by quality requirements
        if (constraints.minQuality) {
            const minQualityScore = QUALITY_RANK[constraints.minQuality];
            candidates = candidates.filter((m) => QUALITY_RANK[m.quality] >= minQualityScore);
        }

        // Filter by context window
        candidates = candidates.filter((m) => m.contextWindow >= tokenEstimate);

        // Filter by additional capabilities
        if (constraints.requiredCapabilities) {
            candidates = candidates.filter((m) => constraints.requiredCapabilities.every((cap) => m.capabilities.includes(cap)));
        }

        // Score and rank models, best first
        const scored = candidates.map((model) => ({
            model,
            score: scoreModel(model, task, this.estimateCost(model, tokenEstimate))
        }));
        scored.sort((a, b) => b.score - a.score);

        // Apply cost constraint if specified
        if (constraints.maxCost) {
            const withinBudget = scored.filter((s) => this.estimateCost(s.model, tokenEstimate) <= constraints.maxCost);
            if (withinBudget.length > 0) {
                const [best] = withinBudget;
                return {
                    model: best.model,
                    reason: `Selected ${best.model.name} - within budget constraint`,
                    score: best.score,
                    estimatedCost: this.estimateCost(best.model, tokenEstimate),
                    alternatives: withinBudget.slice(1, 4).map((s) => s.model.name)
                };
            }
        }

        const selected = scored[0];
        if (!selected) {
            const defaultModel = this.getDefaultModel();
            return {
                model: defaultModel,
                reason: 'Using default model',
                score: 0,
                estimatedCost: this.estimateCost(defaultModel, tokenEstimate),
                alternatives: []
            };
        }

        // Build reason based on task and model characteristics
        const chosen = selected.model;
        const reasons = [];
        if (task.type === 'coding' &&
            (chosen.name.includes('coder') || chosen.capabilities.includes('coding'))) {
            reasons.push('optimized for coding tasks');
        }
        if (task.type === 'debugging' &&
            (chosen.capabilities.includes('debugging') || chosen.name.includes('coder'))) {
            reasons.push('debugging support');
        }
        if (task.type === 'vision' && chosen.capabilities.includes('vision')) {
            reasons.push('vision capabilities');
        }
        if (chosen.speed === 'fast' && task.complexity === 'simple') {
            reasons.push('fast response for simple task');
        }
        if (chosen.quality === 'high' && task.complexity === 'complex') {
            reasons.push('high quality for complex task');
        }
        const detail = reasons.length > 0
            ? reasons.join(', ')
            : `highest score (${selected.score.toFixed(2)})`;

        return {
            model: chosen,
            reason: `Selected ${chosen.name} - ${detail}`,
            score: selected.score,
            estimatedCost: this.estimateCost(chosen, tokenEstimate),
            // Top 3 after the selected one
            alternatives: scored.slice(1, 4).map((s) => s.model.name)
        };
    }

    /**
     * Optimize token usage in prompts
     *
     * Collapses whitespace, strips politeness phrases and filler words, and
     * abbreviates a few common terms. The rewrite is lossy: all line breaks
     * become single spaces.
     *
     * @param {string} prompt - Text to compress.
     * @param {number} [maxTokens] - Optional budget, estimated at 4 characters
     *   per token. When the compressed prompt exceeds it, the text is cut and
     *   '...' is appended; for budgets of at least one token the result,
     *   ellipsis included, fits within `maxTokens * 4` characters.
     * @returns {{original: string, optimized: string, savings: {tokens: number, percentage: number}}}
     *   `percentage` is 0 for an empty prompt.
     */
    optimizeTokenUsage(prompt, maxTokens) {
        const originalLength = prompt.length;

        // 1. Collapse every whitespace run (including line breaks) to one space
        let optimized = prompt.replace(/\s+/g, ' ').trim();

        // 2. Compress repeated instructions
        for (const [pattern, replacement] of PHRASE_REWRITES) {
            optimized = optimized.replace(pattern, replacement);
        }

        // 3. Use abbreviations for common terms
        for (const [pattern, abbreviation] of ABBREVIATION_REWRITES) {
            optimized = optimized.replace(pattern, abbreviation);
        }

        // 4. Remove filler words
        for (const pattern of FILLER_PATTERNS) {
            optimized = optimized.replace(pattern, '');
        }

        // 5. Truncate if needed
        if (maxTokens) {
            const estimatedTokens = Math.ceil(optimized.length / 4); // Rough estimate
            if (estimatedTokens > maxTokens) {
                const targetLength = maxTokens * 4;
                // Reserve room for the ellipsis so the result stays within budget.
                const keep = Math.max(0, targetLength - ELLIPSIS.length);
                optimized = optimized.substring(0, keep) + ELLIPSIS;
            }
        }

        const savings = originalLength - optimized.length;
        return {
            original: prompt,
            optimized,
            savings: {
                tokens: Math.ceil(savings / 4), // Rough token estimate
                // Guard the empty prompt: 0 / 0 would otherwise yield NaN.
                percentage: originalLength > 0 ? Math.round((savings / originalLength) * 100) : 0
            }
        };
    }

    /**
     * Create fallback chain for model failures
     *
     * @param {string} primaryModel - Id of the preferred model.
     * @returns {string[]} Model ids to try in order. For an unregistered primary
     *   this is just the default model; otherwise the primary comes first and
     *   the chain holds at least three entries.
     */
    createFallbackChain(primaryModel) {
        const primary = this.modelInfo.get(primaryModel);
        if (!primary) {
            return [this.getDefaultModel().name];
        }
        const chain = [primaryModel];
        const registered = Array.from(this.modelInfo.values());
        const costPer1k = (m) => this.estimateCost(m, 1000);

        // Add similar quality model from different provider (cheapest first)
        const peers = registered
            .filter((m) => m.name !== primaryModel &&
            m.quality === primary.quality &&
            m.provider !== primary.provider)
            .sort((a, b) => costPer1k(a) - costPer1k(b));
        if (peers.length > 0) {
            chain.push(peers[0].name);
        }

        // Add cheaper alternative, preferring higher quality among cheaper options
        const primaryCost = costPer1k(primary);
        const cheaper = registered
            .filter((m) => !chain.includes(m.name) && costPer1k(m) < primaryCost)
            .sort((a, b) => QUALITY_RANK[b.quality] - QUALITY_RANK[a.quality]);
        if (cheaper.length > 0) {
            chain.push(cheaper[0].name);
        }

        // Always end with reliable fallbacks
        if (!chain.includes(MODELS.GPT_4O_MINI)) {
            chain.push(MODELS.GPT_4O_MINI);
        }

        // Ensure we have at least 3 models in the chain
        const fallbackModels = [MODELS.CLAUDE_3_HAIKU, MODELS.GEMINI_2_FLASH, MODELS.DEEPSEEK_CODER_V2];
        for (const fallback of fallbackModels) {
            if (chain.length >= 3) {
                break;
            }
            if (!chain.includes(fallback)) {
                chain.push(fallback);
            }
        }
        return chain;
    }

    /**
     * Track usage for cost monitoring
     *
     * Adds the call to the all-time totals in `usageHistory` and records a
     * timestamped event so that getCostReport can honor its `period` argument.
     *
     * @param {string} model - Model id; unregistered ids are tracked at cost 0.
     * @param {number} inputTokens
     * @param {number} outputTokens
     */
    trackUsage(model, inputTokens, outputTokens) {
        const cost = this.calculateCost(model, inputTokens, outputTokens);
        const tokens = inputTokens + outputTokens;

        let usage = this.usageHistory.get(model);
        if (!usage) {
            usage = { tokens: 0, cost: 0 };
            this.usageHistory.set(model, usage);
        }
        usage.tokens += tokens;
        usage.cost += cost;

        const now = Date.now();
        this.usageEvents.push({ model, tokens, cost, timestamp: now });

        // Drop events that no reporting window can reach any more, so the
        // event log does not grow without bound.
        const cutoff = now - MAX_PERIOD_MS;
        let staleCount = 0;
        while (staleCount < this.usageEvents.length &&
            this.usageEvents[staleCount].timestamp < cutoff) {
            staleCount += 1;
        }
        if (staleCount > 0) {
            this.usageEvents.splice(0, staleCount);
        }
    }

    /**
     * Get cost report
     *
     * @param {string} [period] - 'hour', 'day', 'week' or 'month' (30 days)
     *   restricts the report to usage recorded through trackUsage within that
     *   window. Any other value (including undefined) reports the all-time
     *   totals held in `usageHistory`.
     * @returns {{totalCost: number, byModel: object, byProvider: object, modelBreakdown: object, recommendations: string[]}}
     *   `byProvider` keys are the part of the model id before the first '/'.
     */
    getCostReport(period) {
        const windowMs = PERIOD_MS.get(period);
        const usageByModel = windowMs === undefined
            ? this.usageHistory // All time
            : aggregateEventsSince(this.usageEvents, Date.now() - windowMs);

        const report = {
            totalCost: 0,
            byModel: {},
            byProvider: {},
            modelBreakdown: {},
            recommendations: []
        };

        // Calculate totals
        for (const [model, usage] of usageByModel) {
            report.totalCost += usage.cost;
            report.modelBreakdown[model] = usage.cost;
            report.byModel[model] = {
                tokens: usage.tokens,
                cost: usage.cost,
                percentage: 0 // Filled in once the total is known
            };
            const provider = model.split('/')[0];
            report.byProvider[provider] = (report.byProvider[provider] || 0) + usage.cost;
        }

        // Calculate percentages
        for (const entry of Object.values(report.byModel)) {
            entry.percentage = report.totalCost > 0
                ? (entry.cost / report.totalCost) * 100
                : 0;
        }

        // Generate recommendations
        if (report.totalCost > 100) {
            report.recommendations.push('Consider using more cost-effective models for simple tasks');
        }
        const highCostModels = Object.entries(report.byModel)
            .filter(([, data]) => data.percentage > 50)
            .map(([model]) => model);
        if (highCostModels.length > 0) {
            report.recommendations.push(`High concentration of costs in: ${highCostModels.join(', ')}. Consider diversifying.`);
        }
        return report;
    }

    /**
     * Estimate cost for a model and token count
     *
     * @param {object} model - A model description with `cost.inputPer1k` / `cost.outputPer1k`.
     * @param {number} estimatedTokens - Total tokens; split 70% input / 30% output.
     * @returns {number} Estimated cost in the same unit as the per-1k prices.
     */
    estimateCost(model, estimatedTokens) {
        // Assume 30% of tokens are output
        const inputTokens = estimatedTokens * 0.7;
        const outputTokens = estimatedTokens * 0.3;
        return ((inputTokens / 1000) * model.cost.inputPer1k +
            (outputTokens / 1000) * model.cost.outputPer1k);
    }

    /**
     * Calculate actual cost
     *
     * @param {string} modelName - Model id.
     * @param {number} inputTokens
     * @param {number} outputTokens
     * @returns {number} Cost of the call, or 0 when the model is not registered.
     */
    calculateCost(modelName, inputTokens, outputTokens) {
        const model = this.modelInfo.get(modelName);
        if (!model) {
            return 0;
        }
        return ((inputTokens / 1000) * model.cost.inputPer1k +
            (outputTokens / 1000) * model.cost.outputPer1k);
    }

    /**
     * Set budget constraints
     *
     * The value is stored on `budgetConstraints`; no method in this file reads it.
     */
    setConstraints(constraints) {
        this.budgetConstraints = constraints;
    }

    /**
     * Get optimization insights
     *
     * @returns {{recommendations: string[], savingsOpportunity: number, currentSpend: number, projectedSavings: number}}
     *   When premium-category models account for more than 30% of all tracked
     *   tokens, a recommendation is added and `savingsOpportunity` is set to
     *   30% of current spend. `projectedSavings` is always 0.
     */
    getOptimizationInsights() {
        const insights = {
            recommendations: [],
            savingsOpportunity: 0,
            currentSpend: 0,
            projectedSavings: 0
        };

        // Calculate current spend and token volumes in one pass
        let totalTokens = 0;
        let premiumTokens = 0;
        for (const [model, usage] of this.usageHistory) {
            insights.currentSpend += usage.cost;
            totalTokens += usage.tokens;
            if (this.modelInfo.get(model)?.category === 'premium') {
                premiumTokens += usage.tokens;
            }
        }

        // Analyze usage patterns (skip when nothing was tracked, avoiding 0 / 0)
        if (totalTokens > 0 && premiumTokens / totalTokens > 0.3) {
            insights.recommendations.push('Consider using more cost-effective models for simple tasks');
            insights.savingsOpportunity = insights.currentSpend * 0.3;
        }
        return insights;
    }

    /**
     * Get default model
     *
     * @returns {object|undefined} The GPT-4o-mini entry, or the first registered
     *   model if that entry is missing; undefined when no model is registered.
     */
    getDefaultModel() {
        return this.modelInfo.get(MODELS.GPT_4O_MINI) || Array.from(this.modelInfo.values())[0];
    }
}
//# sourceMappingURL=cost-optimizer.js.map