UNPKG

ultimate-mcp-server

Version:

The definitive all-in-one Model Context Protocol server for AI-assisted coding across 30+ platforms

314 lines 13.5 kB
/**
 * Intelligent Model Routing System
 *
 * Routes tasks to the most appropriate AI model based on
 * task characteristics, performance history, and constraints
 */
import { MODELS } from '../config/models.js';

/**
 * Chooses a model for a task by filtering the known model profiles down to
 * suitable candidates, scoring each candidate, and applying caller constraints.
 *
 * Collaborators (only the members used in this file are listed):
 * - `costOptimizer.selectOptimalModel({ type, estimatedTokens, complexity })`
 *   must return an object exposing `model.name`.
 * - `costOptimizer.createFallbackChain(modelName)` must return an iterable of
 *   model names.
 * - `performanceMonitor.getModelMetrics(modelName)` may return an object with
 *   `requests` and `errors` counters.
 */
export class ModelRouter {
  costOptimizer;
  performanceMonitor;
  /** Map of model name -> static profile (strengths, bestFor, avoid, performance, ...). */
  modelProfiles = new Map();
  /** Most recent routing decisions, capped at 1000 entries by routeTask(). */
  routingHistory = [];

  /**
   * @param {object} costOptimizer - Provides selectOptimalModel() and createFallbackChain().
   * @param {object} performanceMonitor - Provides getModelMetrics().
   */
  constructor(costOptimizer, performanceMonitor) {
    this.costOptimizer = costOptimizer;
    this.performanceMonitor = performanceMonitor;
    this.initializeModelProfiles();
  }

  /**
   * Initialize model profiles with characteristics.
   *
   * Each profile carries descriptive tags (strengths / weaknesses / bestFor /
   * avoid) and baseline performance figures used when scoring.
   */
  initializeModelProfiles() {
    // GPT-4o
    this.modelProfiles.set(MODELS.GPT_4O, {
      strengths: ['reasoning', 'coding', 'creativity', 'function-calling'],
      weaknesses: ['cost', 'speed'],
      bestFor: ['complex-reasoning', 'code-generation', 'system-design'],
      avoid: ['simple-tasks', 'bulk-processing'],
      performance: {
        averageLatency: 3000,
        successRate: 95,
        userSatisfaction: 90
      }
    });

    // GPT-4o Mini
    this.modelProfiles.set(MODELS.GPT_4O_MINI, {
      strengths: ['speed', 'cost', 'general-tasks'],
      weaknesses: ['complex-reasoning', 'nuanced-tasks'],
      bestFor: ['quick-responses', 'simple-coding', 'basic-analysis'],
      avoid: ['complex-architecture', 'deep-analysis'],
      performance: {
        averageLatency: 1000,
        successRate: 90,
        userSatisfaction: 85
      }
    });

    // Claude 3 Opus
    this.modelProfiles.set(MODELS.CLAUDE_3_OPUS, {
      strengths: ['analysis', 'long-context', 'nuanced-reasoning', 'safety'],
      weaknesses: ['cost', 'availability'],
      bestFor: ['document-analysis', 'complex-decisions', 'ethical-considerations'],
      avoid: ['real-time-applications', 'simple-queries'],
      performance: {
        averageLatency: 4000,
        successRate: 96,
        userSatisfaction: 92
      }
    });

    // DeepSeek Coder
    this.modelProfiles.set(MODELS.DEEPSEEK_CODER_V2, {
      strengths: ['code-completion', 'debugging', 'code-understanding'],
      weaknesses: ['general-reasoning', 'non-code-tasks'],
      bestFor: ['code-generation', 'bug-fixing', 'code-review'],
      avoid: ['general-conversation', 'creative-writing'],
      performance: {
        averageLatency: 2000,
        successRate: 93,
        userSatisfaction: 88
      }
    });

    // Gemini Flash
    this.modelProfiles.set(MODELS.GEMINI_2_FLASH, {
      strengths: ['speed', 'vision', 'massive-context', 'cost'],
      weaknesses: ['consistency', 'specialized-coding'],
      bestFor: ['ui-analysis', 'large-codebase-analysis', 'quick-tasks'],
      avoid: ['critical-code-generation', 'precise-calculations'],
      performance: {
        averageLatency: 800,
        successRate: 88,
        userSatisfaction: 85
      }
    });

    // Qwen Coder
    this.modelProfiles.set(MODELS.QWEN_2_5_CODER_32B, {
      strengths: ['code-generation', 'multi-language', 'efficiency'],
      weaknesses: ['english-nuance', 'creative-tasks'],
      bestFor: ['algorithm-implementation', 'code-translation', 'technical-docs'],
      avoid: ['creative-writing', 'cultural-context'],
      performance: {
        averageLatency: 1500,
        successRate: 91,
        userSatisfaction: 87
      }
    });
  }

  /**
   * Route a task to the best model.
   *
   * @param {object} task - Reads `type`, `complexity`, `requiresSpeed`,
   *   `estimatedTokens` and `contextLength`.
   * @param {object} [constraints] - Optional `preferredModels`, `excludeModels`
   *   and `maxLatency`.
   * @returns {Promise<object>} Decision with `model`, `reasoning`,
   *   `alternativeModels`, `estimatedCost` and `estimatedLatency`.
   */
  async routeTask(task, constraints) {
    const candidates = this.getCandidateModels(task, constraints);
    const scores = await this.scoreModels(candidates, task);
    const decision = this.makeRoutingDecision(scores, task, constraints);

    // Record decision, keeping only the most recent 1000 entries.
    this.routingHistory.push(decision);
    if (this.routingHistory.length > 1000) {
      this.routingHistory.shift();
    }
    return decision;
  }

  /**
   * Get candidate models for a task.
   *
   * Starts from every profiled model, applies the include/exclude constraints,
   * then drops models whose strengths do not match the task type.
   *
   * @param {object} task - Reads `type` and `contextLength`.
   * @param {object} [constraints] - Optional `preferredModels` / `excludeModels` arrays.
   * @returns {Array} Non-empty list of model names.
   */
  getCandidateModels(task, constraints) {
    let candidates = Array.from(this.modelProfiles.keys());

    // Apply constraints
    if (constraints?.preferredModels) {
      candidates = candidates.filter((m) => constraints.preferredModels.includes(m));
    }
    if (constraints?.excludeModels) {
      candidates = candidates.filter((m) => !constraints.excludeModels.includes(m));
    }

    // Filter by task type
    candidates = candidates.filter((model) => {
      const { strengths } = this.modelProfiles.get(model);

      if (task.type === 'coding' || task.type === 'debugging') {
        // 'code-generation' is accepted as well: it is the only coding-related
        // strength the Qwen Coder profile lists, so without it that model
        // could never be considered for coding tasks.
        return (
          strengths.includes('coding') ||
          strengths.includes('code-completion') ||
          strengths.includes('code-generation')
        );
      }
      if (task.type === 'vision') {
        return strengths.includes('vision');
      }
      if (task.type === 'analysis' && task.contextLength && task.contextLength > 100000) {
        return strengths.includes('long-context') || strengths.includes('massive-context');
      }
      return true; // General models can handle most tasks
    });

    // Always return at least one model (fallback to GPT-4o-mini)
    if (candidates.length === 0) {
      candidates = [MODELS.GPT_4O_MINI];
    }
    return candidates;
  }

  /**
   * Score models for a specific task.
   *
   * The score is the sum of four weighted parts: fitness (x40), performance
   * (x30), speed (x20 when `task.requiresSpeed`, else x10) and cost (x10 when
   * `task.requiresSpeed`, else x20). The complexity multipliers can push the
   * fitness part above 40.
   *
   * @param {Array} models - Model names; each must have a profile.
   * @param {object} task - Reads `type`, `complexity`, `requiresSpeed`, `estimatedTokens`.
   * @returns {Promise<Array>} `{ model, score, breakdown }` entries, highest score first.
   */
  async scoreModels(models, task) {
    const scores = [];

    for (const model of models) {
      const profile = this.modelProfiles.get(model);
      const breakdown = {};
      let score = 0;

      // 1. Task fitness score (40%)
      let fitnessScore = 0.5;
      if (profile.bestFor.some((use) => task.type.includes(use))) {
        fitnessScore = 1.0;
      } else if (profile.avoid.some((avoid) => task.type.includes(avoid))) {
        fitnessScore = 0.2;
      }
      // Complexity matching
      if (task.complexity === 'complex' && profile.strengths.includes('reasoning')) {
        fitnessScore *= 1.2;
      } else if (task.complexity === 'simple' && profile.strengths.includes('speed')) {
        fitnessScore *= 1.1;
      }
      breakdown.fitness = fitnessScore * 40;
      score += breakdown.fitness;

      // 2. Performance score (30%)
      const perfMetrics = this.performanceMonitor.getModelMetrics(model);
      let perfScore = profile.performance.successRate / 100;
      if (perfMetrics && typeof perfMetrics === 'object' && 'requests' in perfMetrics) {
        // Use the observed success ratio once there are more than 10 requests;
        // otherwise keep the profile's baseline.
        const observed =
          perfMetrics.requests > 10
            ? (perfMetrics.requests - perfMetrics.errors) / perfMetrics.requests
            : perfScore;
        perfScore = Math.min(observed, 1.0);
      }
      breakdown.performance = perfScore * 30;
      score += breakdown.performance;

      // 3. Speed score (20% if speed required, 10% otherwise)
      const speedWeight = task.requiresSpeed ? 20 : 10;
      const speedScore = 1 - profile.performance.averageLatency / 5000;
      breakdown.speed = Math.max(speedScore * speedWeight, 0);
      score += breakdown.speed;

      // 4. Cost score (10% if speed required, 20% otherwise)
      const costWeight = task.requiresSpeed ? 10 : 20;
      const costScore = await this.getCostScore(model, task.estimatedTokens);
      breakdown.cost = costScore * costWeight;
      score += breakdown.cost;

      scores.push({ model, score, breakdown });
    }

    // Sort by score, best first
    scores.sort((a, b) => b.score - a.score);
    return scores;
  }

  /**
   * Make final routing decision.
   *
   * Picks the top-scored model, unless `constraints.maxLatency` is set and the
   * top model's profiled latency exceeds it; in that case the best-scored
   * model within the limit is selected instead, when one exists.
   *
   * @param {Array} scores - Output of scoreModels(), highest score first.
   * @param {object} task - Reads `estimatedTokens` (plus what the reasoning text uses).
   * @param {object} [constraints] - Optional `maxLatency`.
   * @returns {object} Decision with `model`, `reasoning`, `alternativeModels`,
   *   `estimatedCost` and `estimatedLatency`.
   */
  makeRoutingDecision(scores, task, constraints) {
    const topModel = scores[0];

    // Generate alternatives - include other scored models and fallback chain
    let alternatives = [];
    if (scores.length > 1) {
      alternatives = scores.slice(1, 4).map((s) => s.model);
    }

    // Add fallback models if we don't have enough alternatives
    if (alternatives.length < 3 && topModel) {
      const fallbackChain = this.costOptimizer.createFallbackChain(topModel.model);
      for (const fallbackModel of fallbackChain) {
        if (!alternatives.includes(fallbackModel) && fallbackModel !== topModel.model) {
          alternatives.push(fallbackModel);
          if (alternatives.length >= 3) break;
        }
      }
    }

    // Generate reasoning
    let reasoning = this.generateRoutingReasoning(topModel, task, constraints);

    // Estimate cost and latency
    const estimatedCost = this.estimateCost(topModel.model, task.estimatedTokens);
    const estimatedLatency = this.modelProfiles.get(topModel.model).performance.averageLatency;

    // Check constraints
    if (constraints?.maxLatency && estimatedLatency > constraints.maxLatency) {
      // Find faster alternative (scores are sorted, so this is the best-scored one)
      const fasterModel = scores.find(
        (s) => this.modelProfiles.get(s.model).performance.averageLatency <= constraints.maxLatency
      );
      if (fasterModel) {
        return {
          model: fasterModel.model,
          reasoning: `Selected ${fasterModel.model} due to latency constraint`,
          // The replacement usually comes from the alternatives list; a model
          // must not be reported as an alternative to itself.
          alternativeModels: alternatives.filter((m) => m !== fasterModel.model),
          estimatedCost: this.estimateCost(fasterModel.model, task.estimatedTokens),
          estimatedLatency: this.modelProfiles.get(fasterModel.model).performance.averageLatency
        };
      }
      // No model meets the constraint, but mention it in reasoning
      reasoning += ' (latency constraint could not be met)';
    }

    return {
      model: topModel.model,
      reasoning,
      alternativeModels: alternatives,
      estimatedCost,
      estimatedLatency
    };
  }

  /**
   * Generate routing reasoning.
   *
   * @param {object} selection - A scoreModels() entry (`model`, `breakdown`).
   * @param {object} task - Reads `type`, `requiresSpeed`, `contextLength`.
   * @param {object} [constraints] - Accepted for signature compatibility; not read here.
   * @returns {string} Human-readable explanation of the selection.
   */
  generateRoutingReasoning(selection, task, constraints) {
    const profile = this.modelProfiles.get(selection.model);
    const { breakdown } = selection;

    // Always include task type in reasoning
    const reasons = [`${task.type} task`];

    // Task fit
    if (breakdown.fitness > 30) {
      reasons.push(`excellent fit for ${task.type} tasks`);
    }
    // Performance
    if (breakdown.performance > 25) {
      reasons.push('high success rate');
    }
    // Speed
    if (task.requiresSpeed && breakdown.speed > 15) {
      reasons.push('fast response time');
    }
    // Cost
    if (breakdown.cost > 15) {
      reasons.push('cost-effective');
    }
    // Special capabilities
    if (task.type === 'vision' && profile.strengths.includes('vision')) {
      reasons.push('vision capabilities');
    }
    if (task.contextLength && task.contextLength > 100000) {
      reasons.push('handles large context');
    }

    return `Selected ${selection.model} because: ${reasons.join(', ')}`;
  }

  /**
   * Get cost score for a model.
   *
   * Compares the model's estimated cost with that of the model the cost
   * optimizer picks for a generic medium-complexity task of the same size.
   *
   * @param {string} model - Model name.
   * @param {number} estimatedTokens - Expected token count for the task.
   * @returns {Promise<number>} Ratio capped at 1.0; cheaper models score higher.
   */
  async getCostScore(model, estimatedTokens) {
    // Delegate to cost optimizer
    const optimalModel = this.costOptimizer.selectOptimalModel({
      type: 'general',
      estimatedTokens,
      complexity: 'medium'
    });

    // Compare costs
    const modelCost = this.estimateCost(model, estimatedTokens);
    const optimalCost = this.estimateCost(optimalModel.model.name, estimatedTokens);

    // Return inverse ratio (lower cost = higher score); the small constant
    // keeps the division defined when modelCost is 0.
    return Math.min(optimalCost / (modelCost + 0.001), 1.0);
  }

  /**
   * Estimate cost (simplified).
   *
   * @param {string} model - Model name; models missing from the table use 0.001 per 1K tokens.
   * @param {number} tokens - Token count; a missing or non-numeric value counts as 0.
   * @returns {number} Estimated cost for `tokens` tokens.
   */
  estimateCost(model, tokens) {
    // This is simplified - in production, use actual cost data
    const costMap = {
      [MODELS.GPT_4O]: 0.01,
      [MODELS.GPT_4O_MINI]: 0.0006,
      [MODELS.CLAUDE_3_OPUS]: 0.075,
      [MODELS.CLAUDE_3_HAIKU]: 0.00125,
      [MODELS.GEMINI_2_FLASH]: 0.00003,
      [MODELS.DEEPSEEK_CODER_V2]: 0.00028
    };

    // An undefined token count would otherwise yield NaN, which spreads into
    // every total score; the score comparator then returns NaN, which sort()
    // treats as "equal", silently disabling the ranking.
    const numericTokens = Number(tokens);
    const tokenCount = Number.isFinite(numericTokens) ? numericTokens : 0;

    return (tokenCount / 1000) * (costMap[model] || 0.001);
  }

  /**
   * Get routing insights.
   *
   * @returns {object} `totalRoutings`, per-model `modelUsage` counts,
   *   `averageScore` (never updated here, so always 0) and `recommendations`.
   */
  getRoutingInsights() {
    const insights = {
      totalRoutings: this.routingHistory.length,
      modelUsage: {},
      averageScore: 0,
      recommendations: []
    };

    // Analyze routing history
    for (const decision of this.routingHistory) {
      insights.modelUsage[decision.model] = (insights.modelUsage[decision.model] || 0) + 1;
    }

    // Generate recommendations: flag a model that takes more than 70% of routings
    const mostUsed = Object.entries(insights.modelUsage).sort((a, b) => b[1] - a[1])[0];
    if (mostUsed && mostUsed[1] > this.routingHistory.length * 0.7) {
      insights.recommendations.push(
        `Consider diversifying model usage. ${mostUsed[0]} used ${mostUsed[1]} times.`
      );
    }

    return insights;
  }
}
//# sourceMappingURL=model-router.js.map