UNPKG

shipdeck

Version:

Ship MVPs in 48 hours. Fix bugs in 30 seconds. The command deck for developers who ship.

436 lines (378 loc) 12.4 kB
/** * Smart Model Router for Cost Optimization * Routes tasks to appropriate models based on complexity and cost */ const MODEL_TIERS = { haiku: { model: 'claude-3-5-haiku-20241022', maxTokens: 8192, inputCostPer1K: 0.001, outputCostPer1K: 0.005, capabilities: ['simple', 'classification', 'extraction', 'validation'] }, sonnet: { model: 'claude-3-5-sonnet-20241022', maxTokens: 8192, inputCostPer1K: 0.003, outputCostPer1K: 0.015, capabilities: ['generation', 'complex', 'refactoring', 'debugging'] }, opus: { model: 'claude-opus-4-1-20250805', maxTokens: 4096, inputCostPer1K: 0.015, outputCostPer1K: 0.075, capabilities: ['architecture', 'critical', 'design', 'strategy'] } }; // Intent-based routing (not percentage-based) const INTENT_ROUTING = { // Simple operations - Use Haiku validation: { intents: ['validate', 'check', 'verify', 'test', 'confirm'], patterns: [/validate/i, /check/i, /verify/i, /test format/i], tier: 'haiku', confidence: 'high' }, formatting: { intents: ['format', 'lint', 'style', 'organize', 'clean'], patterns: [/format/i, /lint/i, /prettier/i, /eslint/i], tier: 'haiku', confidence: 'high' }, extraction: { intents: ['extract', 'parse', 'find', 'locate', 'identify'], patterns: [/extract/i, /parse/i, /find \w+ in/i], tier: 'haiku', confidence: 'high' }, // Code generation - Use Sonnet implementation: { intents: ['implement', 'create', 'build', 'develop', 'code'], patterns: [/implement/i, /create \w+ component/i, /build/i], tier: 'sonnet', confidence: 'high' }, debugging: { intents: ['debug', 'fix', 'troubleshoot', 'resolve', 'diagnose'], patterns: [/debug/i, /fix \w+ bug/i, /troubleshoot/i], tier: 'sonnet', confidence: 'medium' }, refactoring: { intents: ['refactor', 'optimize', 'improve', 'enhance', 'modernize'], patterns: [/refactor/i, /optimize performance/i, /improve/i], tier: 'sonnet', confidence: 'medium' }, // Architecture decisions - Use Opus systemDesign: { intents: ['design', 'architect', 'plan', 'structure', 'organize'], patterns: [/design \w+ architecture/i, /system design/i, /architect/i], tier: 'opus', confidence: 'high' }, security: { intents: ['secure', 'authenticate', 'authorize', 'encrypt', 'protect'], patterns: [/security/i, /authentication/i, /authorization/i, /encrypt/i], tier: 'opus', confidence: 'high' }, critical: { intents: ['critical', 'essential', 'core', 'fundamental', 'payment'], patterns: [/critical/i, /payment/i, /billing/i, /compliance/i], tier: 'opus', confidence: 'high' }, database: { intents: ['database', 'schema', 'migration', 'indexing', 'query'], patterns: [/database \w+ design/i, /schema/i, /migration/i], tier: 'opus', confidence: 'medium' } }; // Quality thresholds for escalation const QUALITY_THRESHOLDS = { haiku: { maxComplexity: 'low', maxTokens: 2000, escalateOn: ['unclear', 'ambiguous', 'complex'] }, sonnet: { maxComplexity: 'medium', maxTokens: 4000, escalateOn: ['critical', 'security', 'architecture'] }, opus: { maxComplexity: 'high', maxTokens: 8000, escalateOn: [] // No escalation from Opus } }; class ModelRouter { constructor(config = {}) { this.forceModel = config.forceModel || null; this.budgetMode = config.budgetMode || 'balanced'; // 'aggressive' | 'balanced' | 'quality' this.usage = { haiku: { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 }, sonnet: { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 }, opus: { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 } }; this.totalBudget = config.totalBudget || Infinity; this.usedBudget = 0; } /** * Route task to appropriate model based on complexity */ routeTask(task, agent = null, context = {}) { // Check budget constraints if (this.usedBudget >= this.totalBudget * 0.9) { console.warn('⚠️ Budget warning: 90% of budget consumed'); if (this.budgetMode === 'aggressive') { return this._selectModel('haiku', task); } } // Force specific model if configured if (this.forceModel && MODEL_TIERS[this.forceModel]) { return this._selectModel(this.forceModel, task); } // Agent-specific routing const agentRouting = this._routeByAgent(agent); if (agentRouting) { return this._selectModel(agentRouting, task); } // Intent-based routing (not pattern-based) const intentRouting = this._routeByIntent(task); if (intentRouting) { return this._selectModel(intentRouting, task); } // Context-based routing const contextRouting = this._routeByContext(context); if (contextRouting) { return this._selectModel(contextRouting, task); } // Default to balanced approach return this._selectModel('sonnet', task); } /** * Route based on agent type */ _routeByAgent(agent) { if (!agent) return null; const agentModelMap = { // Architecture & Design - Need Opus 'backend-architect': 'opus', 'system-designer': 'opus', 'security-auditor': 'opus', // Code Generation - Sonnet is sufficient 'frontend-developer': 'sonnet', 'api-builder': 'sonnet', 'test-writer-fixer': 'sonnet', // Simple Tasks - Haiku is fine 'code-formatter': 'haiku', 'linter': 'haiku', 'validator': 'haiku', 'documentation': 'haiku' }; return agentModelMap[agent] || null; } /** * Route based on intent detection */ _routeByIntent(task) { const taskLower = task.toLowerCase(); const words = taskLower.split(/\s+/); // Check each intent category for (const [category, config] of Object.entries(INTENT_ROUTING)) { // Check if any intent words match const hasIntent = config.intents.some(intent => words.includes(intent) || taskLower.includes(intent) ); // Check if any patterns match const hasPattern = config.patterns.some(pattern => pattern.test(taskLower) ); if (hasIntent || hasPattern) { // Check if we should escalate based on complexity signals const shouldEscalate = this._checkEscalation(taskLower, config.tier); if (shouldEscalate) { console.log(`⬆️ Escalating from ${config.tier} due to complexity signals`); return this._getNextTier(config.tier); } return config.tier; } } return null; } /** * Check if task should be escalated to higher tier */ _checkEscalation(task, currentTier) { const threshold = QUALITY_THRESHOLDS[currentTier]; if (!threshold) return false; // Check for escalation triggers for (const trigger of threshold.escalateOn) { if (task.includes(trigger)) { return true; } } // Check task length as complexity proxy if (task.length > 500 && currentTier === 'haiku') { return true; } return false; } /** * Get next tier for escalation */ _getNextTier(currentTier) { const tierOrder = ['haiku', 'sonnet', 'opus']; const currentIndex = tierOrder.indexOf(currentTier); if (currentIndex < tierOrder.length - 1) { return tierOrder[currentIndex + 1]; } return currentTier; } /** * Route based on context signals */ _routeByContext(context) { // Critical path items need highest quality if (context.critical || context.security) { return 'opus'; } // Large codebases need better understanding if (context.fileCount > 100 || context.complexity === 'high') { return 'opus'; } // Generation tasks if (context.generateCode || context.refactor) { return 'sonnet'; } // Simple validations if (context.validate || context.format) { return 'haiku'; } return null; } /** * Select model and track usage */ _selectModel(tier, task) { const model = MODEL_TIERS[tier]; // Log routing decision console.log(`📊 Routing: "${task.substring(0, 50)}..." → ${tier.toUpperCase()} model`); // Track usage this.usage[tier].requests++; return { model: model.model, maxTokens: model.maxTokens, tier, estimatedCost: this._estimateCost(tier, task), reasoning: this._getRoutingReasoning(tier, task) }; } /** * Estimate cost for task */ _estimateCost(tier, task) { const model = MODEL_TIERS[tier]; // Rough estimation: 500 input tokens, 1500 output tokens average const inputCost = (500 / 1000) * model.inputCostPer1K; const outputCost = (1500 / 1000) * model.outputCostPer1K; return inputCost + outputCost; } /** * Get reasoning for routing decision */ _getRoutingReasoning(tier, task) { const reasons = { haiku: 'Simple task suitable for fast, efficient processing', sonnet: 'Complex generation requiring balanced performance', opus: 'Critical architecture decision requiring highest intelligence' }; return reasons[tier] || 'Default routing'; } /** * Update usage after completion */ updateUsage(tier, inputTokens, outputTokens) { if (!this.usage[tier]) return; const model = MODEL_TIERS[tier]; const cost = (inputTokens / 1000) * model.inputCostPer1K + (outputTokens / 1000) * model.outputCostPer1K; this.usage[tier].inputTokens += inputTokens; this.usage[tier].outputTokens += outputTokens; this.usage[tier].cost += cost; this.usedBudget += cost; } /** * Get usage report */ getUsageReport() { const total = Object.values(this.usage).reduce((sum, tier) => ({ requests: sum.requests + tier.requests, inputTokens: sum.inputTokens + tier.inputTokens, outputTokens: sum.outputTokens + tier.outputTokens, cost: sum.cost + tier.cost }), { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 }); const savings = this._calculateSavings(); return { byTier: this.usage, total, budgetUsed: this.usedBudget, budgetRemaining: this.totalBudget - this.usedBudget, budgetPercentUsed: (this.usedBudget / this.totalBudget) * 100, costSavings: savings, recommendations: this._getRecommendations() }; } /** * Calculate cost savings vs all-Opus */ _calculateSavings() { const actualCost = this.usedBudget; const opusOnlyCost = Object.values(this.usage).reduce((sum, tier) => { const inputCost = (tier.inputTokens / 1000) * MODEL_TIERS.opus.inputCostPer1K; const outputCost = (tier.outputTokens / 1000) * MODEL_TIERS.opus.outputCostPer1K; return sum + inputCost + outputCost; }, 0); return { amount: opusOnlyCost - actualCost, percentage: ((opusOnlyCost - actualCost) / opusOnlyCost * 100).toFixed(1) }; } /** * Get optimization recommendations */ _getRecommendations() { const recommendations = []; // Check if too much Opus usage const opusPercent = (this.usage.opus.requests / (this.usage.haiku.requests + this.usage.sonnet.requests + this.usage.opus.requests)) * 100; if (opusPercent > 20) { recommendations.push('Consider reviewing Opus usage - target is <10% for cost efficiency'); } // Check if budget mode should change if (this.usedBudget > this.totalBudget * 0.8) { recommendations.push('Switch to aggressive budget mode to preserve remaining budget'); } // Suggest caching if (this.usage.total?.requests > 100) { recommendations.push('Enable response caching to reduce repeated API calls'); } return recommendations; } /** * Reset usage tracking */ resetUsage() { this.usage = { haiku: { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 }, sonnet: { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 }, opus: { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 } }; this.usedBudget = 0; } } module.exports = { ModelRouter, MODEL_TIERS, INTENT_ROUTING, QUALITY_THRESHOLDS };