// shipdeck
// Version:
// Ship MVPs in 48 hours. Fix bugs in 30 seconds. The command deck for developers who ship.
// 436 lines (378 loc) • 12.4 kB
// JavaScript
/**
* Smart Model Router for Cost Optimization
* Routes tasks to appropriate models based on complexity and cost
*/
/**
 * Model tiers available to the router, keyed by tier name.
 *
 * For each tier:
 *  - model:           API model identifier returned to the caller
 *  - maxTokens:       token limit returned to the caller with the routing decision
 *  - inputCostPer1K:  price per 1,000 input tokens (published per-million price / 1000)
 *  - outputCostPer1K: price per 1,000 output tokens (published per-million price / 1000)
 *  - capabilities:    descriptive tags for the kind of work the tier is meant for
 *
 * NOTE(review): prices are hard-coded and go stale; re-check them against the
 * provider's current pricing page whenever a model id is changed.
 */
const MODEL_TIERS = {
  haiku: {
    model: 'claude-3-5-haiku-20241022',
    maxTokens: 8192,
    // Claude 3.5 Haiku is listed at $0.80 / $4 per million tokens
    // (the earlier 0.001 / 0.005 values were the launch price).
    inputCostPer1K: 0.0008,
    outputCostPer1K: 0.004,
    capabilities: ['simple', 'classification', 'extraction', 'validation']
  },
  sonnet: {
    model: 'claude-3-5-sonnet-20241022',
    maxTokens: 8192,
    inputCostPer1K: 0.003,
    outputCostPer1K: 0.015,
    capabilities: ['generation', 'complex', 'refactoring', 'debugging']
  },
  opus: {
    model: 'claude-opus-4-1-20250805',
    maxTokens: 4096,
    inputCostPer1K: 0.015,
    outputCostPer1K: 0.075,
    capabilities: ['architecture', 'critical', 'design', 'strategy']
  }
};
// Intent-based routing table (not percentage-based).
// Every category pairs plain keyword intents and regex patterns with the tier
// that should handle a matching task. Declaration order is significant:
// ModelRouter._routeByIntent walks the categories in this order and stops at
// the first one whose intents or patterns match.
const INTENT_ROUTING = (() => {
  // Builds one category entry; property order mirrors the hand-written layout.
  const category = (tier, confidence, intents, patterns) => ({
    intents,
    patterns,
    tier,
    confidence
  });

  return {
    // Simple operations - Use Haiku
    validation: category(
      'haiku',
      'high',
      ['validate', 'check', 'verify', 'test', 'confirm'],
      [/validate/i, /check/i, /verify/i, /test format/i]
    ),
    formatting: category(
      'haiku',
      'high',
      ['format', 'lint', 'style', 'organize', 'clean'],
      [/format/i, /lint/i, /prettier/i, /eslint/i]
    ),
    extraction: category(
      'haiku',
      'high',
      ['extract', 'parse', 'find', 'locate', 'identify'],
      [/extract/i, /parse/i, /find \w+ in/i]
    ),

    // Code generation - Use Sonnet
    implementation: category(
      'sonnet',
      'high',
      ['implement', 'create', 'build', 'develop', 'code'],
      [/implement/i, /create \w+ component/i, /build/i]
    ),
    debugging: category(
      'sonnet',
      'medium',
      ['debug', 'fix', 'troubleshoot', 'resolve', 'diagnose'],
      [/debug/i, /fix \w+ bug/i, /troubleshoot/i]
    ),
    refactoring: category(
      'sonnet',
      'medium',
      ['refactor', 'optimize', 'improve', 'enhance', 'modernize'],
      [/refactor/i, /optimize performance/i, /improve/i]
    ),

    // Architecture decisions - Use Opus
    systemDesign: category(
      'opus',
      'high',
      ['design', 'architect', 'plan', 'structure', 'organize'],
      [/design \w+ architecture/i, /system design/i, /architect/i]
    ),
    security: category(
      'opus',
      'high',
      ['secure', 'authenticate', 'authorize', 'encrypt', 'protect'],
      [/security/i, /authentication/i, /authorization/i, /encrypt/i]
    ),
    critical: category(
      'opus',
      'high',
      ['critical', 'essential', 'core', 'fundamental', 'payment'],
      [/critical/i, /payment/i, /billing/i, /compliance/i]
    ),
    database: category(
      'opus',
      'medium',
      ['database', 'schema', 'migration', 'indexing', 'query'],
      [/database \w+ design/i, /schema/i, /migration/i]
    )
  };
})();
// Per-tier quality thresholds used when deciding whether to escalate.
// `escalateOn` lists substrings that push a task up to the next tier; of the
// three fields it is the only one read by ModelRouter._checkEscalation in the
// code visible here.
const QUALITY_THRESHOLDS = Object.fromEntries(
  [
    // [tier, maxComplexity, maxTokens, escalateOn]
    ['haiku', 'low', 2000, ['unclear', 'ambiguous', 'complex']],
    ['sonnet', 'medium', 4000, ['critical', 'security', 'architecture']],
    ['opus', 'high', 8000, []] // No escalation from Opus
  ].map(([tier, maxComplexity, maxTokens, escalateOn]) => [
    tier,
    { maxComplexity, maxTokens, escalateOn }
  ])
);
/**
 * Picks a model tier (haiku / sonnet / opus) for a task and keeps running
 * totals of requests, tokens and spend so callers can report on and cap cost.
 *
 * Relies on the module-level tables MODEL_TIERS, INTENT_ROUTING and
 * QUALITY_THRESHOLDS.
 */
class ModelRouter {
  /**
   * @param {object} [config]
   * @param {string|null} [config.forceModel] - Tier key to use for every task.
   *   Ignored unless it is an own key of MODEL_TIERS.
   * @param {string} [config.budgetMode='balanced'] - 'aggressive' | 'balanced' | 'quality'.
   *   Only 'aggressive' changes routing: once 90% of the budget is spent,
   *   every task goes to haiku.
   * @param {number} [config.totalBudget=Infinity] - Spending cap, in the same
   *   unit as the per-1K costs in MODEL_TIERS.
   */
  constructor(config = {}) {
    this.forceModel = config.forceModel || null;
    this.budgetMode = config.budgetMode || 'balanced'; // 'aggressive' | 'balanced' | 'quality'
    this.totalBudget = config.totalBudget || Infinity;
    // Initialises this.usage and this.usedBudget.
    this.resetUsage();
  }

  /**
   * Route a task to a model tier.
   *
   * Precedence: budget guard (aggressive mode only) → forced model →
   * agent mapping → intent detection → context flags → default 'sonnet'.
   *
   * @param {string} task - Task description. Non-string values are stringified;
   *   null/undefined are treated as an empty description.
   * @param {string|null} [agent] - Agent name, looked up in the agent→tier map.
   * @param {object} [context] - Optional routing hints (see _routeByContext).
   * @returns {{model: string, maxTokens: number, tier: string, estimatedCost: number, reasoning: string}}
   */
  routeTask(task, agent = null, context = {}) {
    const taskText = typeof task === 'string' ? task : String(task ?? '');

    // Check budget constraints
    if (this.usedBudget >= this.totalBudget * 0.9) {
      console.warn('⚠️ Budget warning: 90% of budget consumed');
      if (this.budgetMode === 'aggressive') {
        return this._selectModel('haiku', taskText);
      }
    }

    // Force specific model if configured
    if (this.forceModel && this._isKnownTier(this.forceModel)) {
      return this._selectModel(this.forceModel, taskText);
    }

    // First strategy that yields a tier wins; otherwise fall back to sonnet.
    const tier =
      this._routeByAgent(agent) ||
      this._routeByIntent(taskText) ||
      this._routeByContext(context) ||
      'sonnet';

    return this._selectModel(tier, taskText);
  }

  /**
   * True when `tier` is one of the tiers declared in MODEL_TIERS.
   * Uses an own-property check so inherited names such as 'constructor'
   * are never mistaken for a tier.
   */
  _isKnownTier(tier) {
    return typeof tier === 'string' &&
      Object.prototype.hasOwnProperty.call(MODEL_TIERS, tier);
  }

  /**
   * Route based on agent type.
   *
   * @param {string|null} agent
   * @returns {string|null} Tier for a known agent, otherwise null.
   */
  _routeByAgent(agent) {
    if (!agent) return null;

    const agentModelMap = {
      // Architecture & Design - Need Opus
      'backend-architect': 'opus',
      'system-designer': 'opus',
      'security-auditor': 'opus',
      // Code Generation - Sonnet is sufficient
      'frontend-developer': 'sonnet',
      'api-builder': 'sonnet',
      'test-writer-fixer': 'sonnet',
      // Simple Tasks - Haiku is fine
      'code-formatter': 'haiku',
      'linter': 'haiku',
      'validator': 'haiku',
      'documentation': 'haiku'
    };

    // Own-property lookup: a plain `map[agent]` would resolve names like
    // 'constructor' through Object.prototype and return a non-tier value.
    return Object.prototype.hasOwnProperty.call(agentModelMap, agent)
      ? agentModelMap[agent]
      : null;
  }

  /**
   * Route based on intent detection.
   *
   * Categories of INTENT_ROUTING are tried in declaration order; the first one
   * with a matching intent keyword (substring match) or regex pattern decides
   * the tier, escalated by one step when _checkEscalation says so.
   *
   * @param {string} task
   * @returns {string|null} Tier name, or null when no category matches.
   */
  _routeByIntent(task) {
    const taskLower = String(task ?? '').toLowerCase();

    for (const config of Object.values(INTENT_ROUTING)) {
      const matched =
        config.intents.some((intent) => taskLower.includes(intent)) ||
        config.patterns.some((pattern) => pattern.test(taskLower));
      if (!matched) continue;

      // Check if we should escalate based on complexity signals
      if (this._checkEscalation(taskLower, config.tier)) {
        console.log(`⬆️ Escalating from ${config.tier} due to complexity signals`);
        return this._getNextTier(config.tier);
      }
      return config.tier;
    }

    return null;
  }

  /**
   * Check if task should be escalated to a higher tier.
   *
   * @param {string} task - Lower-cased task text.
   * @param {string} currentTier
   * @returns {boolean} True when the text contains one of the tier's
   *   `escalateOn` triggers, or when a haiku task is longer than 500 characters.
   */
  _checkEscalation(task, currentTier) {
    const threshold = QUALITY_THRESHOLDS[currentTier];
    if (!threshold) return false;

    if (threshold.escalateOn.some((trigger) => task.includes(trigger))) {
      return true;
    }

    // Task length as complexity proxy
    return currentTier === 'haiku' && task.length > 500;
  }

  /**
   * Get next tier for escalation.
   *
   * @param {string} currentTier
   * @returns {string} The tier one step up; the top tier and unrecognised
   *   tiers are returned unchanged.
   */
  _getNextTier(currentTier) {
    const tierOrder = ['haiku', 'sonnet', 'opus'];
    const currentIndex = tierOrder.indexOf(currentTier);
    // indexOf yields -1 for an unknown tier; without this guard that would
    // "escalate" to tierOrder[0].
    if (currentIndex === -1 || currentIndex === tierOrder.length - 1) {
      return currentTier;
    }
    return tierOrder[currentIndex + 1];
  }

  /**
   * Route based on context signals.
   *
   * @param {object} context - Flags read: critical, security, fileCount,
   *   complexity, generateCode, refactor, validate, format.
   * @returns {string|null} Tier name, or null when no flag applies.
   */
  _routeByContext(context) {
    if (!context) return null;

    // Critical path items need highest quality
    if (context.critical || context.security) {
      return 'opus';
    }
    // Large codebases need better understanding
    if (context.fileCount > 100 || context.complexity === 'high') {
      return 'opus';
    }
    // Generation tasks
    if (context.generateCode || context.refactor) {
      return 'sonnet';
    }
    // Simple validations
    if (context.validate || context.format) {
      return 'haiku';
    }
    return null;
  }

  /**
   * Select model and track usage: logs the decision, increments the tier's
   * request counter and returns the routing result.
   *
   * @param {string} tier - Key of MODEL_TIERS.
   * @param {string} task
   * @returns {{model: string, maxTokens: number, tier: string, estimatedCost: number, reasoning: string}}
   */
  _selectModel(tier, task) {
    const model = MODEL_TIERS[tier];
    const taskText = String(task ?? '');

    // Log routing decision
    console.log(`📊 Routing: "${taskText.substring(0, 50)}..." → ${tier.toUpperCase()} model`);

    // Track usage
    this.usage[tier].requests++;

    return {
      model: model.model,
      maxTokens: model.maxTokens,
      tier,
      estimatedCost: this._estimateCost(tier, taskText),
      reasoning: this._getRoutingReasoning(tier, taskText)
    };
  }

  /**
   * Estimate cost for task.
   *
   * Flat estimate that assumes 500 input and 1500 output tokens per request;
   * the task text itself is not inspected.
   *
   * @param {string} tier - Key of MODEL_TIERS.
   * @param {string} task - Unused; kept for interface compatibility.
   * @returns {number}
   */
  _estimateCost(tier, task) {
    const model = MODEL_TIERS[tier];
    const inputCost = (500 / 1000) * model.inputCostPer1K;
    const outputCost = (1500 / 1000) * model.outputCostPer1K;
    return inputCost + outputCost;
  }

  /**
   * Get reasoning for routing decision.
   *
   * @param {string} tier
   * @param {string} task - Unused; kept for interface compatibility.
   * @returns {string} Canned explanation for the tier, or 'Default routing'.
   */
  _getRoutingReasoning(tier, task) {
    const reasons = {
      haiku: 'Simple task suitable for fast, efficient processing',
      sonnet: 'Complex generation requiring balanced performance',
      opus: 'Critical architecture decision requiring highest intelligence'
    };
    return reasons[tier] || 'Default routing';
  }

  /**
   * Update usage after completion.
   *
   * Unknown tiers are ignored. Token counts that are missing, non-numeric,
   * non-finite or negative are counted as 0 so that a single bad call cannot
   * turn the running totals (and therefore the budget check) into NaN.
   *
   * @param {string} tier
   * @param {number} inputTokens
   * @param {number} outputTokens
   */
  updateUsage(tier, inputTokens, outputTokens) {
    if (!this._isKnownTier(tier) || !this.usage[tier]) return;

    const toCount = (value) => {
      const n = Number(value);
      return Number.isFinite(n) && n > 0 ? n : 0;
    };
    const inTokens = toCount(inputTokens);
    const outTokens = toCount(outputTokens);

    const model = MODEL_TIERS[tier];
    const cost = (inTokens / 1000) * model.inputCostPer1K +
      (outTokens / 1000) * model.outputCostPer1K;

    const bucket = this.usage[tier];
    bucket.inputTokens += inTokens;
    bucket.outputTokens += outTokens;
    bucket.cost += cost;
    this.usedBudget += cost;
  }

  /**
   * Get usage report.
   *
   * @returns {{byTier: object, total: object, budgetUsed: number,
   *   budgetRemaining: number, budgetPercentUsed: number,
   *   costSavings: {amount: number, percentage: string},
   *   recommendations: string[]}}
   */
  getUsageReport() {
    const total = Object.values(this.usage).reduce((sum, tier) => ({
      requests: sum.requests + tier.requests,
      inputTokens: sum.inputTokens + tier.inputTokens,
      outputTokens: sum.outputTokens + tier.outputTokens,
      cost: sum.cost + tier.cost
    }), { requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 });

    return {
      byTier: this.usage,
      total,
      budgetUsed: this.usedBudget,
      budgetRemaining: this.totalBudget - this.usedBudget,
      budgetPercentUsed: (this.usedBudget / this.totalBudget) * 100,
      costSavings: this._calculateSavings(),
      recommendations: this._getRecommendations()
    };
  }

  /**
   * Calculate cost savings vs all-Opus: what the recorded tokens would have
   * cost at opus prices, minus what was actually spent.
   *
   * @returns {{amount: number, percentage: string}} `percentage` is formatted
   *   with one decimal place; it is '0.0' when no tokens have been recorded.
   */
  _calculateSavings() {
    const actualCost = this.usedBudget;
    const opus = MODEL_TIERS.opus;
    const opusOnlyCost = Object.values(this.usage).reduce(
      (sum, tier) =>
        sum +
        (tier.inputTokens / 1000) * opus.inputCostPer1K +
        (tier.outputTokens / 1000) * opus.outputCostPer1K,
      0
    );

    const amount = opusOnlyCost - actualCost;
    // Avoid 0/0 → "NaN" before any usage has been recorded.
    const percent = opusOnlyCost > 0 ? (amount / opusOnlyCost) * 100 : 0;

    return {
      amount,
      percentage: percent.toFixed(1)
    };
  }

  /**
   * Get optimization recommendations.
   *
   * @returns {string[]} Zero or more human-readable suggestions.
   */
  _getRecommendations() {
    const recommendations = [];
    const totalRequests = Object.values(this.usage)
      .reduce((sum, tier) => sum + tier.requests, 0);

    // Check if too much Opus usage (guarding the no-requests case)
    const opusPercent = totalRequests > 0
      ? (this.usage.opus.requests / totalRequests) * 100
      : 0;
    if (opusPercent > 20) {
      recommendations.push('Consider reviewing Opus usage - target is <10% for cost efficiency');
    }

    // Check if budget mode should change
    if (this.usedBudget > this.totalBudget * 0.8) {
      recommendations.push('Switch to aggressive budget mode to preserve remaining budget');
    }

    // Suggest caching. The request total is derived from the per-tier
    // counters; `this.usage` has no `total` entry of its own.
    if (totalRequests > 100) {
      recommendations.push('Enable response caching to reduce repeated API calls');
    }

    return recommendations;
  }

  /**
   * Reset usage tracking: zeroes every per-tier counter and the spent budget.
   */
  resetUsage() {
    const emptyBucket = () => ({ requests: 0, inputTokens: 0, outputTokens: 0, cost: 0 });
    this.usage = {
      haiku: emptyBucket(),
      sonnet: emptyBucket(),
      opus: emptyBucket()
    };
    this.usedBudget = 0;
  }
}
// CommonJS export surface: the router class plus the three lookup tables it
// reads (model tiers, intent routing rules, escalation thresholds).
module.exports = { ModelRouter, MODEL_TIERS, INTENT_ROUTING, QUALITY_THRESHOLDS };