claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
488 lines • 17.7 kB
JavaScript
/**
* Intelligent Model Router using Tiny Dancer
*
* Dynamically routes requests to optimal Claude model (haiku/sonnet/opus)
* based on task complexity, confidence scores, and historical performance.
*
* Features:
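* - Fast routing decisions from heuristic scoring (the FastGRNN path
*   associated with Tiny Dancer is not invoked in this module)
* - Uncertainty quantification for model escalation
* - Circuit breaker for failover
* - Outcome tracking: persisted statistics and a bounded outcome history
* - Complexity scoring via keyword indicators, text features, and
*   (when an embedding is supplied) embedding variance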
*
* Routing Strategy:
* - Haiku: High confidence, low complexity (fast, cheap)
* - Sonnet: Medium confidence, moderate complexity (balanced)
* - Opus: Low confidence, high complexity (most capable)
*
* @module model-router
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { dirname, join } from 'path';
/**
 * Per-model routing characteristics.
 *
 * Cost and speed multipliers are expressed relative to Opus, which is the
 * 1.0 baseline for both. `description` is the text shown in routing
 * explanations. `maxComplexity` is declared here but is not read anywhere
 * else in this file.
 */
export const MODEL_CAPABILITIES = {
  haiku: {
    maxComplexity: 0.4,
    costMultiplier: 0.04, // 1/25 of the Opus baseline cost
    speedMultiplier: 3.0, // 3x the Opus baseline speed
    description: 'Fast, cost-effective for simple tasks',
  },

  sonnet: {
    maxComplexity: 0.7,
    costMultiplier: 0.2, // 1/5 of the Opus baseline cost
    speedMultiplier: 1.5, // 1.5x the Opus baseline speed
    description: 'Balanced capability and cost',
  },

  opus: {
    maxComplexity: 1.0,
    costMultiplier: 1.0, // the baseline itself
    speedMultiplier: 1.0, // the baseline itself
    description: 'Most capable for complex reasoning',
  },

  // Placeholder entry: defer to whatever model the parent context selected.
  inherit: {
    maxComplexity: 1.0,
    costMultiplier: 1.0,
    speedMultiplier: 1.0,
    description: 'Use parent model selection',
  },
};
/**
 * Keyword lists used to classify a task description.
 *
 * The router lower-cases the task text and checks each keyword with a plain
 * substring test, so a keyword also matches inside longer words.
 * Order within each list is preserved in the reported indicators.
 */
export const COMPLEXITY_INDICATORS = {
  // Keywords that raise the semantic-depth score the most.
  high: [
    'architect',
    'design',
    'refactor',
    'optimize',
    'security',
    'audit',
    'complex',
    'analyze',
    'investigate',
    'debug',
    'performance',
    'scale',
    'distributed',
    'concurrent',
    'algorithm',
    'system',
    'integration',
  ],

  // Keywords for ordinary feature and maintenance work.
  medium: [
    'implement',
    'feature',
    'add',
    'update',
    'modify',
    'fix',
    'test',
    'review',
    'validate',
    'check',
    'improve',
    'enhance',
    'extend',
  ],

  // Keywords for mechanical or cosmetic changes; these lower the score.
  low: [
    'simple',
    'typo',
    'comment',
    'format',
    'rename',
    'move',
    'copy',
    'delete',
    'documentation',
    'readme',
    'config',
    'version',
    'bump',
  ],
};
// ============================================================================
// Default Configuration
// ============================================================================
// Baseline router settings; options passed to the ModelRouter constructor
// are spread over these, so any key can be overridden individually.
const DEFAULT_CONFIG = {
  // Routing thresholds (only maxUncertainty is read by the router in this file).
  confidenceThreshold: 0.85,
  maxUncertainty: 0.15,

  // Circuit breaker: penalise a model after consecutive failed outcomes.
  enableCircuitBreaker: true,
  circuitBreakerThreshold: 5,

  // Persistence: path is resolved against process.cwd(); an interval of 1
  // writes state after every decision so short-lived CLI runs keep their stats.
  statePath: '.swarm/model-router-state.json',
  autoSaveInterval: 1,

  // Preferences.
  enableCostOptimization: true,
  preferSpeed: true,
};
// ============================================================================
// Model Router Implementation
// ============================================================================
/**
 * Complexity-based model router.
 *
 * Scores a task description with keyword and text heuristics, converts that
 * score into per-model preferences, penalises models with recent consecutive
 * failures, and escalates to a more capable model when the winning score is
 * not clear-cut. Aggregate statistics and a bounded outcome history are
 * persisted as JSON at `config.statePath`, resolved against `process.cwd()`.
 */
export class ModelRouter {
  /** Effective settings: DEFAULT_CONFIG with constructor overrides applied. */
  config;
  /** Persisted statistics and outcome history (shape: see createDefaultState). */
  state;
  /** Decisions routed by this instance since construction or reset(). */
  decisionCount = 0;
  /**
   * Consecutive failed outcomes per model. Drives the circuit breaker.
   * Kept in memory only: it is not part of `state`, so it is not saved.
   */
  consecutiveFailures = {
    haiku: 0,
    sonnet: 0,
    opus: 0,
    inherit: 0,
  };

  /**
   * @param {object} [config] - Overrides merged over DEFAULT_CONFIG.
   */
  constructor(config = {}) {
    this.config = { ...DEFAULT_CONFIG, ...config };
    this.state = this.loadState();
  }

  /**
   * Route a task to the optimal model.
   *
   * @param {string} task - Task description.
   * @param {ArrayLike<number>} [embedding] - Optional embedding of the task.
   * @returns {Promise<object>} Selected model, confidence, uncertainty,
   *   complexity score, reasoning text, ranked alternatives, routing time in
   *   microseconds and the cost multiplier of the selected model.
   */
  async route(task, embedding) {
    const startTime = performance.now();
    const complexity = this.analyzeComplexity(task, embedding);
    const scores = this.computeModelScores(complexity);
    const adjustedScores = this.applyCircuitBreaker(scores);
    const { model, confidence, uncertainty } = this.selectModel(adjustedScores, complexity.score);
    // performance.now() is in milliseconds; report microseconds.
    const inferenceTimeUs = (performance.now() - startTime) * 1000;
    const result = {
      model,
      confidence,
      uncertainty,
      complexity: complexity.score,
      reasoning: this.buildReasoning(model, complexity, confidence),
      alternatives: Object.entries(adjustedScores)
        .filter(([m]) => m !== model)
        .map(([m, score]) => ({ model: m, score }))
        .sort((a, b) => b.score - a.score),
      inferenceTimeUs,
      costMultiplier: MODEL_CAPABILITIES[model].costMultiplier,
    };
    this.trackDecision(task, result);
    return result;
  }

  /**
   * Analyze task complexity.
   *
   * @param {string} task - Task description.
   * @param {ArrayLike<number>} [embedding] - Optional embedding of the task.
   * @returns {{score: number, indicators: object, features: object}} Overall
   *   score clamped to [0, 1], the matched keywords per level, and the four
   *   feature scores that were combined.
   */
  analyzeComplexity(task, embedding) {
    const taskLower = task.toLowerCase();
    // Drop empty tokens so leading/trailing whitespace (or an empty task)
    // does not inflate the word count.
    const words = taskLower.split(/\s+/).filter(Boolean);
    // Keywords are matched as substrings of the lower-cased task.
    const indicators = {
      high: COMPLEXITY_INDICATORS.high.filter(ind => taskLower.includes(ind)),
      medium: COMPLEXITY_INDICATORS.medium.filter(ind => taskLower.includes(ind)),
      low: COMPLEXITY_INDICATORS.low.filter(ind => taskLower.includes(ind)),
    };
    const lexicalComplexity = this.computeLexicalComplexity(task);
    const semanticDepth = this.computeSemanticDepth(indicators, embedding);
    const taskScope = this.computeTaskScope(task, words);
    const uncertaintyLevel = this.computeUncertaintyLevel(task);
    // Weighted combination (weights sum to 1.0), clamped to [0, 1].
    const score = Math.min(1, Math.max(0, lexicalComplexity * 0.2 +
      semanticDepth * 0.35 +
      taskScope * 0.25 +
      uncertaintyLevel * 0.2));
    return {
      score,
      indicators,
      features: {
        lexicalComplexity,
        semanticDepth,
        taskScope,
        uncertaintyLevel,
      },
    };
  }

  /**
   * Compute lexical complexity from text features.
   *
   * Longer tasks made of longer words score higher. The result is always in
   * [0, 1]: an empty or whitespace-only task scores 0, and words shorter than
   * three characters on average contribute 0 instead of a negative value.
   *
   * @param {string} task - Task description.
   * @returns {number} Score in [0, 1].
   */
  computeLexicalComplexity(task) {
    const words = task.split(/\s+/).filter(Boolean);
    if (words.length === 0) {
      return 0;
    }
    const totalLength = words.reduce((sum, w) => sum + w.length, 0);
    const avgWordLength = totalLength / words.length;
    // 50+ words saturates the length component.
    const lengthScore = Math.min(1, words.length / 50);
    // Maps an average word length of 3..10 characters onto 0..1.
    const wordScore = Math.min(1, Math.max(0, (avgWordLength - 3) / 7));
    return lengthScore * 0.4 + wordScore * 0.6;
  }

  /**
   * Compute semantic depth from matched indicators and an optional embedding.
   *
   * @param {{high: string[], medium: string[], low: string[]}} indicators
   * @param {ArrayLike<number>} [embedding]
   * @returns {number} Score in [0, 1].
   */
  computeSemanticDepth(indicators, embedding) {
    const highWeight = indicators.high.length * 0.3;
    const mediumWeight = indicators.medium.length * 0.15;
    const lowWeight = indicators.low.length * -0.1;
    // 0.3 is the score of a task that matches no keyword at all.
    let baseScore = Math.min(1, Math.max(0, 0.3 + highWeight + mediumWeight + lowWeight));
    if (embedding && embedding.length > 0) {
      const mean = embedding.reduce((a, b) => a + b, 0) / embedding.length;
      const variance = embedding.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / embedding.length;
      // Blend in embedding variance; a NaN variance (non-numeric entries) is
      // ignored so it cannot turn the whole complexity score into NaN.
      if (!Number.isNaN(variance)) {
        baseScore = baseScore * 0.7 + Math.min(1, variance * 10) * 0.3;
      }
    }
    return baseScore;
  }

  /**
   * Compute task scope from content analysis.
   *
   * @param {string} task - Task description.
   * @param {string[]} words - Tokens of the task; only the count is used.
   * @returns {number} Score in [0, 1] (0.4 multi-file + 0.3 code-gen + up to 0.3 length).
   */
  computeTaskScope(task, words) {
    const multiFilePatterns = [
      /multiple files?/i, /across.*modules?/i, /refactor.*codebase/i,
      /all.*files/i, /entire.*project/i, /system.*wide/i,
    ];
    const hasMultiFile = multiFilePatterns.some(p => p.test(task)) ? 0.4 : 0;
    const codeGenPatterns = [
      /implement/i, /create.*feature/i, /build.*system/i,
      /design.*api/i, /write.*tests/i, /add.*functionality/i,
    ];
    const hasCodeGen = codeGenPatterns.some(p => p.test(task)) ? 0.3 : 0;
    // Saturates at 30 words.
    const wordCountScore = Math.min(0.3, words.length / 100);
    return hasMultiFile + hasCodeGen + wordCountScore;
  }

  /**
   * Compute uncertainty level from task phrasing.
   *
   * @param {string} task - Task description.
   * @returns {number} 0.2 per matching pattern, capped at 1.
   */
  computeUncertaintyLevel(task) {
    const uncertainPatterns = [
      /not sure/i, /might/i, /maybe/i, /possibly/i, /investigate/i,
      /figure out/i, /unclear/i, /unknown/i, /debug/i, /strange/i,
      /weird/i, /issue/i, /problem/i, /error/i, /bug/i,
    ];
    const matchCount = uncertainPatterns.filter(p => p.test(task)).length;
    return Math.min(1, matchCount * 0.2);
  }

  /**
   * Compute scores for each model.
   *
   * @param {{score: number}} complexity - Result of analyzeComplexity.
   * @returns {{haiku: number, sonnet: number, opus: number, inherit: number}}
   */
  computeModelScores(complexity) {
    const { score } = complexity;
    return {
      haiku: Math.max(0, 1 - score * 2), // 1 at score 0, reaches 0 at score 0.5
      sonnet: 1 - Math.abs(score - 0.5) * 2, // peaks at score 0.5
      opus: Math.min(1, score * 1.5), // rises with score, saturates at ~0.67
      inherit: 0.1, // fixed low baseline
    };
  }

  /**
   * Apply circuit breaker adjustments.
   *
   * @param {Record<string, number>} scores - Base scores per model.
   * @returns {Record<string, number>} The same object when the breaker is
   *   disabled, otherwise an adjusted copy.
   */
  applyCircuitBreaker(scores) {
    if (!this.config.enableCircuitBreaker) {
      return scores;
    }
    const threshold = this.config.circuitBreakerThreshold;
    const adjusted = { ...scores };
    for (const model of Object.keys(adjusted)) {
      const failures = this.consecutiveFailures[model];
      if (failures >= threshold) {
        // Circuit is open: heavily penalize this model.
        adjusted[model] *= 0.1;
      }
      else if (failures > 0) {
        // Partial penalty that grows towards 50% as failures approach the threshold.
        adjusted[model] *= 1 - (failures / threshold) * 0.5;
      }
    }
    return adjusted;
  }

  /**
   * Select the best model from scores.
   *
   * @param {Record<string, number>} scores - Scores per model ('inherit' is never selected).
   * @param {number} complexityScore - Accepted for interface compatibility; not used.
   * @returns {{model: string, confidence: number, uncertainty: number}}
   */
  selectModel(scores, complexityScore) {
    const sorted = Object.entries(scores)
      .filter(([m]) => m !== 'inherit')
      .sort((a, b) => b[1] - a[1]);
    const [bestModel, bestScore] = sorted[0];
    const [, secondScore] = sorted[1] || ['sonnet', 0];
    // Confidence: the winner's share of the top-two total (0.01 avoids a
    // division by zero and keeps the value just below 1).
    const confidence = bestScore > 0 ? Math.min(1, bestScore / (bestScore + secondScore + 0.01)) : 0.5;
    const scoreSpread = bestScore - secondScore;
    const uncertainty = Math.max(0, 1 - scoreSpread - confidence * 0.5);
    let model = bestModel;
    if (uncertainty > this.config.maxUncertainty && bestModel !== 'opus') {
      // Escalate one tier: haiku -> sonnet, anything else -> opus.
      // NOTE(review): escalation does not look at the circuit breaker, so it
      // can pick a model whose circuit is currently open - confirm intent.
      model = bestModel === 'haiku' ? 'sonnet' : 'opus';
    }
    return { model, confidence, uncertainty };
  }

  /**
   * Build human-readable reasoning.
   *
   * @param {string} model - Selected model name (a MODEL_CAPABILITIES key).
   * @param {object} complexity - Result of analyzeComplexity.
   * @param {number} confidence - Confidence in [0, 1].
   * @returns {string} Parts joined with ' | '.
   */
  buildReasoning(model, complexity, confidence) {
    const parts = [];
    parts.push(`Complexity: ${(complexity.score * 100).toFixed(0)}%`);
    if (complexity.indicators.high.length > 0) {
      parts.push(`High-complexity indicators: ${complexity.indicators.high.join(', ')}`);
    }
    parts.push(`Confidence: ${(confidence * 100).toFixed(0)}%`);
    parts.push(`Model: ${model} - ${MODEL_CAPABILITIES[model].description}`);
    if (this.config.enableCostOptimization) {
      parts.push(`Cost: ${MODEL_CAPABILITIES[model].costMultiplier}x baseline`);
    }
    return parts.join(' | ');
  }

  /**
   * Track a routing decision: bump counters, update running averages and
   * save state every `config.autoSaveInterval` decisions.
   *
   * @param {string} task - Task description (currently unused).
   * @param {{model: string, complexity: number, confidence: number}} result
   */
  trackDecision(task, result) {
    this.decisionCount++;
    this.state.totalDecisions++;
    this.state.modelDistribution[result.model] =
      (this.state.modelDistribution[result.model] || 0) + 1;
    // Incremental mean: newAvg = (oldAvg * (n - 1) + x) / n.
    const n = this.state.totalDecisions;
    this.state.avgComplexity =
      (this.state.avgComplexity * (n - 1) + result.complexity) / n;
    this.state.avgConfidence =
      (this.state.avgConfidence * (n - 1) + result.confidence) / n;
    if (this.decisionCount % this.config.autoSaveInterval === 0) {
      this.saveState();
    }
  }

  /**
   * Record the outcome of a routed task.
   *
   * Updates the per-model consecutive-failure counter, appends to the
   * bounded history (last 100 entries) and saves state. `circuitBreakerTrips`
   * is incremented only when an enabled breaker actually opens, i.e. when a
   * failure takes the model's consecutive-failure count across
   * `config.circuitBreakerThreshold` - not on every failure.
   *
   * @param {string} task - Task description (first 100 characters are stored).
   * @param {string} model - Model the task ran on.
   * @param {string} outcome - 'failure' counts as a failure; any other value resets the counter.
   */
  recordOutcome(task, model, outcome) {
    if (outcome === 'failure') {
      const threshold = this.config.circuitBreakerThreshold;
      // `?? 0` keeps the counter numeric for a model name with no entry yet
      // (incrementing undefined would store NaN).
      const previous = this.consecutiveFailures[model] ?? 0;
      const current = previous + 1;
      this.consecutiveFailures[model] = current;
      const opened = previous < threshold && current >= threshold;
      if (this.config.enableCircuitBreaker && opened) {
        this.state.circuitBreakerTrips++;
      }
    }
    else {
      this.consecutiveFailures[model] = 0;
    }
    this.state.learningHistory.push({
      task: task.slice(0, 100),
      model,
      // NOTE(review): this stores the running average, not this task's own
      // complexity score - confirm whether that is intended.
      complexity: this.state.avgComplexity,
      outcome,
      timestamp: new Date().toISOString(),
    });
    if (this.state.learningHistory.length > 100) {
      this.state.learningHistory = this.state.learningHistory.slice(-100);
    }
    this.saveState();
  }

  /**
   * Get router statistics.
   *
   * @returns {object} Snapshot; the two nested objects are shallow copies.
   */
  getStats() {
    return {
      totalDecisions: this.state.totalDecisions,
      modelDistribution: { ...this.state.modelDistribution },
      avgComplexity: this.state.avgComplexity,
      avgConfidence: this.state.avgConfidence,
      circuitBreakerTrips: this.state.circuitBreakerTrips,
      consecutiveFailures: { ...this.consecutiveFailures },
    };
  }

  /**
   * Build a fresh, empty state object.
   *
   * @returns {object} New state with zeroed counters and an empty history.
   */
  createDefaultState() {
    return {
      totalDecisions: 0,
      modelDistribution: { haiku: 0, sonnet: 0, opus: 0, inherit: 0 },
      avgComplexity: 0.5,
      avgConfidence: 0.8,
      circuitBreakerTrips: 0,
      lastUpdated: new Date().toISOString(),
      learningHistory: [],
    };
  }

  /**
   * Turn parsed state-file content into a usable state object.
   *
   * Fields present in `raw` win over defaults. Fields the router operates on
   * are repaired when malformed so later calls cannot throw or produce NaN:
   * `modelDistribution` is merged over the default counters,
   * `learningHistory` must be an array, and the numeric statistics must be
   * finite numbers.
   *
   * @param {*} raw - Value returned by JSON.parse for the state file.
   * @returns {object} State object with the default shape.
   */
  normalizeState(raw) {
    const isPlainObject = (value) =>
      value !== null && typeof value === 'object' && !Array.isArray(value);
    const defaults = this.createDefaultState();
    if (!isPlainObject(raw)) {
      return defaults;
    }
    const state = { ...defaults, ...raw };
    state.modelDistribution = isPlainObject(raw.modelDistribution)
      ? { ...defaults.modelDistribution, ...raw.modelDistribution }
      : defaults.modelDistribution;
    if (!Array.isArray(state.learningHistory)) {
      state.learningHistory = [];
    }
    for (const key of ['totalDecisions', 'avgComplexity', 'avgConfidence', 'circuitBreakerTrips']) {
      if (!Number.isFinite(state[key])) {
        state[key] = defaults[key];
      }
    }
    return state;
  }

  /**
   * Load state from disk.
   *
   * Best-effort: a missing, unreadable or unparsable file yields the default
   * state instead of an error.
   *
   * @returns {object} Loaded (and normalized) state, or the default state.
   */
  loadState() {
    try {
      const fullPath = join(process.cwd(), this.config.statePath);
      if (existsSync(fullPath)) {
        const data = readFileSync(fullPath, 'utf-8');
        return this.normalizeState(JSON.parse(data));
      }
    }
    catch {
      // Deliberately ignored: persisted state is optional, fall back to defaults.
    }
    return this.createDefaultState();
  }

  /**
   * Save state to disk, creating the target directory when needed.
   *
   * Best-effort: write errors are ignored so routing keeps working when the
   * state file cannot be written.
   */
  saveState() {
    try {
      const fullPath = join(process.cwd(), this.config.statePath);
      const dir = dirname(fullPath);
      if (!existsSync(dir)) {
        mkdirSync(dir, { recursive: true });
      }
      this.state.lastUpdated = new Date().toISOString();
      writeFileSync(fullPath, JSON.stringify(this.state, null, 2));
    }
    catch {
      // Deliberately ignored: persistence is non-critical.
    }
  }

  /**
   * Reset router state: statistics, history, failure counters and the
   * decision counter are cleared and the empty state is saved.
   */
  reset() {
    this.state = this.createDefaultState();
    this.consecutiveFailures = { haiku: 0, sonnet: 0, opus: 0, inherit: 0 };
    this.decisionCount = 0;
    this.saveState();
  }
}
// ============================================================================
// Singleton & Factory Functions
// ============================================================================
// Shared router instance. Starts as null, is filled in by the first
// getModelRouter() call and set back to null by resetModelRouter().
let modelRouterInstance = null;
/**
 * Return the shared ModelRouter, creating it on first use.
 *
 * `config` is only applied when the instance is created; once an instance
 * exists, later calls return it unchanged and their `config` is ignored.
 *
 * @param {object} [config] - Options for the ModelRouter constructor.
 * @returns {ModelRouter} The shared instance.
 */
export function getModelRouter(config) {
  if (modelRouterInstance) {
    return modelRouterInstance;
  }
  modelRouterInstance = new ModelRouter(config);
  return modelRouterInstance;
}
/**
 * Reset the singleton instance.
 *
 * Only drops the module-level reference so the next getModelRouter() call
 * constructs a new router. It does not call reset() on the old instance and
 * does not touch the state file.
 */
export function resetModelRouter() {
modelRouterInstance = null;
}
/**
 * Build an independent ModelRouter that is not registered as the singleton.
 *
 * @param {object} [config] - Options for the ModelRouter constructor.
 * @returns {ModelRouter} A new router instance.
 */
export function createModelRouter(config) {
  const router = new ModelRouter(config);
  return router;
}
// ============================================================================
// Convenience Functions
// ============================================================================
/**
 * Route a task with the shared router and return only the chosen model name.
 *
 * @param {string} task - Task description.
 * @returns {Promise<string>} Name of the selected model.
 */
export async function routeToModel(task) {
  const { model } = await getModelRouter().route(task);
  return model;
}
/**
 * Route a task with the shared router and return the complete routing result.
 *
 * @param {string} task - Task description.
 * @param {ArrayLike<number>} [embedding] - Optional embedding of the task.
 * @returns {Promise<object>} The full result object produced by route().
 */
export async function routeToModelFull(task, embedding) {
  const sharedRouter = getModelRouter();
  const outcome = sharedRouter.route(task, embedding);
  return outcome;
}
/**
 * Score a task's complexity with the shared router, without routing it.
 *
 * No embedding is supplied, so the score comes from the text alone.
 *
 * @param {string} task - Task description.
 * @returns {object} Complexity analysis (score, indicators, features).
 */
export function analyzeTaskComplexity(task) {
  const noEmbedding = undefined;
  return getModelRouter().analyzeComplexity(task, noEmbedding);
}
/**
 * Read the statistics snapshot of the shared router.
 *
 * @returns {object} Result of the shared router's getStats().
 */
export function getModelRouterStats() {
  return getModelRouter().getStats();
}
/**
 * Report how a routed task turned out to the shared router.
 *
 * @param {string} task - Task description.
 * @param {string} model - Model the task ran on.
 * @param {string} outcome - Outcome label; 'failure' counts against the model.
 */
export function recordModelOutcome(task, model, outcome) {
  getModelRouter().recordOutcome(task, model, outcome);
}
//# sourceMappingURL=model-router.js.map