claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
668 lines • 25.7 kB
JavaScript
/**
* Intelligent Model Router using Tiny Dancer
*
* Dynamically routes requests to optimal Claude model (haiku/sonnet/opus)
* based on task complexity, confidence scores, and historical performance.
*
* Features:
* - FastGRNN-based routing decisions (<100μs)
* - Uncertainty quantification for model escalation
* - Circuit breaker for failover
* - Online learning from routing outcomes
* - Complexity scoring via embeddings
*
* Routing Strategy:
* - Haiku: High confidence, low complexity (fast, cheap)
* - Sonnet: Medium confidence, moderate complexity (balanced)
* - Opus: Low confidence, high complexity (most capable)
*
* @module model-router
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'fs';
import { dirname, join } from 'path';
/**
 * Capability table for every routable tier, keyed by model name.
 *
 * Each entry exposes:
 * - maxComplexity:   upper complexity score associated with the tier
 * - costMultiplier:  cost relative to Opus (Opus = 1.0; haiku ~25x cheaper,
 *                    sonnet ~5x cheaper)
 * - speedMultiplier: speed figure with Opus = 1.0 as the baseline row
 * - description:     short human-readable summary (shown in routing reasoning)
 *
 * NOTE(review): the earlier annotation for haiku's speedMultiplier read
 * "~3x faster than Sonnet", which does not line up with an Opus baseline of
 * 1.0 and sonnet at 1.5 — confirm the intended reference point.
 */
export const MODEL_CAPABILITIES = Object.fromEntries([
    // [model, maxComplexity, costMultiplier, speedMultiplier, description]
    ['haiku', 0.4, 0.04, 3.0, 'Fast, cost-effective for simple tasks'],
    ['sonnet', 0.7, 0.2, 1.5, 'Balanced capability and cost'],
    ['opus', 1.0, 1.0, 1.0, 'Most capable for complex reasoning'],
    ['inherit', 1.0, 1.0, 1.0, 'Use parent model selection'],
].map(([model, maxComplexity, costMultiplier, speedMultiplier, description]) => [
    model,
    { maxComplexity, costMultiplier, speedMultiplier, description },
]));
/**
 * Keyword lists used to classify a task description. The router checks each
 * keyword as a lowercase substring of the task text, so keyword order here is
 * also the order in which matches are reported.
 */
export const COMPLEXITY_INDICATORS = {
    // Signals of architectural, analytical or performance-sensitive work.
    high: ('architect design refactor optimize security audit ' +
        'complex analyze investigate debug performance scale ' +
        'distributed concurrent algorithm system integration').split(' '),
    // Signals of ordinary feature work and maintenance.
    medium: ('implement feature add update modify fix test ' +
        'review validate check improve enhance extend').split(' '),
    // Signals of mechanical or cosmetic edits.
    low: ('simple typo comment format rename move copy ' +
        'delete documentation readme config version bump').split(' '),
};
/**
 * Cost-adjusted rewards in [0, 1] applied to the Thompson-sampling priors when
 * an outcome is recorded. A cheaper tier that succeeds earns more than an
 * expensive tier that succeeds (haiku 1.0 > sonnet 0.7 > opus 0.4), because a
 * correct answer from an oversized model is still wasteful. Failures always
 * earn 0. Escalations earn 0 except for sonnet, which gets a small partial
 * credit (0.1).
 */
const BANDIT_REWARDS = Object.fromEntries([
    // [model, reward on success, reward on escalation]
    ['haiku', 1.0, 0.0],
    ['sonnet', 0.7, 0.1],
    ['opus', 0.4, 0.0],
    ['inherit', 0.5, 0.0],
].map(([model, success, escalated]) => [model, { success, failure: 0.0, escalated }]));
/**
 * Map a complexity score onto the bucket used to key the bandit priors.
 *
 * @param {number} score - complexity score (normally within [0, 1])
 * @returns {'low'|'med'|'high'} 'low' below 0.4 (haiku territory), 'med'
 *   below 0.7 (sonnet territory), otherwise 'high' (opus territory). Any
 *   value that fails both comparisons, including NaN, lands in 'high'.
 */
function complexityBucket(score) {
    return score < 0.4 ? 'low' : score < 0.7 ? 'med' : 'high';
}
// ============================================================================
// Beta Sampling for Thompson Sampling Bandit
// ============================================================================
/**
 * Draw one sample from N(0, 1) with the Box-Muller transform. Kept local to
 * this module so the Gamma sampler needs no external statistics package.
 *
 * @returns {number} a standard-normal variate
 */
function sampleStandardNormal() {
    // A draw of exactly 0 would make log() return -Infinity, so nudge it.
    const radialDraw = Math.random() || 1e-12;
    const angularDraw = Math.random();
    const radius = Math.sqrt(-2 * Math.log(radialDraw));
    const angle = 2 * Math.PI * angularDraw;
    return radius * Math.cos(angle);
}
/**
 * Sample from Gamma(shape α, scale = 1) using Marsaglia & Tsang (2000).
 *
 * Shapes below 1 use the standard boost: sample Gamma(α + 1) and multiply by
 * U^(1/α) for a uniform U.
 *
 * @param {number} alpha - shape parameter; must be a finite number > 0
 * @returns {number} a Gamma variate, or NaN when `alpha` is outside the
 *   distribution's domain (NaN, ±Infinity, non-numeric, zero or negative).
 *   Returning NaN rather than looping matters: with a NaN shape every
 *   comparison in the rejection loop is false, so the loop would never exit.
 */
function sampleGamma(alpha) {
    if (!Number.isFinite(alpha) || alpha <= 0) {
        return NaN;
    }
    // Boost for α < 1. The uniform is drawn before the Gamma(α + 1) sample so
    // the sequence of random draws is the same as in the recursive formulation.
    let shape = alpha;
    let boost = 1;
    if (alpha < 1) {
        const u = Math.random() || 1e-12; // avoid 0^(1/α) collapsing the sample
        boost = Math.pow(u, 1 / alpha);
        shape = alpha + 1;
    }
    const d = shape - 1 / 3;
    const c = 1 / Math.sqrt(9 * d);
    while (true) {
        let x;
        let v;
        // Redraw until the cubed term below is guaranteed positive.
        do {
            x = sampleStandardNormal();
            v = 1 + c * x;
        } while (v <= 0);
        v = v * v * v;
        const u = Math.random();
        const xx = x * x;
        // Cheap squeeze test first, exact log-acceptance test second.
        if (u < 1 - 0.0331 * xx * xx ||
            Math.log(u) < 0.5 * xx + d * (1 - v + Math.log(v))) {
            return d * v * boost;
        }
    }
}
/**
 * Sample θ ~ Beta(α, β) via the identity Beta(α, β) = X / (X + Y) with
 * X ~ Gamma(α) and Y ~ Gamma(β).
 *
 * @param {number} alpha - first shape parameter; expected finite and > 0
 * @param {number} beta - second shape parameter; expected finite and > 0
 * @returns {number} a value in [0, 1]. Falls back to the neutral value 0.5
 *   when either parameter is unusable (NaN, ±Infinity, non-numeric, zero or
 *   negative) or when the Gamma draws do not give a positive finite sum, so
 *   corrupted priors can neither hang the sampler nor leak NaN into scores.
 */
function sampleBeta(alpha, beta) {
    const isUsableShape = (value) => Number.isFinite(value) && value > 0;
    if (!isUsableShape(alpha) || !isUsableShape(beta)) {
        return 0.5;
    }
    const x = sampleGamma(alpha);
    const y = sampleGamma(beta);
    const denom = x + y;
    // Both draws can underflow to 0 for tiny shapes; NaN/Infinity is defensive.
    return denom > 0 && Number.isFinite(denom) ? x / denom : 0.5;
}
/**
 * Build cold-start priors for one complexity bucket: Beta(1, 1) for every
 * model, i.e. a uniform belief over [0, 1] with no prior knowledge.
 *
 * @returns {object} a fresh object with an independent `{ alpha, beta }`
 *   entry for haiku, sonnet, opus and inherit
 */
function defaultBanditPriors() {
    const priors = {};
    for (const model of ['haiku', 'sonnet', 'opus', 'inherit']) {
        priors[model] = { alpha: 1, beta: 1 };
    }
    return priors;
}
/**
 * Cold-start priors for all complexity buckets.
 *
 * @returns {object} `{ low, med, high }`, each holding its own freshly built
 *   set of default per-model priors
 */
function defaultBucketedPriors() {
    const buckets = {};
    for (const bucketName of ['low', 'med', 'high']) {
        buckets[bucketName] = defaultBanditPriors();
    }
    return buckets;
}
/**
 * Copy one bucket of per-model priors so the copy can be mutated freely.
 *
 * @param {object} p - per-model priors (`haiku`, `sonnet`, `opus`, `inherit`)
 * @returns {object} a new object whose four entries are shallow copies of the
 *   corresponding entries in `p`; a model missing from `p` becomes `{}`
 */
function clonePriors(p) {
    const copy = {};
    for (const model of ['haiku', 'sonnet', 'opus', 'inherit']) {
        copy[model] = { ...p[model] };
    }
    return copy;
}
/**
 * Forward-migrate a persisted `priors` field of any layout to the bucketed
 * shape `{ low, med, high }`, never throwing (ADR-142):
 * - missing/garbage → fresh uniform buckets
 * - already bucketed (has `low.haiku`) → kept; a missing or non-object `med`
 *   or `high` bucket is backfilled with a copy of `low`
 * - flat per-model (v1 bandit) → every bucket is seeded from it, so earlier
 *   learning becomes a shared starting point that then diverges per bucket
 *
 * Every returned bucket is a fresh object containing all four models. A model
 * entry is kept only when both `alpha` and `beta` are finite numbers > 0;
 * anything else is reset to the uniform Beta(1, 1). Without that repair a
 * half-written or hand-edited state file would hand undefined or NaN shapes
 * to the sampler.
 *
 * @param {*} p - raw `priors` value read from disk
 * @returns {object} bucketed priors safe to sample from and update
 */
function migratePriors(p) {
    const models = ['haiku', 'sonnet', 'opus', 'inherit'];
    const isObject = (value) => value !== null && typeof value === 'object';
    const isShape = (value) => typeof value === 'number' && Number.isFinite(value) && value > 0;
    // Rebuild one bucket, keeping valid entries and resetting the rest.
    const repairBucket = (bucket) => {
        const repaired = {};
        for (const model of models) {
            const entry = isObject(bucket) ? bucket[model] : undefined;
            const valid = isObject(entry) && isShape(entry.alpha) && isShape(entry.beta);
            repaired[model] = valid
                ? { alpha: entry.alpha, beta: entry.beta }
                : { alpha: 1, beta: 1 };
        }
        return repaired;
    };
    if (!isObject(p)) {
        return defaultBucketedPriors();
    }
    // Already bucketed.
    if (isObject(p.low) && p.low.haiku) {
        return {
            low: repairBucket(p.low),
            med: repairBucket(isObject(p.med) ? p.med : p.low),
            high: repairBucket(isObject(p.high) ? p.high : p.low),
        };
    }
    // Flat v1 layout: one set of per-model priors shared by all buckets.
    if (p.haiku && typeof p.haiku.alpha === 'number') {
        return { low: repairBucket(p), med: repairBucket(p), high: repairBucket(p) };
    }
    return defaultBucketedPriors();
}
// ============================================================================
// Default Configuration
// ============================================================================
/**
 * Baseline router configuration. Values passed to the ModelRouter constructor
 * are spread over these, so any key may be overridden individually.
 */
const DEFAULT_CONFIG = {
    // NOTE(review): not read by any router code visible in this file.
    confidenceThreshold: 0.85,
    // Selection escalates one tier when computed uncertainty exceeds this.
    maxUncertainty: 0.15,
    // When false, failure counters do not penalize model scores.
    enableCircuitBreaker: true,
    // Consecutive failures at which a model's score is cut to 10%.
    circuitBreakerThreshold: 5,
    // State file location, joined onto process.cwd().
    statePath: '.swarm/model-router-state.json',
    // Persist after every Nth tracked decision; 1 keeps CLI runs durable.
    autoSaveInterval: 1,
    // When true, routing reasoning includes the cost multiplier.
    enableCostOptimization: true,
    // NOTE(review): not read by any router code visible in this file.
    preferSpeed: true,
};
// ============================================================================
// Model Router Implementation
// ============================================================================
/**
 * Build the pristine router state used on first run, by reset(), and as the
 * fallback whenever the persisted state file is missing or unusable.
 *
 * @returns {object} a fresh state object (no shared references)
 */
function createInitialRouterState() {
    return {
        totalDecisions: 0,
        modelDistribution: { haiku: 0, sonnet: 0, opus: 0, inherit: 0 },
        avgComplexity: 0.5,
        avgConfidence: 0.8,
        circuitBreakerTrips: 0,
        lastUpdated: new Date().toISOString(),
        learningHistory: [],
        version: 2,
        priors: defaultBucketedPriors(),
    };
}
/**
 * Intelligent Model Router using complexity-based routing.
 *
 * A task description is scored for complexity from text heuristics (and an
 * optional embedding), turned into per-model base scores, penalized for
 * recent failures, and finally weighted by a Thompson-sampling draw from
 * per-bucket Beta priors that are updated through recordOutcome().
 * Statistics, a bounded outcome history and the priors are persisted as JSON
 * under `config.statePath`; the consecutive-failure counters live in memory
 * only.
 */
export class ModelRouter {
    /** Effective configuration: DEFAULT_CONFIG overlaid with caller overrides. */
    config;
    /** Persisted statistics, bounded outcome history and bandit priors. */
    state;
    /** Decisions routed by this instance; drives the auto-save cadence. */
    decisionCount = 0;
    /** Consecutive failures per model, reset by any non-failure outcome. */
    consecutiveFailures = {
        haiku: 0,
        sonnet: 0,
        opus: 0,
        inherit: 0,
    };
    /**
     * @param {object} [config] - partial configuration overriding DEFAULT_CONFIG
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.state = this.loadState();
    }
    /**
     * Route a task to a model.
     *
     * @param {string} task - task description
     * @param {number[]} [embedding] - optional embedding of the task
     * @returns {Promise<object>} the chosen model with confidence, uncertainty,
     *   complexity score, human-readable reasoning, the remaining models ranked
     *   by their adjusted (pre-sampling) score, timing in microseconds, and
     *   the chosen model's cost multiplier
     */
    async route(task, embedding) {
        const startTime = performance.now();
        const complexity = this.analyzeComplexity(task, embedding);
        const scores = this.computeModelScores(complexity);
        const adjustedScores = this.applyCircuitBreaker(scores);
        const { model, confidence, uncertainty } = this.selectModel(adjustedScores, complexity.score);
        const inferenceTimeUs = (performance.now() - startTime) * 1000;
        const alternatives = Object.entries(adjustedScores)
            .filter(([candidate]) => candidate !== model)
            .map(([candidate, score]) => ({ model: candidate, score }))
            .sort((a, b) => b.score - a.score);
        const result = {
            model,
            confidence,
            uncertainty,
            complexity: complexity.score,
            reasoning: this.buildReasoning(model, complexity, confidence),
            alternatives,
            inferenceTimeUs,
            costMultiplier: MODEL_CAPABILITIES[model].costMultiplier,
        };
        this.trackDecision(task, result);
        return result;
    }
    /**
     * Analyze task complexity.
     *
     * @param {string} task - task description
     * @param {number[]} [embedding] - optional embedding of the task
     * @returns {object} `score` clamped to [0, 1], the matched `indicators`
     *   per level, and the four `features` the score is a weighted sum of
     */
    analyzeComplexity(task, embedding) {
        const taskLower = task.toLowerCase();
        const words = taskLower.split(/\s+/);
        // Indicators match as substrings of the lowercased task.
        const matching = (keywords) => keywords.filter((keyword) => taskLower.includes(keyword));
        const indicators = {
            high: matching(COMPLEXITY_INDICATORS.high),
            medium: matching(COMPLEXITY_INDICATORS.medium),
            low: matching(COMPLEXITY_INDICATORS.low),
        };
        const lexicalComplexity = this.computeLexicalComplexity(task);
        const semanticDepth = this.computeSemanticDepth(indicators, embedding);
        const taskScope = this.computeTaskScope(task, words);
        const uncertaintyLevel = this.computeUncertaintyLevel(task);
        // Weighted combination, clamped to [0, 1].
        const score = Math.min(1, Math.max(0, lexicalComplexity * 0.2 +
            semanticDepth * 0.35 +
            taskScope * 0.25 +
            uncertaintyLevel * 0.2));
        return {
            score,
            indicators,
            features: {
                lexicalComplexity,
                semanticDepth,
                taskScope,
                uncertaintyLevel,
            },
        };
    }
    /**
     * Compute lexical complexity from word count and average word length.
     *
     * @param {string} task - task description
     * @returns {number} a score in [0, 1]: 40% from word count (saturating at
     *   50 words) and 60% from average word length mapped from 3..10 chars
     */
    computeLexicalComplexity(task) {
        // Leading/trailing whitespace yields empty tokens; they are not words.
        const words = task.split(/\s+/).filter((word) => word.length > 0);
        const totalChars = words.reduce((sum, word) => sum + word.length, 0);
        const avgWordLength = totalChars / Math.max(1, words.length);
        const lengthScore = Math.min(1, words.length / 50);
        // Clamp below as well: averages under 3 chars must score 0, not go
        // negative and drag the combined complexity down.
        const wordScore = Math.min(1, Math.max(0, (avgWordLength - 3) / 7));
        return lengthScore * 0.4 + wordScore * 0.6;
    }
    /**
     * Compute semantic depth from indicators and embedding.
     *
     * @param {object} indicators - matched keywords per level (high/medium/low)
     * @param {number[]} [embedding] - optional embedding of the task
     * @returns {number} a score in [0, 1]
     */
    computeSemanticDepth(indicators, embedding) {
        const highWeight = indicators.high.length * 0.3;
        const mediumWeight = indicators.medium.length * 0.15;
        const lowWeight = indicators.low.length * -0.1;
        let baseScore = Math.min(1, Math.max(0, 0.3 + highWeight + mediumWeight + lowWeight));
        if (embedding && embedding.length > 0) {
            const mean = embedding.reduce((a, b) => a + b, 0) / embedding.length;
            const variance = embedding.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / embedding.length;
            // Blend in embedding variance (scaled by 10, capped at 1) at 30%.
            baseScore = baseScore * 0.7 + Math.min(1, variance * 10) * 0.3;
        }
        return baseScore;
    }
    /**
     * Compute task scope from content analysis.
     *
     * @param {string} task - task description
     * @param {string[]} words - whitespace-split tokens of the task
     * @returns {number} 0.4 for any multi-file phrase, plus 0.3 for any
     *   code-generation phrase, plus up to 0.3 from the word count
     */
    computeTaskScope(task, words) {
        const multiFilePatterns = [
            /multiple files?/i, /across.*modules?/i, /refactor.*codebase/i,
            /all.*files/i, /entire.*project/i, /system.*wide/i,
        ];
        const hasMultiFile = multiFilePatterns.some(p => p.test(task)) ? 0.4 : 0;
        const codeGenPatterns = [
            /implement/i, /create.*feature/i, /build.*system/i,
            /design.*api/i, /write.*tests/i, /add.*functionality/i,
        ];
        const hasCodeGen = codeGenPatterns.some(p => p.test(task)) ? 0.3 : 0;
        const wordCountScore = Math.min(0.3, words.length / 100);
        return hasMultiFile + hasCodeGen + wordCountScore;
    }
    /**
     * Compute uncertainty level from task phrasing.
     *
     * @param {string} task - task description
     * @returns {number} 0.2 per distinct matching pattern, capped at 1
     */
    computeUncertaintyLevel(task) {
        const uncertainPatterns = [
            /not sure/i, /might/i, /maybe/i, /possibly/i, /investigate/i,
            /figure out/i, /unclear/i, /unknown/i, /debug/i, /strange/i,
            /weird/i, /issue/i, /problem/i, /error/i, /bug/i,
        ];
        const matchCount = uncertainPatterns.filter(p => p.test(task)).length;
        return Math.min(1, matchCount * 0.2);
    }
    /**
     * Compute base scores for each model from the complexity score.
     *
     * @param {object} complexity - result of analyzeComplexity()
     * @returns {object} per-model score
     */
    computeModelScores(complexity) {
        const { score } = complexity;
        return {
            haiku: Math.max(0, 1 - score * 2), // Drops off quickly as complexity rises
            sonnet: 1 - Math.abs(score - 0.5) * 2, // Peaks at medium complexity
            opus: Math.min(1, score * 1.5), // Rises with complexity
            inherit: 0.1, // Low constant baseline
        };
    }
    /**
     * Apply circuit breaker adjustments.
     *
     * @param {object} scores - per-model scores
     * @returns {object} the same object when the breaker is disabled,
     *   otherwise an adjusted copy
     */
    applyCircuitBreaker(scores) {
        if (!this.config.enableCircuitBreaker) {
            return scores;
        }
        const threshold = this.config.circuitBreakerThreshold;
        const adjusted = { ...scores };
        for (const model of Object.keys(adjusted)) {
            const failures = this.consecutiveFailures[model];
            if (failures >= threshold) {
                // Circuit is open - heavily penalize this model
                adjusted[model] *= 0.1;
            }
            else if (failures > 0) {
                // Partial penalty growing with recent failures (up to 50%)
                adjusted[model] *= 1 - (failures / threshold) * 0.5;
            }
        }
        return adjusted;
    }
    /**
     * Select the best model from scores using Thompson sampling (#1772):
     * each model's score is multiplied by a draw θ_m ~ Beta(α_m, β_m) from the
     * priors of the task's complexity bucket (ADR-142), so models with better
     * recorded outcomes tend to be sampled higher. On a cold start every prior
     * is Beta(1, 1), so the draws are uniform on [0, 1] and selection is
     * randomized rather than deterministic. 'inherit' never takes part.
     *
     * @param {object} scores - per-model scores
     * @param {number} complexityScore - complexity score selecting the bucket
     * @returns {{model: string, confidence: number, uncertainty: number}}
     */
    selectModel(scores, complexityScore) {
        const bucketed = this.state.priors ?? defaultBucketedPriors();
        const priors = bucketed[complexityBucket(complexityScore)] ?? defaultBanditPriors();
        const draw = (name) => {
            // A bucket missing this arm is treated as an unlearned Beta(1, 1).
            const arm = priors[name] ?? { alpha: 1, beta: 1 };
            return sampleBeta(arm.alpha, arm.beta);
        };
        // Draw order (haiku, sonnet, opus) is fixed; the sort is stable.
        const ranked = [
            ['haiku', scores.haiku * draw('haiku')],
            ['sonnet', scores.sonnet * draw('sonnet')],
            ['opus', scores.opus * draw('opus')],
        ].sort((a, b) => b[1] - a[1]);
        const [bestModel, bestScore] = ranked[0];
        const secondScore = ranked[1][1];
        // Confidence is how much better the best is vs second
        const confidence = bestScore > 0 ? Math.min(1, bestScore / (bestScore + secondScore + 0.01)) : 0.5;
        // Uncertainty shrinks with the score gap and with confidence
        const scoreSpread = bestScore - secondScore;
        const uncertainty = Math.max(0, 1 - scoreSpread - confidence * 0.5);
        // Escalate one tier when uncertainty is too high (opus cannot escalate)
        let model = bestModel;
        if (uncertainty > this.config.maxUncertainty && bestModel !== 'opus') {
            model = bestModel === 'haiku' ? 'sonnet' : 'opus';
        }
        return { model, confidence, uncertainty };
    }
    /**
     * Build human-readable reasoning.
     *
     * @param {string} model - chosen model
     * @param {object} complexity - result of analyzeComplexity()
     * @param {number} confidence - selection confidence in [0, 1]
     * @returns {string} parts joined with ' | '
     */
    buildReasoning(model, complexity, confidence) {
        const parts = [];
        parts.push(`Complexity: ${(complexity.score * 100).toFixed(0)}%`);
        if (complexity.indicators.high.length > 0) {
            parts.push(`High-complexity indicators: ${complexity.indicators.high.join(', ')}`);
        }
        parts.push(`Confidence: ${(confidence * 100).toFixed(0)}%`);
        parts.push(`Model: ${model} - ${MODEL_CAPABILITIES[model].description}`);
        if (this.config.enableCostOptimization) {
            parts.push(`Cost: ${MODEL_CAPABILITIES[model].costMultiplier}x baseline`);
        }
        return parts.join(' | ');
    }
    /**
     * Track a routing decision: counters, distribution and running averages,
     * then persist on every `autoSaveInterval`-th decision of this instance.
     *
     * @param {string} task - task description (currently unused here)
     * @param {object} result - the routing result returned by route()
     */
    trackDecision(task, result) {
        this.decisionCount++;
        this.state.totalDecisions++;
        this.state.modelDistribution[result.model] =
            (this.state.modelDistribution[result.model] || 0) + 1;
        // Incremental mean over all decisions so far
        const n = this.state.totalDecisions;
        this.state.avgComplexity =
            (this.state.avgComplexity * (n - 1) + result.complexity) / n;
        this.state.avgConfidence =
            (this.state.avgConfidence * (n - 1) + result.confidence) / n;
        if (this.decisionCount % this.config.autoSaveInterval === 0) {
            this.saveState();
        }
    }
    /**
     * Record the outcome of a routed task so the router can learn from it.
     *
     * @param {string} task - the task description that was routed
     * @param {string} model - the model that handled it (haiku/sonnet/opus/inherit)
     * @param {string} outcome - 'success', 'failure' or 'escalated'; any other
     *   value is treated as a non-failure with a neutral reward of 0.5
     * @throws {TypeError} when `model` is not a known model; raised before any
     *   state is touched so a bad call cannot leave counters half-updated
     */
    recordOutcome(task, model, outcome) {
        if (!Object.prototype.hasOwnProperty.call(BANDIT_REWARDS, model)) {
            throw new TypeError(`Unknown model "${String(model)}"; expected one of: ${Object.keys(BANDIT_REWARDS).join(', ')}`);
        }
        // Re-derive this task's bucket with the same analyzeComplexity path
        // route() uses (without an embedding, since none is available here).
        const taskScore = this.analyzeComplexity(task).score;
        const bucket = complexityBucket(taskScore);
        // Update circuit breaker state. Note: circuitBreakerTrips counts every
        // recorded failure, not only the ones that open the circuit.
        if (outcome === 'failure') {
            this.consecutiveFailures[model]++;
            this.state.circuitBreakerTrips++;
        }
        else {
            this.consecutiveFailures[model] = 0;
        }
        // Track in history (this task's own score), bounded to the last 100
        this.state.learningHistory.push({
            task: task.slice(0, 100),
            model,
            complexity: taskScore,
            outcome,
            timestamp: new Date().toISOString(),
        });
        if (this.state.learningHistory.length > 100) {
            this.state.learningHistory = this.state.learningHistory.slice(-100);
        }
        // Thompson sampling update (#1772): α grows by the cost-adjusted
        // reward and β by its complement, so each outcome adds one unit of
        // evidence to the arm.
        if (!this.state.priors) {
            this.state.priors = defaultBucketedPriors();
        }
        const bucketPriors = this.state.priors[bucket] ?? (this.state.priors[bucket] = defaultBanditPriors());
        // Backfill an arm that a partially-written state file may lack.
        const arm = bucketPriors[model] ?? (bucketPriors[model] = { alpha: 1, beta: 1 });
        const reward = BANDIT_REWARDS[model][outcome] ?? 0.5;
        arm.alpha += reward;
        arm.beta += 1 - reward;
        this.saveState();
    }
    /**
     * Get router statistics.
     *
     * @returns {object} a snapshot; nested objects are copies
     */
    getStats() {
        return {
            totalDecisions: this.state.totalDecisions,
            modelDistribution: { ...this.state.modelDistribution },
            avgComplexity: this.state.avgComplexity,
            avgConfidence: this.state.avgConfidence,
            circuitBreakerTrips: this.state.circuitBreakerTrips,
            consecutiveFailures: { ...this.consecutiveFailures },
        };
    }
    /**
     * Load state from disk, best effort: a missing, unreadable, unparsable or
     * non-object state file yields the initial state instead of an error.
     * Fields with the wrong type are replaced by their defaults, and priors of
     * any layout are forward-migrated to the bucketed shape (ADR-142).
     *
     * @returns {object} router state
     */
    loadState() {
        const fallback = createInitialRouterState();
        try {
            const fullPath = join(process.cwd(), this.config.statePath);
            if (!existsSync(fullPath)) {
                return fallback;
            }
            const loaded = JSON.parse(readFileSync(fullPath, 'utf-8'));
            if (loaded === null || typeof loaded !== 'object' || Array.isArray(loaded)) {
                return fallback;
            }
            const numberOr = (value, defaultValue) => typeof value === 'number' && Number.isFinite(value) ? value : defaultValue;
            const distribution = loaded.modelDistribution !== null && typeof loaded.modelDistribution === 'object'
                ? loaded.modelDistribution
                : {};
            return {
                ...fallback,
                ...loaded,
                totalDecisions: numberOr(loaded.totalDecisions, fallback.totalDecisions),
                avgComplexity: numberOr(loaded.avgComplexity, fallback.avgComplexity),
                avgConfidence: numberOr(loaded.avgConfidence, fallback.avgConfidence),
                circuitBreakerTrips: numberOr(loaded.circuitBreakerTrips, fallback.circuitBreakerTrips),
                modelDistribution: { ...fallback.modelDistribution, ...distribution },
                learningHistory: Array.isArray(loaded.learningHistory) ? loaded.learningHistory : [],
                priors: migratePriors(loaded.priors),
                version: 2,
            };
        }
        catch {
            // Deliberately best effort: fall through to the initial state.
        }
        return fallback;
    }
    /**
     * Save state to disk, creating the parent directory when needed. Failures
     * are ignored on purpose: persistence is non-critical to routing.
     */
    saveState() {
        try {
            const fullPath = join(process.cwd(), this.config.statePath);
            // `recursive: true` makes this a no-op when the directory exists.
            mkdirSync(dirname(fullPath), { recursive: true });
            this.state.lastUpdated = new Date().toISOString();
            writeFileSync(fullPath, JSON.stringify(this.state, null, 2));
        }
        catch {
            // Ignore save errors in non-critical scenarios
        }
    }
    /**
     * Reset router state (persisted state, failure counters and decision
     * count) to its initial values and save it.
     */
    reset() {
        this.state = createInitialRouterState();
        this.consecutiveFailures = { haiku: 0, sonnet: 0, opus: 0, inherit: 0 };
        this.decisionCount = 0;
        this.saveState();
    }
    /**
     * Read-only accessor for the bandit priors of one complexity bucket.
     *
     * @param {string} [bucket='med'] - 'low', 'med' or 'high'
     * @returns {object} a copy of that bucket's per-model priors (uniform
     *   defaults when the bucket is absent)
     */
    getBanditPriors(bucket = 'med') {
        const bucketed = this.state.priors ?? defaultBucketedPriors();
        return clonePriors(bucketed[bucket] ?? defaultBanditPriors());
    }
    /**
     * All bucketed priors (copy) — for dashboards/tests.
     *
     * @returns {object} `{ low, med, high }` copies
     */
    getBucketedPriors() {
        const bucketed = this.state.priors ?? defaultBucketedPriors();
        return {
            low: clonePriors(bucketed.low ?? defaultBanditPriors()),
            med: clonePriors(bucketed.med ?? defaultBanditPriors()),
            high: clonePriors(bucketed.high ?? defaultBanditPriors()),
        };
    }
}
// ============================================================================
// Singleton & Factory Functions
// ============================================================================
/** Lazily created shared router; null until first requested or after a reset. */
let modelRouterInstance = null;
/**
 * Return the shared ModelRouter, creating it on first use.
 *
 * @param {object} [config] - configuration used only when the shared
 *   instance does not exist yet; ignored on later calls
 * @returns {ModelRouter} the shared instance
 */
export function getModelRouter(config) {
    if (modelRouterInstance) {
        return modelRouterInstance;
    }
    modelRouterInstance = new ModelRouter(config);
    return modelRouterInstance;
}
/**
 * Drop the shared ModelRouter so the next getModelRouter() call builds a new
 * one. Only the reference is cleared; the dropped router is not modified.
 *
 * @returns {void}
 */
export function resetModelRouter() {
    modelRouterInstance = null;
}
/**
 * Build an independent ModelRouter that is not registered as the shared
 * instance.
 *
 * @param {object} [config] - configuration forwarded to the constructor
 * @returns {ModelRouter} a new router
 */
export function createModelRouter(config) {
    const router = new ModelRouter(config);
    return router;
}
// ============================================================================
// Convenience Functions
// ============================================================================
/**
 * Route a task with the shared router and return only the chosen model name.
 *
 * @param {string} task - task description
 * @returns {Promise<string>} the selected model
 */
export async function routeToModel(task) {
    const { model } = await getModelRouter().route(task);
    return model;
}
/**
 * Route a task with the shared router and return the complete routing result.
 *
 * @param {string} task - task description
 * @param {number[]} [embedding] - optional embedding of the task
 * @returns {Promise<object>} the full result produced by ModelRouter#route
 */
export async function routeToModelFull(task, embedding) {
    const sharedRouter = getModelRouter();
    const fullResult = sharedRouter.route(task, embedding);
    return fullResult;
}
/**
 * Score a task's complexity with the shared router without routing it. No
 * embedding is supplied, so only the text of the task is analyzed.
 *
 * @param {string} task - task description
 * @returns {object} the analysis produced by ModelRouter#analyzeComplexity
 */
export function analyzeTaskComplexity(task) {
    return getModelRouter().analyzeComplexity(task, undefined);
}
/**
 * Fetch the shared router's statistics.
 *
 * @returns {object} whatever ModelRouter#getStats returns
 */
export function getModelRouterStats() {
    return getModelRouter().getStats();
}
/**
 * Report how a routed task turned out so the shared router can learn from it.
 *
 * @param {string} task - the task description that was routed
 * @param {string} model - the model that handled the task
 * @param {string} outcome - the outcome label forwarded to the router
 * @returns {void}
 */
export function recordModelOutcome(task, model, outcome) {
    getModelRouter().recordOutcome(task, model, outcome);
}
//# sourceMappingURL=model-router.js.map