@clduab11/gemini-flow
Version:
Revolutionary AI agent swarm coordination platform with Google Services integration, multimedia processing, and production-ready monitoring. Features 8 Google AI services, quantum computing capabilities, and enterprise-grade security.
1,460 lines (1,253 loc) • 40 kB
text/typescript
/**
* High-Performance Smart Routing Engine
*
* Intelligent model routing with <75ms overhead guarantee
* Features: LRU cache, intelligent selection, performance monitoring
* Performance target: Sub-75ms routing decisions with 95% accuracy
*/
import { Logger } from "../utils/logger.js";
import { ModelConfig, RoutingContext } from "./model-orchestrator.js";
import { EventEmitter } from "events";
/** A declarative routing rule: when `condition` matches a request, the rule's model preferences are considered. */
export interface RoutingRule {
  /** Unique identifier; used as the map key inside the router. */
  id: string;
  /** Human-readable rule name (logging/diagnostics only). */
  name: string;
  /** Predicate evaluated against the incoming routing context. */
  condition: (context: RoutingContext) => boolean;
  /** Ordered list of preferred model names (most preferred first). */
  modelPreference: string[];
  /** Higher weight = rule contributes candidates earlier. */
  weight: number;
  /** Inactive rules are skipped entirely. */
  active: boolean;
}
/** Rolling performance record for one model, updated via exponential moving averages. */
export interface ModelPerformance {
  modelName: string;
  /** EMA of observed latency in milliseconds. */
  avgLatency: number;
  /** (usageCount - errorCount) / usageCount. */
  successRate: number;
  /** EMA of observed per-invocation cost. */
  avgCost: number;
  lastUsed: Date;
  usageCount: number;
  errorCount: number;
  /** Typical request complexity handled by this model (0-1). */
  complexityScore: number;
  /** EMA of output/total token ratio. */
  tokenEfficiency: number;
}
/** One entry in the routing LRU cache. */
export interface CacheEntry {
  key: string;
  /** Model chosen when this entry was created. */
  modelName: string;
  /** Creation time (ms epoch); compared against CACHE_TTL. */
  timestamp: number;
  /** Number of times this entry has been served. */
  accessCount: number;
  metadata: any;
}
/** Result of a routing pass, including how long the decision took. */
export interface RoutingDecision {
  modelName: string;
  /** 0-1; 0.95 for cache hits, lower for fallbacks. */
  confidence: number;
  reason: string;
  /** Wall-clock routing overhead in milliseconds. */
  routingTime: number;
  fromCache: boolean;
}
/** Output of the fast request-complexity analyzer. */
export interface ComplexityAnalysis {
  /** Aggregate complexity in [0, 1]. */
  score: number;
  factors: {
    /** Rough token estimate (~4 chars/token). */
    tokenCount: number;
    /** 0-1 share of "complex" keywords. */
    keywordComplexity: number;
    /** 0-1 share of code-like structural markers. */
    structuralComplexity: number;
    domainSpecific: boolean;
  };
}
export class ModelRouter extends EventEmitter {
  private logger: Logger;
  /** Registered routing rules keyed by rule id. */
  private rules: Map<string, RoutingRule> = new Map();
  /** Historical per-model performance records. */
  private performance: Map<string, ModelPerformance> = new Map();
  private loadBalancer: Map<string, number> = new Map(); // Model usage counters
  // High-performance LRU cache with 1000 entry limit
  private routingCache: Map<string, CacheEntry> = new Map();
  /** Keys ordered least- to most-recently used; kept parallel to routingCache. */
  private cacheAccessOrder: string[] = [];
  private readonly CACHE_LIMIT = 1000;
  private readonly CACHE_TTL = 300000; // 5 minutes
  // Performance monitoring: rolling window of recent routing durations (ms)
  private routingTimes: number[] = [];
  private readonly MAX_ROUTING_TIME_SAMPLES = 100;
  private readonly ROUTING_TIME_TARGET = 75; // milliseconds
  // Intelligent complexity analysis cache (task-prefix -> analysis)
  private complexityCache: Map<string, ComplexityAnalysis> = new Map();
  // Routing weights for different factors (optimized for <75ms)
  private weights = {
    latency: 0.35,
    cost: 0.15,
    reliability: 0.25,
    userTier: 0.15,
    complexity: 0.1,
  };
  // Model tier mapping for quick access
  // NOTE(review): modelTierMap is never read or written in this class — candidate for removal.
  private modelTierMap: Map<string, string> = new Map();
  /** Last-known availability per model name; refreshed on every routing call. */
  private availabilityMap: Map<string, boolean> = new Map();

  /** Wires up default rules, background monitoring timers, and warms the complexity cache. */
  constructor() {
    super();
    this.logger = new Logger("SmartModelRouter");
    this.initializeDefaultRules();
    this.startPerformanceMonitoring();
    this.warmupComplexityAnalyzer();
    this.logger.info("Smart routing engine initialized", {
      cacheLimit: this.CACHE_LIMIT,
      routingTarget: `${this.ROUTING_TIME_TARGET}ms`,
      features: ["LRU cache", "complexity analysis", "intelligent selection"],
    });
  }
/**
* Initialize default routing rules
*/
private initializeDefaultRules(): void {
// Rule 1: Route critical tasks to most reliable models
this.addRule({
id: "critical-tasks",
name: "Critical Task Routing",
condition: (ctx) => ctx.priority === "critical",
modelPreference: [
"gemini-2.5-deep-think",
"gemini-2.5-pro",
"gemini-pro-vertex",
"gemini-2.0-flash-thinking",
],
weight: 10,
active: true,
});
// Rule 2: Route enterprise users to premium models
this.addRule({
id: "enterprise-tier",
name: "Enterprise Tier Routing",
condition: (ctx) => ctx.userTier === "enterprise",
modelPreference: [
"gemini-2.5-deep-think",
"gemini-2.5-pro",
"gemini-pro-vertex",
"gemini-2.0-flash-thinking",
],
weight: 8,
active: true,
});
// Rule 3: Route low-latency requirements to fast models
this.addRule({
id: "low-latency",
name: "Low Latency Routing",
condition: (ctx) => ctx.latencyRequirement < 1000,
modelPreference: [
"gemini-2.5-flash",
"gemini-2.0-flash",
"gemini-2.0-flash-thinking",
],
weight: 7,
active: true,
});
// Rule 4: Route code tasks to specialized models
this.addRule({
id: "code-tasks",
name: "Code Task Routing",
condition: (ctx) =>
ctx.task.toLowerCase().includes("code") ||
ctx.capabilities?.includes("code"),
modelPreference: [
"gemini-2.5-pro",
"gemini-2.5-flash",
"gemini-2.0-flash-thinking",
"gemini-2.0-flash",
],
weight: 6,
active: true,
});
// Rule 5: Route large context tasks to appropriate models
this.addRule({
id: "large-context",
name: "Large Context Routing",
condition: (ctx) => (ctx.tokenBudget || 0) > 100000,
modelPreference: [
"gemini-2.5-deep-think",
"gemini-2.5-pro",
"gemini-pro-vertex",
],
weight: 5,
active: true,
});
// Rule 6: Free tier gets basic models
this.addRule({
id: "free-tier",
name: "Free Tier Routing",
condition: (ctx) => ctx.userTier === "free",
modelPreference: ["gemini-2.0-flash", "gemini-2.5-flash"],
weight: 3,
active: true,
});
// Rule 7: Deep reasoning tasks get specialized models
this.addRule({
id: "deep-reasoning",
name: "Deep Reasoning Routing",
condition: (ctx) =>
ctx.task.toLowerCase().includes("complex") ||
ctx.task.toLowerCase().includes("analyze") ||
ctx.capabilities?.includes("deep-reasoning"),
modelPreference: ["gemini-2.5-deep-think", "gemini-2.5-pro"],
weight: 9,
active: true,
});
this.logger.info("Default routing rules initialized", {
ruleCount: this.rules.size,
});
}
/**
 * Register (or overwrite) a routing rule, keyed by its id.
 */
addRule(rule: RoutingRule): void {
  this.rules.set(rule.id, rule);
  this.logger.debug("Routing rule added", { id: rule.id, name: rule.name });
}

/**
 * Delete a routing rule by id.
 * @returns true when a rule with that id existed and was removed
 */
removeRule(ruleId: string): boolean {
  if (!this.rules.delete(ruleId)) {
    return false;
  }
  this.logger.debug("Routing rule removed", { id: ruleId });
  return true;
}
/**
 * High-performance model selection with a <75ms routing-time target.
 *
 * Pipeline: LRU cache lookup -> complexity analysis -> candidate filtering ->
 * fast scoring -> cache/load-balancer bookkeeping. Any thrown error degrades
 * to a tier-appropriate fallback model rather than failing the request.
 *
 * Fixes: `catch` now treats the error as `unknown` and narrows before reading
 * `.message` (non-Error throws previously produced "undefined" in the reason);
 * the no-candidate path now records its routing time like every other path.
 *
 * @param context request metadata (task text, tier, priority, latency budget)
 * @param availableModels currently registered model configurations
 * @returns the routing decision, never rejects
 */
async selectOptimalModel(
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
): Promise<RoutingDecision> {
  const startTime = performance.now();
  try {
    // Refresh availability snapshot for quick per-model checks
    this.updateAvailabilityMap(availableModels);
    // 1. LRU cache first: sub-10ms on a hit
    const cacheKey = this.generateRoutingCacheKey(context);
    const cachedResult = this.getFromCache(cacheKey);
    if (cachedResult && this.isCacheValid(cachedResult)) {
      const routingTime = performance.now() - startTime;
      this.recordRoutingTime(routingTime);
      return {
        modelName: cachedResult.modelName,
        confidence: 0.95,
        reason: "LRU cache hit",
        routingTime,
        fromCache: true,
      };
    }
    // 2. Fast request-complexity analysis (cached internally)
    const complexityAnalysis = this.analyzeRequestComplexity(context);
    // 3. Candidate filtering by tier, availability, and complexity fit
    const candidates = this.fastCandidateSelection(
      context,
      availableModels,
      complexityAnalysis,
    );
    if (candidates.length === 0) {
      const fallback = this.getFallbackModel(
        context.userTier,
        availableModels,
      );
      const routingTime = performance.now() - startTime;
      // Fix: this path previously skipped sample recording
      this.recordRoutingTime(routingTime);
      return {
        modelName: fallback,
        confidence: 0.3,
        reason: "No candidates available - fallback",
        routingTime,
        fromCache: false,
      };
    }
    // 4. Weighted scoring across latency, complexity, reliability, cost
    const bestCandidate = this.fastModelScoring(
      candidates,
      context,
      availableModels,
      complexityAnalysis,
    );
    // 5. Bookkeeping for future requests
    this.updateCache(cacheKey, bestCandidate.modelName);
    this.updateLoadBalancer(bestCandidate.modelName);
    const routingTime = performance.now() - startTime;
    this.recordRoutingTime(routingTime);
    // 6. Surface slow routing decisions for monitoring
    if (routingTime > this.ROUTING_TIME_TARGET) {
      this.logger.warn("Routing time exceeded target", {
        routingTime,
        target: this.ROUTING_TIME_TARGET,
        cacheHit: false,
      });
      this.emit("routing_slow", {
        routingTime,
        target: this.ROUTING_TIME_TARGET,
      });
    }
    const decision: RoutingDecision = {
      modelName: bestCandidate.modelName,
      confidence: bestCandidate.confidence,
      reason: bestCandidate.reason,
      routingTime,
      fromCache: false,
    };
    this.emit("routing_decision", decision);
    return decision;
  } catch (error: unknown) {
    // Narrow `unknown` before touching .message (strict-mode correct)
    const message = error instanceof Error ? error.message : String(error);
    this.logger.error("Smart routing failed", { error, context });
    // Emergency fallback with minimal overhead
    const fallback = this.getFallbackModel(context.userTier, availableModels);
    const routingTime = performance.now() - startTime;
    return {
      modelName: fallback,
      confidence: 0.1,
      reason: `Emergency fallback: ${message}`,
      routingTime,
      fromCache: false,
    };
  }
}
/**
 * Apply the weighted routing rules to derive candidate models.
 *
 * Fix: renamed from `applyroutingRules` (camelCase typo). The method is
 * private and has no call sites anywhere in this class, so the rename is safe.
 * NOTE(review): currently unused — selectOptimalModel uses
 * fastCandidateSelection instead; kept as the rule-driven legacy path.
 *
 * @returns candidate model names; all available models when no rule matches
 */
private applyRoutingRules(
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
): string[] {
  const matchedRules: Array<{ rule: RoutingRule; score: number }> = [];
  // Collect every active rule whose predicate matches this request
  for (const rule of this.rules.values()) {
    if (rule.active && rule.condition(context)) {
      matchedRules.push({ rule, score: rule.weight });
    }
  }
  // Highest-weight rules contribute their preferences first
  matchedRules.sort((a, b) => b.score - a.score);
  const candidates = new Set<string>();
  for (const { rule } of matchedRules) {
    for (const model of rule.modelPreference) {
      if (availableModels.has(model)) {
        candidates.add(model);
      }
    }
  }
  // No rule matched: every available model is a candidate
  if (candidates.size === 0) {
    return Array.from(availableModels.keys());
  }
  return Array.from(candidates);
}
/**
 * Score candidate models on latency, cost, reliability, tier compatibility,
 * and capability match, returning them sorted best-first.
 * NOTE(review): unused legacy path (fastModelScoring is used instead); the
 * `async` modifier is unnecessary but preserved for interface compatibility.
 */
private async scoreCandidates(
  candidates: string[],
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
): Promise<Array<{ model: string; score: number }>> {
  const results: Array<{ model: string; score: number }> = [];
  for (const model of candidates) {
    const config = availableModels.get(model);
    if (!config) continue;
    const perf = this.performance.get(model);
    // Weighted sum of the five scoring factors (capability gets a fixed 0.1)
    const score =
      this.calculateLatencyScore(
        perf?.avgLatency || config.latencyTarget,
        context.latencyRequirement,
      ) *
        this.weights.latency +
      this.calculateCostScore(config.costPerToken, context.userTier) *
        this.weights.cost +
      this.calculateReliabilityScore(perf) * this.weights.reliability +
      this.calculateTierScore(config.tier, context.userTier) *
        this.weights.userTier +
      this.calculateCapabilityScore(
        config.capabilities,
        context.capabilities || [],
      ) *
        0.1;
    results.push({ model, score });
  }
  // Best score first
  return results.sort((a, b) => b.score - a.score);
}
/**
 * Latency score in [0, 1]: 1.0 when the model meets the requirement,
 * linearly penalized for overshoot.
 */
private calculateLatencyScore(
  modelLatency: number,
  requiredLatency: number,
): number {
  const overshoot = modelLatency - requiredLatency;
  if (overshoot <= 0) {
    return 1.0; // Meets or beats the requirement
  }
  return Math.max(0, 1.0 - overshoot / requiredLatency);
}

/**
 * Cost score in [0, 1]: enterprise is cost-insensitive; other tiers are
 * penalized linearly above their per-token budget.
 */
private calculateCostScore(modelCost: number, userTier: string): number {
  if (userTier === "enterprise") {
    return 0.8; // Flat moderate score — enterprise cares little about cost
  }
  const budget = userTier === "pro" ? 0.000003 : 0.000001;
  if (modelCost <= budget) {
    return 1.0;
  }
  return Math.max(0, 1.0 - (modelCost - budget) / budget);
}

/**
 * Reliability score: observed success ratio, or a neutral 0.7 until a model
 * has at least 10 recorded uses.
 */
private calculateReliabilityScore(perf?: ModelPerformance): number {
  if (!perf || perf.usageCount < 10) {
    return 0.7;
  }
  return (perf.usageCount - perf.errorCount) / perf.usageCount;
}

/**
 * Tier compatibility: 1.0 when the user's tier covers the model's tier,
 * 0.1 otherwise. Unknown tiers rank as "free" (level 0).
 */
private calculateTierScore(modelTier: string, userTier: string): number {
  const hierarchy: Record<string, number> = { free: 0, pro: 1, enterprise: 2 };
  const modelLevel = hierarchy[modelTier] || 0;
  const userLevel = hierarchy[userTier] || 0;
  return userLevel >= modelLevel ? 1.0 : 0.1;
}

/**
 * Capability match: fraction of required capabilities the model provides
 * (1.0 when nothing specific is required).
 */
private calculateCapabilityScore(
  modelCaps: string[],
  requiredCaps: string[],
): number {
  if (requiredCaps.length === 0) {
    return 1.0;
  }
  let hits = 0;
  for (const cap of requiredCaps) {
    if (modelCaps.includes(cap)) hits++;
  }
  return hits / requiredCaps.length;
}
/**
 * Pick one model from the scored candidates via weighted random selection
 * over the top three, biased toward less-used models (up to a 20% bonus).
 * @throws Error when the candidate list is empty
 */
private applyLoadBalancing(
  scoredCandidates: Array<{ model: string; score: number }>,
): string {
  if (scoredCandidates.length === 0) {
    throw new Error("No candidates available for selection");
  }
  if (scoredCandidates.length === 1) {
    return scoredCandidates[0].model;
  }
  // Only the top three candidates participate in the lottery
  const top = scoredCandidates.slice(0, Math.min(3, scoredCandidates.length));
  // Boost models the balancer has seen less often
  const adjusted = top.map(({ model, score }) => {
    const usage = this.loadBalancer.get(model) || 0;
    const bonus = Math.max(0, 1.0 - usage / 100);
    return { model, score: score * (1 + bonus * 0.2) };
  });
  // Weighted random draw proportional to adjusted score
  const totalScore = adjusted.reduce((sum, c) => sum + c.score, 0);
  let remaining = Math.random() * totalScore;
  for (const candidate of adjusted) {
    remaining -= candidate.score;
    if (remaining <= 0) {
      return candidate.model;
    }
  }
  // Numerical edge case: fall back to the first candidate
  return adjusted[0].model;
}

/**
 * Bump the usage counter for a model; counters are wiped once any model
 * passes 1000 uses to keep numbers bounded.
 */
private updateLoadBalancer(modelName: string): void {
  const previous = this.loadBalancer.get(modelName) || 0;
  this.loadBalancer.set(modelName, previous + 1);
  if (previous > 1000) {
    this.resetLoadBalancer();
  }
}

/** Wipe all load-balancer usage counters. */
private resetLoadBalancer(): void {
  this.loadBalancer.clear();
  this.logger.debug("Load balancer counters reset");
}
/**
 * Pick a tier-appropriate fallback model, walking a fixed preference list
 * and finally settling for any registered model.
 * @throws Error when no models are registered at all
 */
private getFallbackModel(
  userTier: string,
  availableModels: Map<string, ModelConfig>,
): string {
  const fallbacks: Record<string, string[]> = {
    enterprise: [
      "gemini-2.5-pro",
      "gemini-pro-vertex",
      "gemini-2.0-flash-thinking",
      "gemini-2.0-flash",
    ],
    pro: [
      "gemini-2.5-flash",
      "gemini-2.0-flash-thinking",
      "gemini-2.0-flash",
    ],
    free: ["gemini-2.0-flash", "gemini-2.5-flash"],
  };
  // Unknown tiers get the free-tier list
  const preferred = fallbacks[userTier] || fallbacks.free;
  const match = preferred.find((model) => availableModels.has(model));
  if (match) {
    return match;
  }
  // Last resort: the first registered model, whatever it is
  const [first] = availableModels.keys();
  if (first === undefined) {
    throw new Error("No models available");
  }
  return first;
}
/**
 * Fast request-complexity analysis, memoized by the first 100 chars of the
 * task plus its priority. The cache is halved once it exceeds 500 entries.
 */
private analyzeRequestComplexity(
  context: RoutingContext,
): ComplexityAnalysis {
  const cacheKey = `${context.task.substring(0, 100)}:${context.priority}`;
  const hit = this.complexityCache.get(cacheKey);
  if (hit) return hit;
  // Cheap heuristics only — this sits on the hot routing path
  const factors = {
    tokenCount: this.estimateTokenCount(context.task),
    keywordComplexity: this.analyzeKeywords(context.task),
    structuralComplexity: this.analyzeStructure(context.task),
    domainSpecific: this.isDomainSpecific(context.task, context.capabilities),
  };
  const analysis: ComplexityAnalysis = {
    score: this.calculateComplexityScore(factors),
    factors,
  };
  this.complexityCache.set(cacheKey, analysis);
  if (this.complexityCache.size > 500) {
    this.cleanupComplexityCache();
  }
  return analysis;
}

/**
 * Filter available models down to those the user's tier can access, that are
 * currently available, and that fit the request's complexity/latency profile.
 */
private fastCandidateSelection(
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
  complexity: ComplexityAnalysis,
): string[] {
  const userLevel = this.getTierLevel(context.userTier);
  const candidates: string[] = [];
  for (const [modelName, config] of availableModels) {
    // Tier gate: users only see models at or below their tier
    if (this.getTierLevel(config.tier) > userLevel) continue;
    // Availability gate
    if (!this.availabilityMap.get(modelName)) continue;
    // Complexity/latency suitability gate
    if (!this.isModelSuitableForComplexity(config, complexity, context)) {
      continue;
    }
    candidates.push(modelName);
  }
  return candidates;
}
/**
 * Score each candidate with a cheap weighted sum and return the winner.
 * Assumes `candidates` is non-empty (guarded by the caller).
 */
private fastModelScoring(
  candidates: string[],
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
  complexity: ComplexityAnalysis,
): { modelName: string; confidence: number; reason: string } {
  let best = {
    modelName: candidates[0],
    score: -1,
    reason: "Default selection",
  };
  for (const modelName of candidates) {
    const config = availableModels.get(modelName)!;
    const perf = this.performance.get(modelName);
    // Latency dominates (it drives the <75ms target); complexity, reliability
    // and cost contribute per the configured weights.
    const latencyScore = this.fastLatencyScore(config, perf, context);
    const complexityScore = this.fastComplexityScore(config, complexity);
    const total =
      latencyScore * this.weights.latency +
      complexityScore * this.weights.complexity +
      this.fastReliabilityScore(perf) * this.weights.reliability +
      this.fastCostScore(config, context.userTier) * this.weights.cost;
    if (total > best.score) {
      best = {
        modelName,
        score: total,
        reason: `Best match: latency=${latencyScore.toFixed(2)}, complexity=${complexityScore.toFixed(2)}`,
      };
    }
  }
  return {
    modelName: best.modelName,
    confidence: Math.min(0.95, best.score),
    reason: best.reason,
  };
}
/**
 * LRU cache operations. `routingCache` holds the entries; `cacheAccessOrder`
 * tracks recency (index 0 = least recently used).
 */
private getFromCache(key: string): CacheEntry | null {
  const entry = this.routingCache.get(key);
  if (entry === undefined) return null;
  // Touch for LRU and count the access. NOTE: TTL expiry is the caller's
  // job via isCacheValid(); stale entries are swept by cleanupCaches().
  this.updateCacheAccess(key);
  entry.accessCount += 1;
  return entry;
}

/** Insert/overwrite a cache entry and evict LRU entries beyond CACHE_LIMIT. */
private updateCache(key: string, modelName: string): void {
  if (this.routingCache.has(key)) {
    this.removeFromCacheOrder(key);
  }
  this.routingCache.set(key, {
    key,
    modelName,
    timestamp: Date.now(),
    accessCount: 1,
    metadata: {},
  });
  this.cacheAccessOrder.push(key);
  while (this.routingCache.size > this.CACHE_LIMIT) {
    const evicted = this.cacheAccessOrder.shift();
    if (evicted) {
      this.routingCache.delete(evicted);
    }
  }
}

/** True while the entry is younger than CACHE_TTL. */
private isCacheValid(entry: CacheEntry): boolean {
  const age = Date.now() - entry.timestamp;
  return age < this.CACHE_TTL;
}

/** Move a key to the most-recently-used end of the order list. */
private updateCacheAccess(key: string): void {
  this.removeFromCacheOrder(key);
  this.cacheAccessOrder.push(key);
}

/** Drop a key from the recency list if present (O(n) scan). */
private removeFromCacheOrder(key: string): void {
  const pos = this.cacheAccessOrder.indexOf(key);
  if (pos >= 0) {
    this.cacheAccessOrder.splice(pos, 1);
  }
}
/**
 * Performance monitoring: keep a rolling window of routing durations and
 * periodically emit aggregate metrics.
 */
private recordRoutingTime(time: number): void {
  this.routingTimes.push(time);
  // Bounded FIFO window: drop the oldest sample beyond the cap
  if (this.routingTimes.length > this.MAX_ROUTING_TIME_SAMPLES) {
    this.routingTimes.shift();
  }
  // Emit aggregates every 10 samples
  if (this.routingTimes.length % 10 === 0) {
    this.emitPerformanceMetrics();
  }
}

/**
 * Emit avg/max/p95 routing-time metrics.
 * Fixes: (1) previously sorted `routingTimes` IN PLACE, corrupting the FIFO
 * ordering recordRoutingTime relies on to evict the oldest sample — we now
 * sort a copy; (2) guards against an empty window (the 30s interval can fire
 * before any routing), which previously produced NaN avg and -Infinity max.
 */
private emitPerformanceMetrics(): void {
  if (this.routingTimes.length === 0) return;
  const samples = [...this.routingTimes].sort((a, b) => a - b);
  const avg = samples.reduce((a, b) => a + b, 0) / samples.length;
  const max = samples[samples.length - 1];
  const p95 = samples[Math.floor(samples.length * 0.95)];
  this.emit("performance_metrics", {
    averageRoutingTime: avg,
    maxRoutingTime: max,
    p95RoutingTime: p95,
    targetMet: p95 < this.ROUTING_TIME_TARGET,
    cacheHitRate: this.getCacheHitRate(),
  });
}
/**
 * Helper methods for fast scoring — banded lookups instead of continuous
 * formulas, to keep the hot path cheap.
 */
private fastLatencyScore(
  config: ModelConfig,
  perf?: ModelPerformance,
  context?: RoutingContext,
): number {
  const required = context?.latencyRequirement || 2000;
  // Prefer observed latency; fall back to the configured target
  const actual = perf?.avgLatency || config.latencyTarget;
  if (actual <= required * 0.8) return 1.0;
  if (actual <= required) return 0.8;
  return actual <= required * 1.5 ? 0.5 : 0.1;
}

/** Match model capabilities/speed to the analyzed complexity band. */
private fastComplexityScore(
  config: ModelConfig,
  complexity: ComplexityAnalysis,
): number {
  if (complexity.score < 0.3) {
    // Simple tasks: favor fast models
    return config.latencyTarget < 1000 ? 1.0 : 0.7;
  }
  if (complexity.score < 0.7) {
    // Medium: favor reasoning-capable models
    return config.capabilities.includes("reasoning") ? 0.9 : 0.6;
  }
  // High: favor advanced-reasoning models
  return config.capabilities.includes("advanced-reasoning") ? 1.0 : 0.4;
}

/** Success rate, floored at 0.1; neutral 0.8 under 5 recorded uses. */
private fastReliabilityScore(perf?: ModelPerformance): number {
  if (!perf || perf.usageCount < 5) return 0.8;
  return Math.max(0.1, perf.successRate);
}

/** Per-tier cost sensitivity; unknown tiers share the free-tier bands. */
private fastCostScore(config: ModelConfig, userTier: string): number {
  switch (userTier) {
    case "enterprise":
      return 0.9;
    case "pro":
      return config.costPerToken < 0.000003 ? 1.0 : 0.7;
    default:
      return config.costPerToken < 0.000001 ? 1.0 : 0.3;
  }
}
/**
 * Complexity analysis helpers — intentionally cheap heuristics.
 */
private estimateTokenCount(text: string): number {
  // Rough approximation: ~4 characters per token
  return Math.ceil(text.length / 4);
}

/** Fraction (capped at 1.0) of words containing "complex" keywords, /10. */
private analyzeKeywords(text: string): number {
  const complexKeywords = [
    "analyze",
    "implement",
    "optimize",
    "algorithm",
    "architecture",
    "debug",
  ];
  let hits = 0;
  for (const word of text.toLowerCase().split(/\s+/)) {
    if (complexKeywords.some((kw) => word.includes(kw))) {
      hits++;
    }
  }
  return Math.min(1.0, hits / 10);
}

/** Counts which code-like markers appear at least once, normalized by 15. */
private analyzeStructure(text: string): number {
  const markers = [
    "{",
    "}",
    "(",
    ")",
    "[",
    "]",
    "=>",
    "function",
    "class",
    "if",
    "for",
  ];
  const present = markers.reduce(
    (count, marker) => count + (text.includes(marker) ? 1 : 0),
    0,
  );
  return Math.min(1.0, present / 15);
}

/**
 * True when the task text mentions a known technical domain.
 * NOTE(review): `capabilities` only gates the check (undefined -> false) and
 * is otherwise unused — confirm whether it was meant to be matched too.
 */
private isDomainSpecific(text: string, capabilities?: string[]): boolean {
  if (!capabilities) return false;
  const domainKeywords = [
    "code",
    "API",
    "database",
    "security",
    "machine learning",
    "data science",
  ];
  const lower = text.toLowerCase();
  return domainKeywords.some((keyword) =>
    lower.includes(keyword.toLowerCase()),
  );
}

/** Weighted blend of the four factors, capped at 1.0. */
private calculateComplexityScore(
  factors: ComplexityAnalysis["factors"],
): number {
  const total =
    Math.min(1.0, factors.tokenCount / 1000) * 0.3 +
    factors.keywordComplexity * 0.3 +
    factors.structuralComplexity * 0.3 +
    (factors.domainSpecific ? 0.1 : 0);
  return Math.min(1.0, total);
}
/**
 * Suitability gate: very complex requests need advanced/code models; tight
 * latency budgets exclude slow models; everything else passes.
 */
private isModelSuitableForComplexity(
  config: ModelConfig,
  complexity: ComplexityAnalysis,
  context: RoutingContext,
): boolean {
  if (complexity.score > 0.8) {
    const caps = config.capabilities;
    return caps.includes("advanced-reasoning") || caps.includes("code");
  }
  if (context.latencyRequirement < 1000) {
    return config.latencyTarget < 1200;
  }
  return true;
}
/**
* Utility methods
*/
private generateRoutingCacheKey(context: RoutingContext): string {
const keyComponents = [
context.task.substring(0, 50),
context.userTier,
context.priority,
context.latencyRequirement,
];
return Buffer.from(keyComponents.join("|"))
.toString("base64")
.substring(0, 32);
}
private getTierLevel(tier: string): number {
const levels = { free: 0, pro: 1, enterprise: 2 };
return levels[tier as keyof typeof levels] || 0;
}
private updateAvailabilityMap(
availableModels: Map<string, ModelConfig>,
): void {
this.availabilityMap.clear();
for (const [modelName] of availableModels) {
this.availabilityMap.set(modelName, true);
}
}
private cleanupComplexityCache(): void {
const entries = Array.from(this.complexityCache.entries());
const half = Math.floor(entries.length / 2);
this.complexityCache.clear();
// Keep the second half (more recent)
for (let i = half; i < entries.length; i++) {
this.complexityCache.set(entries[i][0], entries[i][1]);
}
}
private getCacheHitRate(): number {
const totalRequests = this.routingTimes.length;
if (totalRequests === 0) return 0;
let cacheHits = 0;
for (const entry of this.routingCache.values()) {
cacheHits += entry.accessCount;
}
return Math.min(1.0, cacheHits / totalRequests);
}
private startPerformanceMonitoring(): void {
// Monitor routing performance every 30 seconds
setInterval(() => {
this.emitPerformanceMetrics();
}, 30000);
// Cleanup caches periodically
setInterval(() => {
this.cleanupCaches();
}, 300000); // Every 5 minutes
}
private cleanupCaches(): void {
const now = Date.now();
// Cleanup routing cache
for (const [key, entry] of this.routingCache) {
if (now - entry.timestamp > this.CACHE_TTL) {
this.routingCache.delete(key);
this.removeFromCacheOrder(key);
}
}
// Cleanup complexity cache
if (this.complexityCache.size > 300) {
this.cleanupComplexityCache();
}
}
/**
 * Pre-populate the complexity cache with common task patterns so the first
 * real requests route faster.
 */
private warmupComplexityAnalyzer(): void {
  const commonPatterns = [
    "implement function",
    "analyze data",
    "create API",
    "debug error",
    "optimize performance",
    "design architecture",
  ];
  commonPatterns.forEach((task) =>
    this.analyzeRequestComplexity({
      task,
      userTier: "pro",
      priority: "medium",
      latencyRequirement: 1000,
    }),
  );
}
/**
 * Public API: aggregate routing-time statistics over the rolling window.
 * With no samples yet, reports zeros and targetMet=true.
 */
getRoutingPerformance(): {
  averageTime: number;
  p95Time: number;
  cacheHitRate: number;
  targetMet: boolean;
} {
  const samples = this.routingTimes;
  if (samples.length === 0) {
    return { averageTime: 0, p95Time: 0, cacheHitRate: 0, targetMet: true };
  }
  const sorted = [...samples].sort((a, b) => a - b);
  const averageTime = samples.reduce((sum, t) => sum + t, 0) / samples.length;
  const p95Time = sorted[Math.floor(sorted.length * 0.95)];
  return {
    averageTime,
    p95Time,
    cacheHitRate: this.getCacheHitRate(),
    targetMet: p95Time < this.ROUTING_TIME_TARGET,
  };
}
/**
* Record model performance for future routing decisions with enhanced metrics
*/
recordPerformance(
modelName: string,
latency: number,
success: boolean,
cost: number,
tokenUsage?: { input: number; output: number; total: number },
): void {
const existing = this.performance.get(modelName) || {
modelName,
avgLatency: 0,
successRate: 1,
avgCost: 0,
lastUsed: new Date(),
usageCount: 0,
errorCount: 0,
complexityScore: 0.5,
tokenEfficiency: 1.0,
};
// Update metrics using exponential moving average
const alpha = 0.1; // Smoothing factor
existing.avgLatency = existing.avgLatency * (1 - alpha) + latency * alpha;
existing.avgCost = existing.avgCost * (1 - alpha) + cost * alpha;
existing.usageCount++;
existing.lastUsed = new Date();
if (!success) {
existing.errorCount++;
}
existing.successRate =
(existing.usageCount - existing.errorCount) / existing.usageCount;
// Update token efficiency if usage data provided
if (tokenUsage && tokenUsage.total > 0) {
const efficiency = tokenUsage.output / tokenUsage.total;
existing.tokenEfficiency =
existing.tokenEfficiency * 0.9 + efficiency * 0.1;
}
this.performance.set(modelName, existing);
// Emit performance update for monitoring
this.emit("model_performance_updated", {
modelName,
latency,
success,
cost,
performance: existing,
});
}
/**
 * Intelligent fallback when a chosen model is unavailable. Strategies, in
 * order: (1) same tier with >=70% shared capabilities, (2) a lower tier the
 * user can access with core capabilities, (3) tier-based emergency list.
 */
async selectFallbackModel(
  originalModel: string,
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
  reason: string,
): Promise<RoutingDecision> {
  const startTime = performance.now();
  this.logger.warn("Selecting fallback model", {
    originalModel,
    reason,
    userTier: context.userTier,
  });
  // Lazy-evaluated strategy chain: each step only runs if the prior failed
  const fallback =
    this.findSimilarTierModel(originalModel, context, availableModels) ||
    this.findLowerTierModel(originalModel, context, availableModels) ||
    this.getEmergencyFallback(context.userTier, availableModels);
  const routingTime = performance.now() - startTime;
  this.emit("fallback_triggered", {
    originalModel,
    fallbackModel: fallback,
    reason,
    routingTime,
  });
  return {
    modelName: fallback,
    confidence: 0.6,
    reason: `Fallback from ${originalModel}: ${reason}`,
    routingTime,
    fromCache: false,
  };
}
/**
 * Strategy 1: another available model in the same tier sharing at least 70%
 * of the original's capabilities. Returns null when none qualifies.
 */
private findSimilarTierModel(
  originalModel: string,
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
): string | null {
  const original = availableModels.get(originalModel);
  if (!original) return null;
  for (const [modelName, config] of availableModels) {
    if (modelName === originalModel) continue;
    if (!this.availabilityMap.get(modelName)) continue;
    if (config.tier !== original.tier) continue;
    const shared = config.capabilities.filter((cap) =>
      original.capabilities.includes(cap),
    );
    if (shared.length >= original.capabilities.length * 0.7) {
      return modelName;
    }
  }
  return null;
}

/**
 * Strategy 2: an available model in a strictly lower tier (that the user can
 * still access) offering a core capability ("code" or "reasoning").
 */
private findLowerTierModel(
  originalModel: string,
  context: RoutingContext,
  availableModels: Map<string, ModelConfig>,
): string | null {
  const original = availableModels.get(originalModel);
  if (!original) return null;
  const userLevel = this.getTierLevel(context.userTier);
  const originalLevel = this.getTierLevel(original.tier);
  for (const [modelName, config] of availableModels) {
    if (modelName === originalModel) continue;
    if (!this.availabilityMap.get(modelName)) continue;
    const level = this.getTierLevel(config.tier);
    if (level > userLevel || level >= originalLevel) continue;
    if (
      config.capabilities.includes("code") ||
      config.capabilities.includes("reasoning")
    ) {
      return modelName;
    }
  }
  return null;
}
/**
 * Strategy 3: walk a fixed per-tier preference list, then accept any model
 * currently marked available.
 * @throws Error when nothing at all is available
 */
private getEmergencyFallback(
  userTier: string,
  availableModels: Map<string, ModelConfig>,
): string {
  const emergencyOrder: Record<string, string[]> = {
    enterprise: [
      "gemini-2.5-pro",
      "gemini-pro-vertex",
      "gemini-2.0-flash-thinking",
      "gemini-2.0-flash",
    ],
    pro: [
      "gemini-2.5-flash",
      "gemini-2.0-flash-thinking",
      "gemini-2.0-flash",
    ],
    free: ["gemini-2.0-flash", "gemini-2.5-flash"],
  };
  const preferred = emergencyOrder[userTier] || emergencyOrder.free;
  for (const modelName of preferred) {
    if (
      availableModels.has(modelName) &&
      this.availabilityMap.get(modelName)
    ) {
      return modelName;
    }
  }
  // Last resort: any model flagged available
  for (const modelName of availableModels.keys()) {
    if (this.availabilityMap.get(modelName)) {
      return modelName;
    }
  }
  throw new Error("No models available for fallback");
}
/**
 * Record a single model's availability; emits and logs only on change.
 */
updateModelAvailability(modelName: string, available: boolean): void {
  const previous = this.availabilityMap.get(modelName);
  this.availabilityMap.set(modelName, available);
  if (previous === available) return;
  this.emit("model_availability_changed", {
    modelName,
    available,
    timestamp: Date.now(),
  });
  this.logger.info("Model availability changed", {
    modelName,
    available,
    previousState: previous,
  });
}

/**
 * Bulk availability update; emits a single batch event listing only the
 * models whose state actually changed.
 */
updateBatchAvailability(availabilityMap: Map<string, boolean>): void {
  const changes: Array<{ model: string; available: boolean }> = [];
  for (const [model, available] of availabilityMap) {
    if (this.availabilityMap.get(model) !== available) {
      changes.push({ model, available });
    }
    this.availabilityMap.set(model, available);
  }
  if (changes.length > 0) {
    this.emit("batch_availability_update", {
      changes,
      timestamp: Date.now(),
    });
  }
}
/**
 * Comprehensive router snapshot: routing performance, cache status,
 * availability counts, and per-model performance records.
 */
getRouterStats(): {
  performance: ReturnType<typeof this.getRoutingPerformance>;
  cache: { size: number; hitRate: number; limit: number };
  availability: { total: number; available: number; unavailable: number };
  models: Array<{ name: string; performance: ModelPerformance }>;
} {
  const statuses = Array.from(this.availabilityMap.values());
  const availableCount = statuses.filter(Boolean).length;
  return {
    performance: this.getRoutingPerformance(),
    cache: {
      size: this.routingCache.size,
      hitRate: this.getCacheHitRate(),
      limit: this.CACHE_LIMIT,
    },
    availability: {
      total: statuses.length,
      available: availableCount,
      unavailable: statuses.length - availableCount,
    },
    models: Array.from(this.performance.entries()).map(([name, perf]) => ({
      name,
      performance: perf,
    })),
  };
}
/**
 * Adapt scoring weights from aggregate performance trends: frequent failures
 * boost the reliability weight; slow responses boost the latency weight (both
 * at the expense of cost).
 *
 * Fix: replaced the untyped `any` parameter with the two optional fields this
 * method actually reads — callers passing `any` payloads still compile.
 * NOTE(review): weights are nudged without renormalizing to sum 1.0; drift is
 * bounded by the min/max clamps but confirm this is intended.
 */
optimizeBasedOnPerformance(performanceData: {
  recentFailures?: number;
  avgLatency?: number;
}): void {
  const recentFailures = performanceData.recentFailures || 0;
  const avgLatency = performanceData.avgLatency || 0;
  if (recentFailures > 5) {
    // Failures observed: weigh reliability more, cost less
    this.weights.reliability = Math.min(0.5, this.weights.reliability + 0.1);
    this.weights.cost = Math.max(0.1, this.weights.cost - 0.05);
  }
  if (avgLatency > 2000) {
    // Slow responses: weigh latency more, cost less
    this.weights.latency = Math.min(0.6, this.weights.latency + 0.1);
    this.weights.cost = Math.max(0.1, this.weights.cost - 0.05);
  }
  this.logger.info("Routing weights optimized", { weights: this.weights });
}
/**
 * Snapshot of per-model usage counters maintained by the load balancer.
 */
getModelUsageStats(): { [model: string]: number } {
  const stats: { [model: string]: number } = {};
  for (const [model, count] of this.loadBalancer) {
    stats[model] = count;
  }
  return stats;
}

/**
 * All registered routing rules (active and inactive).
 */
getRules(): RoutingRule[] {
  return [...this.rules.values()];
}

/**
 * Historical performance records for every model observed so far.
 */
getPerformanceData(): ModelPerformance[] {
  return [...this.performance.values()];
}
}