UNPKG

jay-code

Version:

Streamlined AI CLI orchestration engine with mathematical rigor and enterprise-grade reliability

511 lines (432 loc) 13.3 kB
/**
 * Load balancer and rate limiting for MCP
 */

import type { MCPLoadBalancerConfig, MCPRequest, MCPResponse, MCPSession } from '../utils/types.js';
import type { ILogger } from '../core/logger.js';
import { MCPError } from '../utils/errors.js';

/** Recovery timeout handed to the circuit breaker, in milliseconds. */
const CIRCUIT_BREAKER_RECOVERY_TIMEOUT_MS = 30000;

/** Interval between sweeps of the per-session rate limiter map, in milliseconds. */
const SESSION_CLEANUP_INTERVAL_MS = 300000;

/** Number of most recent request durations kept for the response-time average. */
const MAX_TRACKED_RESPONSE_TIMES = 1000;

/** Interval between sweeps of expired entries in the request queue, in milliseconds. */
const QUEUE_CLEANUP_INTERVAL_MS = 10000;

/**
 * Per-request bookkeeping record created by `recordRequestStart` and completed
 * by `recordRequestEnd`.
 */
export interface RequestMetrics {
  requestId: string;
  sessionId: string;
  method: string;
  startTime: number;
  endTime?: number;
  success?: boolean;
  error?: string;
}

/** Aggregate counters exposed by the load balancer. */
export interface LoadBalancerMetrics {
  totalRequests: number;
  successfulRequests: number;
  failedRequests: number;
  rateLimitedRequests: number;
  averageResponseTime: number;
  requestsPerSecond: number;
  circuitBreakerTrips: number;
  lastReset: Date;
}

/** Contract implemented by {@link LoadBalancer}. */
export interface ILoadBalancer {
  shouldAllowRequest(session: MCPSession, request: MCPRequest): Promise<boolean>;
  recordRequestStart(session: MCPSession, request: MCPRequest): RequestMetrics;
  recordRequestEnd(metrics: RequestMetrics, response?: MCPResponse, error?: Error): void;
  getMetrics(): LoadBalancerMetrics;
  resetMetrics(): void;
  isCircuitBreakerOpen(): boolean;
}

/**
 * Circuit breaker state
 */
enum CircuitBreakerState {
  CLOSED = 'closed',
  OPEN = 'open',
  HALF_OPEN = 'half_open',
}

/**
 * Lets a housekeeping timer stop holding the process open.
 *
 * Timer handles are objects with an `unref()` method in Node.js and plain
 * numbers in browsers, so the method is probed at runtime instead of assumed.
 */
function unrefTimer(timer: unknown): void {
  if (typeof timer !== 'object' || timer === null) {
    return;
  }
  const candidate = timer as { unref?: unknown };
  if (typeof candidate.unref === 'function') {
    candidate.unref.call(timer);
  }
}

/** Builds a zeroed metrics record stamped with the current time. */
function createEmptyMetrics(): LoadBalancerMetrics {
  return {
    totalRequests: 0,
    successfulRequests: 0,
    failedRequests: 0,
    rateLimitedRequests: 0,
    averageResponseTime: 0,
    requestsPerSecond: 0,
    circuitBreakerTrips: 0,
    lastReset: new Date(),
  };
}

/**
 * Rate limiter using token bucket algorithm
 */
class RateLimiter {
  private tokens: number;
  private lastRefill: number;

  /**
   * @param maxTokens - Bucket capacity; the bucket starts full.
   * @param refillRate - Tokens added per second.
   */
  constructor(
    private readonly maxTokens: number,
    private readonly refillRate: number, // tokens per second
  ) {
    this.tokens = maxTokens;
    this.lastRefill = Date.now();
  }

  /**
   * Attempts to take `tokens` from the bucket.
   *
   * @returns `true` if the tokens were available and have been deducted,
   * `false` if the bucket did not hold enough (nothing is deducted).
   */
  tryConsume(tokens = 1): boolean {
    this.refill();
    if (this.tokens >= tokens) {
      this.tokens -= tokens;
      return true;
    }
    return false;
  }

  /** Returns the current token count after applying any pending refill. */
  getTokens(): number {
    this.refill();
    return this.tokens;
  }

  /**
   * Reports whether the bucket is at capacity after applying any pending refill.
   *
   * A full bucket is indistinguishable from a freshly constructed one, so its
   * owner can discard it without changing rate-limiting behaviour.
   */
  isFull(): boolean {
    this.refill();
    return this.tokens >= this.maxTokens;
  }

  /**
   * Adds the whole tokens earned since the last refill.
   *
   * Only the time that those whole tokens account for is consumed from the
   * refill clock; the fractional remainder stays pending. Resetting the clock
   * to "now" on every refill would discard that remainder and make the
   * effective rate lower than `refillRate` whenever the bucket is polled at
   * intervals that are not a multiple of the per-token period.
   */
  private refill(): void {
    const now = Date.now();
    const elapsedSeconds = (now - this.lastRefill) / 1000;
    const tokensToAdd = Math.floor(elapsedSeconds * this.refillRate);
    if (tokensToAdd <= 0) {
      return;
    }

    const refilled = this.tokens + tokensToAdd;
    if (refilled >= this.maxTokens) {
      // Bucket is full: surplus time earns nothing, so restart the clock.
      this.tokens = this.maxTokens;
      this.lastRefill = now;
      return;
    }

    this.tokens = refilled;
    // tokensToAdd > 0 implies refillRate > 0, so the division is safe.
    this.lastRefill += (tokensToAdd / this.refillRate) * 1000;
  }
}

/**
 * Circuit breaker implementation
 */
class CircuitBreaker {
  private state = CircuitBreakerState.CLOSED;
  private failureCount = 0;
  private lastFailureTime = 0;
  private successCount = 0;

  /**
   * @param failureThreshold - Failure count at which a CLOSED breaker opens.
   * @param recoveryTimeout - Time after the last failure before an OPEN breaker
   * moves to HALF_OPEN, in milliseconds.
   * @param halfOpenMaxRequests - Successes required in HALF_OPEN to close again.
   */
  constructor(
    private readonly failureThreshold: number,
    private readonly recoveryTimeout: number, // milliseconds
    private readonly halfOpenMaxRequests = 3,
  ) {}

  /**
   * Decides whether a request may proceed, moving OPEN to HALF_OPEN once the
   * recovery timeout has elapsed since the last recorded failure.
   */
  canExecute(): boolean {
    const now = Date.now();

    switch (this.state) {
      case CircuitBreakerState.CLOSED:
        return true;

      case CircuitBreakerState.OPEN:
        if (now - this.lastFailureTime >= this.recoveryTimeout) {
          this.state = CircuitBreakerState.HALF_OPEN;
          this.successCount = 0;
          return true;
        }
        return false;

      case CircuitBreakerState.HALF_OPEN:
        // Gated on recorded successes, not on requests currently in flight.
        return this.successCount < this.halfOpenMaxRequests;

      default:
        return false;
    }
  }

  /**
   * Records a successful request. In HALF_OPEN this counts towards closing the
   * breaker; in CLOSED it clears the failure count.
   */
  recordSuccess(): void {
    if (this.state === CircuitBreakerState.HALF_OPEN) {
      this.successCount++;
      if (this.successCount >= this.halfOpenMaxRequests) {
        this.state = CircuitBreakerState.CLOSED;
        this.failureCount = 0;
      }
    } else if (this.state === CircuitBreakerState.CLOSED) {
      this.failureCount = 0;
    }
  }

  /**
   * Records a failed request. Any failure in HALF_OPEN re-opens the breaker;
   * in CLOSED the breaker opens once the failure threshold is reached.
   */
  recordFailure(): void {
    this.failureCount++;
    this.lastFailureTime = Date.now();

    if (this.state === CircuitBreakerState.HALF_OPEN) {
      this.state = CircuitBreakerState.OPEN;
    } else if (
      this.state === CircuitBreakerState.CLOSED &&
      this.failureCount >= this.failureThreshold
    ) {
      this.state = CircuitBreakerState.OPEN;
    }
  }

  /** Returns the current state without triggering any transition. */
  getState(): CircuitBreakerState {
    return this.state;
  }

  /** Returns a snapshot of the breaker's state and counters. */
  getMetrics(): { state: string; failureCount: number; successCount: number } {
    return {
      state: this.state,
      failureCount: this.failureCount,
      successCount: this.successCount,
    };
  }
}

/**
 * Load balancer implementation
 *
 * Combines a global token bucket, one token bucket per session and a circuit
 * breaker, and tracks request metrics. The constructor starts a periodic sweep
 * of idle session limiters; call {@link LoadBalancer.destroy} to stop it.
 */
export class LoadBalancer implements ILoadBalancer {
  private rateLimiter: RateLimiter;
  private circuitBreaker: CircuitBreaker;
  private sessionRateLimiters = new Map<string, RateLimiter>();
  private metrics: LoadBalancerMetrics;
  private requestTimes: number[] = [];
  private requestsInLastSecond = 0;
  private lastSecondTimestamp = 0;
  private cleanupTimer: ReturnType<typeof setInterval> | undefined;

  /**
   * @param config - Supplies `enabled`, `maxRequestsPerSecond` (used as both
   * capacity and refill rate of the global bucket) and `circuitBreakerThreshold`.
   * @param logger - Receives warn/debug/info messages about decisions and cleanup.
   */
  constructor(
    private config: MCPLoadBalancerConfig,
    private logger: ILogger,
  ) {
    this.rateLimiter = new RateLimiter(config.maxRequestsPerSecond, config.maxRequestsPerSecond);
    this.circuitBreaker = new CircuitBreaker(
      config.circuitBreakerThreshold,
      CIRCUIT_BREAKER_RECOVERY_TIMEOUT_MS,
    );
    this.metrics = createEmptyMetrics();

    // Clean up old session rate limiters periodically
    this.cleanupTimer = setInterval(() => {
      this.cleanupSessionRateLimiters();
    }, SESSION_CLEANUP_INTERVAL_MS);
    // Housekeeping must not be the only thing keeping the process alive.
    unrefTimer(this.cleanupTimer);
  }

  /**
   * Stops the periodic session-limiter sweep. Safe to call more than once.
   */
  destroy(): void {
    if (this.cleanupTimer !== undefined) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = undefined;
    }
  }

  /**
   * Decides whether a request may proceed.
   *
   * Checks run in order: circuit breaker, global rate limit, per-session rate
   * limit. The first failing check logs a warning, bumps the matching counter
   * and returns `false`. When the balancer is disabled every request is allowed.
   *
   * Note: `circuitBreakerTrips` is incremented for every request the breaker
   * rejects, not once per transition into the open state. A global token that
   * was already consumed is not returned if the session limiter then rejects.
   */
  async shouldAllowRequest(session: MCPSession, request: MCPRequest): Promise<boolean> {
    if (!this.config.enabled) {
      return true;
    }

    // Check circuit breaker
    if (!this.circuitBreaker.canExecute()) {
      this.logger.warn('Request rejected by circuit breaker', {
        sessionId: session.id,
        method: request.method,
        circuitState: this.circuitBreaker.getState(),
      });
      this.metrics.circuitBreakerTrips++;
      return false;
    }

    // Check global rate limit
    if (!this.rateLimiter.tryConsume()) {
      this.logger.warn('Request rejected by global rate limiter', {
        sessionId: session.id,
        method: request.method,
        remainingTokens: this.rateLimiter.getTokens(),
      });
      this.metrics.rateLimitedRequests++;
      return false;
    }

    // Check per-session rate limit
    const sessionRateLimiter = this.getSessionRateLimiter(session.id);
    if (!sessionRateLimiter.tryConsume()) {
      this.logger.warn('Request rejected by session rate limiter', {
        sessionId: session.id,
        method: request.method,
        remainingTokens: sessionRateLimiter.getTokens(),
      });
      this.metrics.rateLimitedRequests++;
      return false;
    }

    return true;
  }

  /**
   * Registers the start of a request, updating the total and per-second
   * counters.
   *
   * @returns The record to hand back to {@link LoadBalancer.recordRequestEnd}.
   */
  recordRequestStart(session: MCPSession, request: MCPRequest): RequestMetrics {
    const requestMetrics: RequestMetrics = {
      requestId: request.id.toString(),
      sessionId: session.id,
      method: request.method,
      startTime: Date.now(),
    };

    this.metrics.totalRequests++;
    this.updateRequestsPerSecond();

    this.logger.debug('Request started', {
      requestId: requestMetrics.requestId,
      sessionId: session.id,
      method: request.method,
    });

    return requestMetrics;
  }

  /**
   * Registers the end of a request.
   *
   * The request counts as successful when no `error` was passed and the
   * response (if any) carries no `error`. The outcome is written into
   * `metrics`, fed to the circuit breaker and folded into the aggregates.
   *
   * @param metrics - Record returned by `recordRequestStart`; mutated in place.
   */
  recordRequestEnd(metrics: RequestMetrics, response?: MCPResponse, error?: Error): void {
    metrics.endTime = Date.now();
    const duration = metrics.endTime - metrics.startTime;

    // Update response time tracking
    this.requestTimes.push(duration);
    if (this.requestTimes.length > MAX_TRACKED_RESPONSE_TIMES) {
      this.requestTimes.shift(); // Keep only the most recent durations
    }

    const success = !error && (!response || !response.error);
    metrics.success = success;

    const errorMessage = error?.message || response?.error?.message;
    if (errorMessage) {
      metrics.error = errorMessage;
    }

    if (success) {
      this.metrics.successfulRequests++;
      this.circuitBreaker.recordSuccess();
    } else {
      this.metrics.failedRequests++;
      this.circuitBreaker.recordFailure();
    }

    // Update average response time
    this.metrics.averageResponseTime = this.calculateAverageResponseTime();

    this.logger.debug('Request completed', {
      requestId: metrics.requestId,
      sessionId: metrics.sessionId,
      method: metrics.method,
      duration,
      success,
      error: metrics.error,
    });
  }

  /** Returns a shallow copy of the aggregate metrics. */
  getMetrics(): LoadBalancerMetrics {
    return { ...this.metrics };
  }

  /** Zeroes all aggregate metrics and forgets the tracked response times. */
  resetMetrics(): void {
    this.metrics = createEmptyMetrics();
    this.requestTimes = [];
    this.logger.info('Load balancer metrics reset');
  }

  /** Returns `true` only while the breaker is OPEN (HALF_OPEN reports `false`). */
  isCircuitBreakerOpen(): boolean {
    return this.circuitBreaker.getState() === CircuitBreakerState.OPEN;
  }

  /**
   * Returns the aggregate metrics together with circuit breaker state, global
   * bucket fill level and the number of tracked session limiters.
   */
  getDetailedMetrics(): {
    loadBalancer: LoadBalancerMetrics;
    circuitBreaker: { state: string; failureCount: number; successCount: number };
    rateLimiter: { tokens: number; maxTokens: number };
    sessions: number;
  } {
    return {
      loadBalancer: this.getMetrics(),
      circuitBreaker: this.circuitBreaker.getMetrics(),
      rateLimiter: {
        tokens: this.rateLimiter.getTokens(),
        maxTokens: this.config.maxRequestsPerSecond,
      },
      sessions: this.sessionRateLimiters.size,
    };
  }

  /** Returns the limiter for `sessionId`, creating it on first use. */
  private getSessionRateLimiter(sessionId: string): RateLimiter {
    let rateLimiter = this.sessionRateLimiters.get(sessionId);

    if (!rateLimiter) {
      // Create a per-session rate limiter (more restrictive than global)
      const sessionLimit = Math.max(1, Math.floor(this.config.maxRequestsPerSecond / 10));
      rateLimiter = new RateLimiter(sessionLimit, sessionLimit);
      this.sessionRateLimiters.set(sessionId, rateLimiter);
    }

    return rateLimiter;
  }

  /** Mean of the tracked request durations, or 0 when none are tracked. */
  private calculateAverageResponseTime(): number {
    if (this.requestTimes.length === 0) {
      return 0;
    }

    const sum = this.requestTimes.reduce((acc, time) => acc + time, 0);
    return sum / this.requestTimes.length;
  }

  /**
   * Counts requests per wall-clock second. When the second rolls over, the
   * count of the last second that saw traffic is published as
   * `requestsPerSecond`.
   */
  private updateRequestsPerSecond(): void {
    const now = Math.floor(Date.now() / 1000);

    if (now !== this.lastSecondTimestamp) {
      this.metrics.requestsPerSecond = this.requestsInLastSecond;
      this.requestsInLastSecond = 1;
      this.lastSecondTimestamp = now;
    } else {
      this.requestsInLastSecond++;
    }
  }

  /**
   * Drops session limiters whose bucket has refilled completely.
   *
   * Fullness is checked against each limiter's own capacity. Session buckets
   * are smaller than the global one, so comparing their token count with
   * `config.maxRequestsPerSecond` would almost never match and the map would
   * grow without bound.
   */
  private cleanupSessionRateLimiters(): void {
    let cleaned = 0;

    for (const [sessionId, rateLimiter] of this.sessionRateLimiters.entries()) {
      // A full bucket behaves exactly like a new one, so dropping it is lossless.
      if (rateLimiter.isFull()) {
        this.sessionRateLimiters.delete(sessionId);
        cleaned++;
      }
    }

    if (cleaned > 0) {
      this.logger.debug('Cleaned up session rate limiters', { count: cleaned });
    }
  }
}

/** One pending entry in the {@link RequestQueue}. */
interface QueuedRequest {
  session: MCPSession;
  request: MCPRequest;
  /** Time the entry was enqueued, from `Date.now()`. */
  timestamp: number;
  /** Runs this entry's own processor and settles the caller's promise; never rejects. */
  execute: () => Promise<void>;
  /** Rejects the caller's promise without running the processor. */
  reject: (error: Error) => void;
}

/**
 * Request queue for handling backpressure
 *
 * Entries are processed one at a time in FIFO order. The constructor starts a
 * periodic sweep that rejects entries older than the request timeout; call
 * {@link RequestQueue.destroy} to stop it.
 */
export class RequestQueue {
  private queue: QueuedRequest[] = [];
  private processing = false;
  private readonly maxQueueSize: number;
  private readonly requestTimeout: number;
  private cleanupTimer: ReturnType<typeof setInterval> | undefined;

  /**
   * @param maxQueueSize - Maximum number of waiting entries before `enqueue` fails.
   * @param requestTimeout - Maximum time an entry may wait, in milliseconds.
   * @param logger - Receives a warning whenever expired entries are swept.
   */
  constructor(
    maxQueueSize = 1000,
    requestTimeout = 30000, // 30 seconds
    private logger: ILogger,
  ) {
    this.maxQueueSize = maxQueueSize;
    this.requestTimeout = requestTimeout;

    // Clean up expired requests periodically
    this.cleanupTimer = setInterval(() => {
      this.cleanupExpiredRequests();
    }, QUEUE_CLEANUP_INTERVAL_MS);
    // Housekeeping must not be the only thing keeping the process alive.
    unrefTimer(this.cleanupTimer);
  }

  /**
   * Stops the periodic expiry sweep. Safe to call more than once. Entries that
   * are already queued are left in place.
   */
  destroy(): void {
    if (this.cleanupTimer !== undefined) {
      clearInterval(this.cleanupTimer);
      this.cleanupTimer = undefined;
    }
  }

  /**
   * Queues a request and resolves with the result of `processor` once it has
   * been run for this entry.
   *
   * Each entry is processed with the `processor` passed to its own `enqueue`
   * call, including entries added while another caller's drain loop is running.
   *
   * @throws MCPError with message 'Request queue is full' (as a rejection) when
   * the queue is at capacity, or 'Request timeout' when the entry waited longer
   * than the request timeout. Errors from `processor` are forwarded, with
   * non-`Error` values wrapped in an `Error`.
   */
  async enqueue<T>(
    session: MCPSession,
    request: MCPRequest,
    processor: (session: MCPSession, request: MCPRequest) => Promise<T>,
  ): Promise<T> {
    if (this.queue.length >= this.maxQueueSize) {
      throw new MCPError('Request queue is full');
    }

    return new Promise<T>((resolve, reject) => {
      this.queue.push({
        session,
        request,
        timestamp: Date.now(),
        reject,
        execute: async () => {
          try {
            resolve(await processor(session, request));
          } catch (error: unknown) {
            reject(error instanceof Error ? error : new Error(String(error)));
          }
        },
      });

      if (!this.processing) {
        // The drain loop settles each entry's promise itself and never rejects.
        void this.processQueue();
      }
    });
  }

  /** Number of entries currently waiting. */
  getQueueSize(): number {
    return this.queue.length;
  }

  /** Whether the drain loop is currently running. */
  isProcessing(): boolean {
    return this.processing;
  }

  /**
   * Drains the queue sequentially. Entries that expired while waiting are
   * rejected without running their processor.
   */
  private async processQueue(): Promise<void> {
    if (this.processing) {
      return;
    }

    this.processing = true;
    try {
      let item = this.queue.shift();
      while (item !== undefined) {
        // Check if request has expired
        if (Date.now() - item.timestamp > this.requestTimeout) {
          item.reject(new MCPError('Request timeout'));
        } else {
          await item.execute();
        }
        item = this.queue.shift();
      }
    } finally {
      // Always release the flag so a later enqueue can restart draining.
      this.processing = false;
    }
  }

  /** Rejects and removes every waiting entry older than the request timeout. */
  private cleanupExpiredRequests(): void {
    const now = Date.now();
    let cleaned = 0;

    this.queue = this.queue.filter((item) => {
      if (now - item.timestamp > this.requestTimeout) {
        item.reject(new MCPError('Request timeout'));
        cleaned++;
        return false;
      }
      return true;
    });

    if (cleaned > 0) {
      this.logger.warn('Cleaned up expired requests from queue', { count: cleaned });
    }
  }
}