UNPKG

erosolar-cli

Version:

Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning

370 lines 11.6 kB
/**
 * Resilient Provider Wrapper
 *
 * Adds rate limiting, exponential backoff retry, and circuit breaker
 * patterns to any LLM provider for maximum reliability and performance.
 *
 * PERF: Provider-agnostic wrapper that prevents rate limit errors and
 * automatically recovers from transient failures.
 */
import { RateLimiter, retry, sleep } from '../utils/asyncUtils.js';

// ============================================================================
// Rate Limit Error Detection
// ============================================================================

/** Lower-case substrings that mark an error message as a rate-limit failure. */
const RATE_LIMIT_PATTERNS = [
  'rate limit',
  'rate_limit',
  'ratelimit',
  'too many requests',
  '429',
  'quota exceeded',
  'request limit',
  'throttled',
  'overloaded',
  'capacity',
];

/**
 * Lower-case substrings that mark an error message (or error code) as a
 * transient failure worth retrying.
 */
const TRANSIENT_ERROR_PATTERNS = [
  'timeout',
  'timed out',
  'network',
  'connection',
  'econnrefused',
  'econnreset',
  'enotfound',
  'epipe',
  'econnaborted',
  'ehostunreach',
  'enetunreach',
  'socket',
  'temporarily unavailable',
  '502',
  '503',
  '504',
  'bad gateway',
  'service unavailable',
  'gateway timeout',
  'internal server error',
  '500',
  // Stream and fetch errors
  'premature close',
  'premature end',
  'unexpected end',
  'stream',
  'aborted',
  'fetcherror',
  'fetch error',
  'invalid response body',
  'response body',
  'gunzip',
  'decompress',
  'zlib',
  'content-encoding',
  'chunked encoding',
  'transfer-encoding',
  // SSL/TLS errors
  'ssl',
  'tls',
  'certificate',
  'cert',
  'handshake',
];

/**
 * Tell whether an error's message matches one of RATE_LIMIT_PATTERNS.
 *
 * @param {unknown} error - Value caught from a provider call.
 * @returns {boolean} true only for Error instances whose lower-cased message
 *   contains a rate-limit pattern; false for everything else.
 */
function isRateLimitError(error) {
  if (!(error instanceof Error)) return false;
  const message = String(error.message ?? '').toLowerCase();
  return RATE_LIMIT_PATTERNS.some((pattern) => message.includes(pattern));
}

/**
 * Tell whether an error (or any Error in its `cause` chain) looks transient.
 *
 * Each Error in the chain is checked, in order, by message, by name
 * (contains "fetch", "abort" or "network") and by `code`.
 *
 * @param {unknown} error - Value caught from a provider call.
 * @returns {boolean} true when any link of the chain matches; false for
 *   non-Error values and for chains with no match.
 */
function isTransientError(error) {
  // Visited set stops the walk if a cause chain ever loops back on itself.
  const visited = new Set();
  let current = error;
  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);

    // Check message
    const message = String(current.message ?? '').toLowerCase();
    if (TRANSIENT_ERROR_PATTERNS.some((pattern) => message.includes(pattern))) {
      return true;
    }

    // Check error name/type (FetchError, AbortError, etc.)
    const errorName = String(current.name ?? '').toLowerCase();
    if (errorName.includes('fetch') || errorName.includes('abort') || errorName.includes('network')) {
      return true;
    }

    // Check error code if present. `code` is coerced with String() because it
    // is not guaranteed to be a string: calling .toLowerCase() on a numeric
    // code (e.g. 503) would throw a TypeError and hide the original failure.
    const errorCode = String(current.code ?? '').toLowerCase();
    if (errorCode && TRANSIENT_ERROR_PATTERNS.some((pattern) => errorCode.includes(pattern))) {
      return true;
    }

    // Continue with the nested error, if any; a non-Error cause ends the loop.
    current = current.cause;
  }
  return false;
}

/**
 * Retry predicate: an error is retryable when it is a rate-limit error or a
 * transient error.
 *
 * @param {unknown} error - Value caught from a provider call.
 * @returns {boolean} Whether the failed operation should be attempted again.
 */
function shouldRetry(error) {
  return isRateLimitError(error) || isTransientError(error);
}

// ============================================================================
// Resilient Provider Implementation
// ============================================================================

/**
 * Wraps any LLM provider with rate limiting and retry logic
 */
export class ResilientProvider {
  id;
  model;
  provider;
  rateLimiter;
  config;
  circuitBreaker;
  stats = {
    totalRequests: 0,
    rateLimitHits: 0,
    retries: 0,
    circuitBreakerTrips: 0,
  };

  /**
   * @param {object} provider - Wrapped provider. Its `id` and `model` are
   *   copied onto this wrapper; `generate` and (optionally) `generateStream`
   *   are called through it.
   * @param {object} [config] - Overrides; every omitted (null/undefined)
   *   field falls back to the default listed below.
   */
  constructor(provider, config = {}) {
    this.provider = provider;
    this.id = provider.id;
    this.model = provider.model;
    this.config = {
      maxRequestsPerMinute: config.maxRequestsPerMinute ?? 50,
      maxRetries: config.maxRetries ?? 4,
      baseDelayMs: config.baseDelayMs ?? 1000,
      maxDelayMs: config.maxDelayMs ?? 32000,
      enableCircuitBreaker: config.enableCircuitBreaker ?? true,
      circuitBreakerThreshold: config.circuitBreakerThreshold ?? 5,
      circuitBreakerResetMs: config.circuitBreakerResetMs ?? 60000,
    };
    this.rateLimiter = new RateLimiter({
      maxRequests: this.config.maxRequestsPerMinute,
      windowMs: 60000,
    });
    this.circuitBreaker = {
      failures: 0,
      lastFailure: 0,
      isOpen: false,
    };
  }

  /**
   * Check and potentially reset circuit breaker
   *
   * @throws {Error} While the breaker is open and fewer than
   *   `circuitBreakerResetMs` milliseconds have passed since the last
   *   recorded failure.
   */
  checkCircuitBreaker() {
    if (!this.config.enableCircuitBreaker) return;
    if (!this.circuitBreaker.isOpen) return;

    const elapsed = Date.now() - this.circuitBreaker.lastFailure;
    if (elapsed >= this.config.circuitBreakerResetMs) {
      // Cool-down is over: close the breaker and halve the failure count so
      // that fewer new failures are needed to trip it again.
      this.circuitBreaker.isOpen = false;
      this.circuitBreaker.failures = Math.floor(this.circuitBreaker.failures / 2);
      return;
    }

    const retryInSeconds = Math.ceil((this.config.circuitBreakerResetMs - elapsed) / 1000);
    throw new Error(
      `Circuit breaker is open. Too many failures (${this.circuitBreaker.failures}). ` +
        `Retry in ${retryInSeconds}s.`,
    );
  }

  /**
   * Record a failure for circuit breaker
   *
   * Opens the breaker (and counts a trip) once the failure count reaches
   * `circuitBreakerThreshold`.
   *
   * @param {unknown} _error - The failure; currently unused.
   */
  recordFailure(_error) {
    if (!this.config.enableCircuitBreaker) return;
    this.circuitBreaker.failures++;
    this.circuitBreaker.lastFailure = Date.now();
    if (this.circuitBreaker.failures >= this.config.circuitBreakerThreshold) {
      this.circuitBreaker.isOpen = true;
      this.stats.circuitBreakerTrips++;
    }
  }

  /**
   * Record a success: decrements the failure count by one, never below zero.
   */
  recordSuccess() {
    if (this.config.enableCircuitBreaker && this.circuitBreaker.failures > 0) {
      this.circuitBreaker.failures = Math.max(0, this.circuitBreaker.failures - 1);
    }
  }

  /**
   * Execute a request with rate limiting and retry
   *
   * @param {Function} operation - Zero-argument function performing the
   *   request; handed to the imported `retry` helper.
   * @param {string} [_operationName] - Label for the operation; currently unused.
   * @returns {Promise<*>} Whatever `retry(operation, …)` resolves to.
   * @throws Rethrows the error from `retry` after recording it as a failure;
   *   also throws when the circuit breaker is open.
   */
  async executeWithResilience(operation, _operationName) {
    this.stats.totalRequests++;
    // Check circuit breaker
    this.checkCircuitBreaker();
    // Acquire rate limit token
    await this.rateLimiter.acquire();
    try {
      const result = await retry(operation, {
        maxRetries: this.config.maxRetries,
        baseDelayMs: this.config.baseDelayMs,
        maxDelayMs: this.config.maxDelayMs,
        backoffMultiplier: 2,
        // The predicate doubles as the place where retry statistics are kept.
        shouldRetry: (error) => {
          if (!shouldRetry(error)) return false;
          this.stats.retries++;
          if (isRateLimitError(error)) {
            this.stats.rateLimitHits++;
          }
          return true;
        },
      });
      this.recordSuccess();
      return result;
    } catch (error) {
      this.recordFailure(error);
      throw error;
    }
  }

  /**
   * Generate a response with resilience
   *
   * @param {*} messages - Passed through to `provider.generate`.
   * @param {*} [tools] - Passed through to `provider.generate`.
   * @returns {Promise<*>} The provider's response.
   */
  async generate(messages, tools) {
    return this.executeWithResilience(() => this.provider.generate(messages, tools), 'generate');
  }

  /**
   * Generate a streaming response with resilience
   *
   * Note: Retry logic is limited for streaming - we can only retry
   * before the stream starts, not mid-stream. Once a chunk has been handed
   * to the consumer, a failure is recorded and rethrown instead of restarting
   * the stream, because a restart would deliver the same content twice.
   *
   * When the wrapped provider has no `generateStream`, the non-streaming
   * `generate` result is replayed as `content` / `tool_call` / `usage` chunks.
   *
   * @param {*} messages - Passed through to the provider.
   * @param {*} [tools] - Passed through to the provider.
   * @yields {object} Provider stream chunks, or synthesized chunks in the
   *   fallback path.
   * @throws Rethrows the provider error once it is not retryable, retries
   *   are exhausted, or output has already been emitted.
   */
  async *generateStream(messages, tools) {
    if (!this.provider.generateStream) {
      // Fall back to non-streaming
      const response = await this.generate(messages, tools);
      if (response.type === 'message') {
        yield { type: 'content', content: response.content };
      } else if (response.type === 'tool_calls') {
        if (response.content) {
          yield { type: 'content', content: response.content };
        }
        if (response.toolCalls) {
          for (const call of response.toolCalls) {
            yield { type: 'tool_call', toolCall: call };
          }
        }
      }
      if (response.usage) {
        yield { type: 'usage', usage: response.usage };
      }
      return;
    }

    this.stats.totalRequests++;
    // Check circuit breaker
    this.checkCircuitBreaker();
    // Acquire rate limit token
    await this.rateLimiter.acquire();

    let attempts = 0;
    let hasEmitted = false;
    for (;;) {
      try {
        const stream = this.provider.generateStream(messages, tools);
        for await (const chunk of stream) {
          // Set before yielding so an error raised at the yield point is also
          // treated as mid-stream.
          hasEmitted = true;
          yield chunk;
        }
        this.recordSuccess();
        return;
      } catch (err) {
        attempts++;
        const canRetry = !hasEmitted && attempts <= this.config.maxRetries && shouldRetry(err);
        if (!canRetry) {
          this.recordFailure(err);
          throw err;
        }
        this.stats.retries++;
        if (isRateLimitError(err)) {
          this.stats.rateLimitHits++;
        }
        // Exponential backoff: base * 2^(attempt - 1), capped at maxDelayMs.
        const delay = Math.min(
          this.config.baseDelayMs * Math.pow(2, attempts - 1),
          this.config.maxDelayMs,
        );
        await sleep(delay);
      }
    }
  }

  /**
   * Get resilience statistics
   *
   * @returns {object} Copy of the counters plus the current breaker state and
   *   the rate limiter's `availableTokens` value.
   */
  getStats() {
    return {
      ...this.stats,
      circuitBreakerOpen: this.circuitBreaker.isOpen,
      availableTokens: this.rateLimiter.availableTokens,
    };
  }

  /**
   * Reset statistics (counters only; circuit breaker state is left untouched)
   */
  resetStats() {
    this.stats = {
      totalRequests: 0,
      rateLimitHits: 0,
      retries: 0,
      circuitBreakerTrips: 0,
    };
  }
}

// ============================================================================
// Factory Function
// ============================================================================

/**
 * Wrap any provider with resilience features
 *
 * @param {object} provider - Provider to wrap.
 * @param {object} [config] - Same options as the ResilientProvider constructor.
 * @returns {ResilientProvider} The wrapping provider.
 */
export function withResilience(provider, config) {
  return new ResilientProvider(provider, config);
}

/**
 * Provider-specific recommended configurations
 */
export const PROVIDER_RESILIENCE_CONFIGS = {
  anthropic: {
    maxRequestsPerMinute: 50,
    maxRetries: 4,
    baseDelayMs: 1500,
    maxDelayMs: 40000,
  },
  openai: {
    maxRequestsPerMinute: 60,
    maxRetries: 3,
    baseDelayMs: 1000,
    maxDelayMs: 30000,
  },
  google: {
    maxRequestsPerMinute: 60,
    maxRetries: 3,
    baseDelayMs: 1000,
    maxDelayMs: 30000,
  },
  deepseek: {
    maxRequestsPerMinute: 30,
    maxRetries: 4,
    baseDelayMs: 2000,
    maxDelayMs: 45000,
  },
  xai: {
    maxRequestsPerMinute: 40,
    maxRetries: 3,
    baseDelayMs: 1500,
    maxDelayMs: 35000,
  },
  ollama: {
    maxRequestsPerMinute: 100,
    maxRetries: 2,
    baseDelayMs: 500,
    maxDelayMs: 10000,
    enableCircuitBreaker: false, // Local, less likely to need circuit breaker
  },
};

/**
 * Wrap a provider with resilience using provider-specific defaults
 *
 * @param {object} provider - Provider to wrap.
 * @param {string} providerId - Key into PROVIDER_RESILIENCE_CONFIGS; unknown
 *   ids fall back to an empty default set.
 * @param {object} [overrides] - Options that take precedence over the defaults.
 * @returns {ResilientProvider} The wrapping provider.
 */
export function withProviderResilience(provider, providerId, overrides) {
  const defaults = PROVIDER_RESILIENCE_CONFIGS[providerId] ?? {};
  return new ResilientProvider(provider, { ...defaults, ...overrides });
}
//# sourceMappingURL=resilientProvider.js.map