erosolar-cli
Version:
Unified AI agent framework for the command line - Multi-provider support with schema-driven tools, code intelligence, and transparent reasoning
370 lines • 11.6 kB
JavaScript
/**
* Resilient Provider Wrapper
*
* Adds rate limiting, exponential backoff retry, and circuit breaker
* patterns to any LLM provider for maximum reliability and performance.
*
* PERF: Provider-agnostic wrapper that prevents rate limit errors and
* automatically recovers from transient failures.
*/
import { RateLimiter, retry, sleep } from '../utils/asyncUtils.js';
// ============================================================================
// Rate Limit Error Detection
// ============================================================================
/**
 * Lowercase substrings that identify a rate-limit / quota error message.
 * Entries are compared against a lowercased error message, so every entry
 * must itself be lowercase.
 */
const RATE_LIMIT_PATTERNS = [
  // Spelling variants of "rate limit"
  'rate limit',
  'rate_limit',
  'ratelimit',
  // HTTP 429 and its reason phrase
  'too many requests',
  '429',
  // Quota / throttling wording
  'quota exceeded',
  'request limit',
  'throttled',
  // Provider-side load shedding
  'overloaded',
  'capacity',
];
/**
 * Lowercase substrings that mark an error as transient (worth retrying).
 * Entries are compared against lowercased error messages and error codes,
 * so every entry must itself be lowercase.
 */
const TRANSIENT_ERROR_PATTERNS = [
  // Timeouts and generic connectivity wording
  'timeout',
  'timed out',
  'network',
  'connection',
  // Node.js-style socket error codes
  'econnrefused',
  'econnreset',
  'enotfound',
  'epipe',
  'econnaborted',
  'ehostunreach',
  'enetunreach',
  'socket',
  // HTTP 5xx status codes and reason phrases
  'temporarily unavailable',
  '502',
  '503',
  '504',
  'bad gateway',
  'service unavailable',
  'gateway timeout',
  'internal server error',
  '500',
  // Stream and fetch errors
  'premature close',
  'premature end',
  'unexpected end',
  'stream',
  'aborted',
  'fetcherror',
  'fetch error',
  'invalid response body',
  'response body',
  'gunzip',
  'decompress',
  'zlib',
  'content-encoding',
  'chunked encoding',
  'transfer-encoding',
  // SSL/TLS errors
  'ssl',
  'tls',
  'certificate',
  'cert',
  'handshake',
];
/**
 * Decide whether an error represents a rate-limit / quota response.
 *
 * An error qualifies when its lowercased message contains one of
 * RATE_LIMIT_PATTERNS, or when any Error in its `cause` chain does. Walking
 * the chain keeps this check consistent with isTransientError, so a
 * rate-limit error that was wrapped by an intermediate layer is still
 * recognised.
 *
 * @param {unknown} error - Value caught from a provider call.
 * @returns {boolean} True for rate-limit errors; false otherwise, including
 *   for values that are not Error instances.
 */
function isRateLimitError(error) {
  // Track visited errors so a self-referential `cause` cannot loop forever.
  const visited = new Set();
  let current = error;
  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);
    // `message` is normally a string; coerce defensively so an unusual error
    // object cannot make the classifier itself throw.
    const message = String(current.message ?? '').toLowerCase();
    if (RATE_LIMIT_PATTERNS.some((pattern) => message.includes(pattern))) {
      return true;
    }
    current = current.cause;
  }
  return false;
}
/**
 * Decide whether an error looks transient (network, timeout, 5xx, stream or
 * TLS trouble) and is therefore worth retrying.
 *
 * For the error and every Error in its `cause` chain, three things are
 * checked in order:
 *   1. the lowercased message against TRANSIENT_ERROR_PATTERNS,
 *   2. the lowercased `name` for "fetch", "abort" or "network",
 *   3. the lowercased `code` (if any) against TRANSIENT_ERROR_PATTERNS.
 *
 * @param {unknown} error - Value caught from a provider call.
 * @returns {boolean} True when any error in the chain matches; false
 *   otherwise, including for values that are not Error instances.
 */
function isTransientError(error) {
  // Iterate rather than recurse, remembering visited errors, so a cyclic
  // `cause` chain terminates instead of overflowing the stack.
  const visited = new Set();
  let current = error;
  while (current instanceof Error && !visited.has(current)) {
    visited.add(current);

    // Check message
    const message = String(current.message ?? '').toLowerCase();
    if (TRANSIENT_ERROR_PATTERNS.some((pattern) => message.includes(pattern))) {
      return true;
    }

    // Check error name/type (FetchError, AbortError, etc.)
    const errorName = String(current.name ?? '').toLowerCase();
    if (
      errorName.includes('fetch') ||
      errorName.includes('abort') ||
      errorName.includes('network')
    ) {
      return true;
    }

    // Check error code if present. `code` may be a number (e.g. an HTTP
    // status) rather than a Node.js-style string, so coerce before
    // lowercasing; calling toLowerCase on a number would throw.
    const errorCode = current.code == null ? '' : String(current.code).toLowerCase();
    if (errorCode && TRANSIENT_ERROR_PATTERNS.some((pattern) => errorCode.includes(pattern))) {
      return true;
    }

    // Continue with the nested error, if there is one.
    current = current.cause;
  }
  return false;
}
/**
 * Retry policy shared by the streaming and non-streaming paths: an error is
 * retryable when it is classified as a rate-limit error or as transient.
 *
 * @param {unknown} error - Value caught from a provider call.
 * @returns {boolean} True when the failed call should be attempted again.
 */
function shouldRetry(error) {
  if (isRateLimitError(error)) {
    return true;
  }
  return isTransientError(error);
}
// ============================================================================
// Resilient Provider Implementation
// ============================================================================
/**
 * Wraps any LLM provider with rate limiting, retry with exponential backoff,
 * and an optional circuit breaker.
 *
 * The wrapper copies `id` and `model` from the wrapped provider and offers
 * `generate` / `generateStream` with the same arguments, plus
 * `getStats()` / `resetStats()` for observability.
 */
export class ResilientProvider {
  /** Identifier copied from the wrapped provider. */
  id;
  /** Model name copied from the wrapped provider. */
  model;
  /** The wrapped provider that actually performs requests. */
  provider;
  /** RateLimiter instance gating outgoing requests. */
  rateLimiter;
  /** Fully-resolved configuration (all defaults applied). */
  config;
  /** Circuit breaker state: `{ failures, lastFailure, isOpen }`. */
  circuitBreaker;
  /** Running counters exposed through getStats(). */
  stats = {
    totalRequests: 0,
    rateLimitHits: 0,
    retries: 0,
    circuitBreakerTrips: 0,
  };

  /**
   * @param {object} provider - Provider to wrap; must expose `id`, `model`
   *   and `generate`, and may expose `generateStream`.
   * @param {object} [config] - Optional overrides. Defaults:
   *   maxRequestsPerMinute 50, maxRetries 4, baseDelayMs 1000,
   *   maxDelayMs 32000, enableCircuitBreaker true,
   *   circuitBreakerThreshold 5, circuitBreakerResetMs 60000.
   */
  constructor(provider, config = {}) {
    // The default parameter only covers `undefined`; also tolerate `null`.
    const options = config ?? {};
    this.provider = provider;
    this.id = provider.id;
    this.model = provider.model;
    this.config = {
      maxRequestsPerMinute: options.maxRequestsPerMinute ?? 50,
      maxRetries: options.maxRetries ?? 4,
      baseDelayMs: options.baseDelayMs ?? 1000,
      maxDelayMs: options.maxDelayMs ?? 32000,
      enableCircuitBreaker: options.enableCircuitBreaker ?? true,
      circuitBreakerThreshold: options.circuitBreakerThreshold ?? 5,
      circuitBreakerResetMs: options.circuitBreakerResetMs ?? 60000,
    };
    this.rateLimiter = new RateLimiter({
      maxRequests: this.config.maxRequestsPerMinute,
      windowMs: 60000,
    });
    this.circuitBreaker = {
      failures: 0,
      lastFailure: 0,
      isOpen: false,
    };
  }

  /**
   * Gate a request on the circuit breaker.
   *
   * When the breaker is open and the reset window has elapsed since the last
   * failure, the breaker is closed again and the failure count is halved.
   * When the window has not elapsed, the request is rejected.
   *
   * @throws {Error} When the breaker is open and still inside the reset window.
   */
  checkCircuitBreaker() {
    if (!this.config.enableCircuitBreaker) {
      return;
    }
    const breaker = this.circuitBreaker;
    if (!breaker.isOpen) {
      return;
    }
    const elapsed = Date.now() - breaker.lastFailure;
    if (elapsed >= this.config.circuitBreakerResetMs) {
      // Reset window elapsed: let traffic through again, but keep half of the
      // failure history so a still-unhealthy provider re-trips sooner.
      breaker.isOpen = false;
      breaker.failures = Math.floor(breaker.failures / 2);
      return;
    }
    const retryInSeconds = Math.ceil((this.config.circuitBreakerResetMs - elapsed) / 1000);
    throw new Error(
      `Circuit breaker is open. Too many failures (${breaker.failures}). ` +
        `Retry in ${retryInSeconds}s.`,
    );
  }

  /**
   * Record a failed request for the circuit breaker and open the breaker once
   * the failure count reaches `circuitBreakerThreshold`.
   *
   * @param {unknown} _error - The failure (currently unused).
   */
  recordFailure(_error) {
    if (!this.config.enableCircuitBreaker) {
      return;
    }
    const breaker = this.circuitBreaker;
    breaker.failures++;
    breaker.lastFailure = Date.now();
    // Count a trip only on the closed -> open transition. Requests that were
    // already in flight when the breaker opened may still fail afterwards and
    // must not be reported as additional trips.
    if (breaker.failures >= this.config.circuitBreakerThreshold && !breaker.isOpen) {
      breaker.isOpen = true;
      this.stats.circuitBreakerTrips++;
    }
  }

  /**
   * Record a successful request: decays the failure count by one (never
   * below zero) rather than clearing it outright.
   */
  recordSuccess() {
    if (this.config.enableCircuitBreaker && this.circuitBreaker.failures > 0) {
      this.circuitBreaker.failures = Math.max(0, this.circuitBreaker.failures - 1);
    }
  }

  /**
   * Execute a request with circuit-breaker gating, rate limiting and retry.
   *
   * The breaker check and rate-limit token acquisition happen once per call;
   * the retries themselves are delegated to `retry()` with a backoff
   * multiplier of 2.
   *
   * @param {() => Promise<*>} operation - The provider call to run.
   * @param {string} [_operationName] - Label for the operation (currently unused).
   * @returns {Promise<*>} Whatever `operation` resolves to.
   * @throws {Error} When the breaker is open, or the final error from `retry()`.
   */
  async executeWithResilience(operation, _operationName) {
    this.stats.totalRequests++;
    // A breaker rejection is thrown before the try block on purpose, so it is
    // not itself counted as a provider failure.
    this.checkCircuitBreaker();
    await this.rateLimiter.acquire();
    try {
      const result = await retry(operation, {
        maxRetries: this.config.maxRetries,
        baseDelayMs: this.config.baseDelayMs,
        maxDelayMs: this.config.maxDelayMs,
        backoffMultiplier: 2,
        shouldRetry: (error) => {
          if (!shouldRetry(error)) {
            return false;
          }
          this.stats.retries++;
          if (isRateLimitError(error)) {
            this.stats.rateLimitHits++;
          }
          return true;
        },
      });
      this.recordSuccess();
      return result;
    } catch (error) {
      this.recordFailure(error);
      throw error;
    }
  }

  /**
   * Generate a response with resilience.
   *
   * @param {*} messages - Passed through to the wrapped provider.
   * @param {*} tools - Passed through to the wrapped provider.
   * @returns {Promise<*>} The wrapped provider's response.
   */
  async generate(messages, tools) {
    return this.executeWithResilience(() => this.provider.generate(messages, tools), 'generate');
  }

  /**
   * Generate a streaming response with resilience.
   *
   * When the wrapped provider has no `generateStream`, the non-streaming
   * `generate` result is converted into `content`, `tool_call` and `usage`
   * chunks.
   *
   * Retries only happen while nothing has been yielded yet. Once a chunk has
   * reached the consumer, restarting the provider stream would replay it from
   * the beginning and duplicate output, so a mid-stream error is recorded as
   * a failure and rethrown immediately.
   *
   * @param {*} messages - Passed through to the wrapped provider.
   * @param {*} tools - Passed through to the wrapped provider.
   * @yields {object} Stream chunks from the provider (or synthesized ones).
   * @throws {Error} When the breaker is open, or the provider's error once
   *   retries are exhausted / not applicable.
   */
  async *generateStream(messages, tools) {
    if (!this.provider.generateStream) {
      // Fall back to non-streaming
      const response = await this.generate(messages, tools);
      if (response.type === 'message') {
        yield { type: 'content', content: response.content };
      } else if (response.type === 'tool_calls') {
        if (response.content) {
          yield { type: 'content', content: response.content };
        }
        if (response.toolCalls) {
          for (const call of response.toolCalls) {
            yield { type: 'tool_call', toolCall: call };
          }
        }
      }
      if (response.usage) {
        yield { type: 'usage', usage: response.usage };
      }
      return;
    }

    this.stats.totalRequests++;
    this.checkCircuitBreaker();
    await this.rateLimiter.acquire();

    let attempts = 0;
    // Loop exits only via `return` (success) or `throw` (final failure).
    for (;;) {
      // Set before each yield so that an error raised after (or during) the
      // hand-off of a chunk is never treated as retryable.
      let yieldedAny = false;
      try {
        for await (const chunk of this.provider.generateStream(messages, tools)) {
          yieldedAny = true;
          yield chunk;
        }
        this.recordSuccess();
        return;
      } catch (err) {
        attempts++;
        const canRetry =
          !yieldedAny && attempts <= this.config.maxRetries && shouldRetry(err);
        if (!canRetry) {
          this.recordFailure(err);
          throw err;
        }
        this.stats.retries++;
        if (isRateLimitError(err)) {
          this.stats.rateLimitHits++;
        }
        // Exponential backoff: base * 2^(attempt - 1), capped at maxDelayMs.
        const delay = Math.min(
          this.config.baseDelayMs * Math.pow(2, attempts - 1),
          this.config.maxDelayMs,
        );
        await sleep(delay);
      }
    }
  }

  /**
   * Get resilience statistics.
   *
   * @returns {object} The running counters plus `circuitBreakerOpen` and the
   *   rate limiter's `availableTokens`.
   */
  getStats() {
    return {
      ...this.stats,
      circuitBreakerOpen: this.circuitBreaker.isOpen,
      availableTokens: this.rateLimiter.availableTokens,
    };
  }

  /**
   * Reset all counters to zero. Circuit breaker state is left untouched.
   */
  resetStats() {
    this.stats = {
      totalRequests: 0,
      rateLimitHits: 0,
      retries: 0,
      circuitBreakerTrips: 0,
    };
  }
}
// ============================================================================
// Factory Function
// ============================================================================
/**
 * Factory shorthand: wrap a provider in a ResilientProvider.
 *
 * @param {object} provider - Provider to wrap.
 * @param {object} [config] - Resilience options forwarded unchanged to the
 *   ResilientProvider constructor.
 * @returns {ResilientProvider} The wrapping instance.
 */
export function withResilience(provider, config) {
  const wrapped = new ResilientProvider(provider, config);
  return wrapped;
}
/**
 * Recommended resilience settings keyed by provider id.
 *
 * Each entry is a partial config; any field left out falls back to the
 * ResilientProvider constructor default.
 */
export const PROVIDER_RESILIENCE_CONFIGS = {
  anthropic: {
    maxRequestsPerMinute: 50,
    maxRetries: 4,
    baseDelayMs: 1_500,
    maxDelayMs: 40_000,
  },
  openai: {
    maxRequestsPerMinute: 60,
    maxRetries: 3,
    baseDelayMs: 1_000,
    maxDelayMs: 30_000,
  },
  google: {
    maxRequestsPerMinute: 60,
    maxRetries: 3,
    baseDelayMs: 1_000,
    maxDelayMs: 30_000,
  },
  deepseek: {
    maxRequestsPerMinute: 30,
    maxRetries: 4,
    baseDelayMs: 2_000,
    maxDelayMs: 45_000,
  },
  xai: {
    maxRequestsPerMinute: 40,
    maxRetries: 3,
    baseDelayMs: 1_500,
    maxDelayMs: 35_000,
  },
  ollama: {
    maxRequestsPerMinute: 100,
    maxRetries: 2,
    baseDelayMs: 500,
    maxDelayMs: 10_000,
    // Local, less likely to need circuit breaker
    enableCircuitBreaker: false,
  },
};
/**
 * Wrap a provider using the recommended settings for `providerId`.
 *
 * Settings are layered: the PROVIDER_RESILIENCE_CONFIGS entry for
 * `providerId` (or nothing when there is no entry) first, then `overrides`
 * on top, so caller-supplied values win.
 *
 * @param {object} provider - Provider to wrap.
 * @param {string} providerId - Key into PROVIDER_RESILIENCE_CONFIGS.
 * @param {object} [overrides] - Settings that take precedence over the preset.
 * @returns {ResilientProvider} The wrapping instance.
 */
export function withProviderResilience(provider, providerId, overrides) {
  const preset = PROVIDER_RESILIENCE_CONFIGS[providerId] ?? {};
  const effectiveConfig = { ...preset, ...overrides };
  return new ResilientProvider(provider, effectiveConfig);
}
//# sourceMappingURL=resilientProvider.js.map