UNPKG

@hivetechs/hive-ai

Version:

Real-time streaming AI consensus platform with HTTP+SSE MCP integration for Claude Code, VS Code, Cursor, and Windsurf - powered by OpenRouter's unified API

382 lines 15.2 kB
/**
 * Bulletproof Error Handling System
 *
 * Comprehensive error handling with retry logic, exponential backoff,
 * circuit breakers, and intelligent fallback mechanisms.
 */
import { structuredLogger } from './structured-logger.js';

/**
 * Coarse classification of failures; drives the retry / no-retry decision
 * in ErrorHandler.classifyError().
 */
export var ErrorType;
(function (ErrorType) {
    ErrorType["NETWORK_ERROR"] = "NETWORK_ERROR";
    ErrorType["API_ERROR"] = "API_ERROR";
    ErrorType["RATE_LIMIT"] = "RATE_LIMIT";
    ErrorType["MODEL_UNAVAILABLE"] = "MODEL_UNAVAILABLE";
    ErrorType["AUTHENTICATION_ERROR"] = "AUTHENTICATION_ERROR";
    ErrorType["VALIDATION_ERROR"] = "VALIDATION_ERROR";
    ErrorType["TIMEOUT_ERROR"] = "TIMEOUT_ERROR";
    ErrorType["UNKNOWN_ERROR"] = "UNKNOWN_ERROR";
})(ErrorType || (ErrorType = {}));

/**
 * Three-state (CLOSED / OPEN / HALF_OPEN) circuit breaker.
 *
 * After `config.failureThreshold` consecutive failures the breaker opens
 * and rejects calls until `config.recoveryTimeout` ms have elapsed, at
 * which point a single probe call is allowed (HALF_OPEN). Any success
 * closes the breaker again.
 */
export class CircuitBreaker {
    config;
    failures = 0;
    lastFailureTime = 0;
    state = 'CLOSED'; // 'CLOSED' | 'OPEN' | 'HALF_OPEN'
    nextAttemptTime = 0;

    /**
     * @param {{failureThreshold: number, recoveryTimeout: number, monitoringWindow: number}} config
     */
    constructor(config) {
        this.config = config;
    }

    /**
     * Run `operation` through the breaker.
     * @param {() => Promise<any>} operation
     * @returns {Promise<any>} the operation's result
     * @throws the operation's error, or an Error when the breaker is OPEN
     */
    async execute(operation) {
        if (this.state === 'OPEN') {
            if (Date.now() < this.nextAttemptTime) {
                throw new Error(`Circuit breaker is OPEN. Next attempt allowed at ${new Date(this.nextAttemptTime)}`);
            }
            // Recovery window elapsed: let exactly one probe request through.
            this.state = 'HALF_OPEN';
        }
        try {
            const result = await operation();
            this.onSuccess();
            return result;
        }
        catch (error) {
            this.onFailure();
            throw error;
        }
    }

    /** Reset the breaker after a successful call (logs if it had been failing). */
    onSuccess() {
        if (this.failures > 0) {
            structuredLogger.info('Circuit breaker recovered', {
                circuitState: this.state,
                previousFailures: this.failures
            });
        }
        this.failures = 0;
        this.state = 'CLOSED';
    }

    /** Record a failure; open the breaker once the threshold is reached. */
    onFailure() {
        this.failures++;
        this.lastFailureTime = Date.now();
        if (this.failures >= this.config.failureThreshold) {
            this.state = 'OPEN';
            this.nextAttemptTime = Date.now() + this.config.recoveryTimeout;
            structuredLogger.warn('Circuit breaker opened', {
                circuitState: this.state,
                failures: this.failures,
                nextAttempt: new Date(this.nextAttemptTime).toISOString()
            });
        }
    }

    /** Snapshot of the breaker's state for monitoring dashboards. */
    getState() {
        return {
            state: this.state,
            failures: this.failures,
            nextAttemptTime: this.nextAttemptTime > 0 ? new Date(this.nextAttemptTime) : undefined
        };
    }
}

/**
 * Orchestrates retries (exponential backoff + jitter), per-provider/model
 * circuit breakers, model fallback, and error bookkeeping.
 */
export class ErrorHandler {
    circuitBreakers = new Map(); // key: `${provider}:${model}`
    retryConfig;
    fallbackConfig;
    errorLog = []; // rolling buffer of classified errors (capped at 1000)

    /**
     * @param {{maxRetries: number, initialDelay: number, maxDelay: number, backoffMultiplier: number, jitter: boolean}} retryConfig
     * @param {{enableModelFallback: boolean, enableProviderFallback: boolean, maxFallbackAttempts: number, fallbackModels: string[]}} fallbackConfig
     */
    constructor(retryConfig = {
        maxRetries: 3,
        initialDelay: 1000,
        maxDelay: 30000,
        backoffMultiplier: 2,
        jitter: true
    }, fallbackConfig = {
        enableModelFallback: true,
        enableProviderFallback: true,
        maxFallbackAttempts: 2,
        fallbackModels: ['openai/gpt-4o-mini', 'anthropic/claude-3-haiku', 'google/gemini-pro']
    }) {
        this.retryConfig = retryConfig;
        this.fallbackConfig = fallbackConfig;
    }

    /**
     * Execute operation with comprehensive error handling.
     *
     * Runs `operation` through the provider/model circuit breaker, retrying
     * retryable failures up to `retryConfig.maxRetries` times with
     * exponential backoff. Non-retryable errors are rethrown immediately.
     *
     * @param {() => Promise<any>} operation
     * @param {{provider: string, model: string, stage: string, requestId?: string}} context
     */
    async executeWithRetry(operation, context) {
        const circuitBreakerKey = `${context.provider}:${context.model}`;
        const circuitBreaker = this.getCircuitBreaker(circuitBreakerKey);
        let lastError;
        for (let attempt = 0; attempt <= this.retryConfig.maxRetries; attempt++) {
            try {
                return await circuitBreaker.execute(operation);
            }
            catch (error) {
                lastError = error;
                const errorContext = this.classifyError(error, context);
                // Log error for monitoring
                this.logError(errorContext);
                // Don't retry non-retryable errors
                if (!errorContext.retryable || attempt === this.retryConfig.maxRetries) {
                    throw error;
                }
                // Calculate delay with exponential backoff and jitter
                const delay = this.calculateDelay(attempt);
                structuredLogger.warn(`Retrying operation`, {
                    provider: context.provider,
                    model: context.model,
                    stage: context.stage,
                    attempt: attempt + 1,
                    maxAttempts: this.retryConfig.maxRetries + 1,
                    delay,
                    errorType: errorContext.type,
                    requestId: context.requestId
                });
                console.warn(`🔄 Retrying ${context.stage} (${context.provider}/${context.model}) in ${delay}ms. Attempt ${attempt + 1}/${this.retryConfig.maxRetries + 1}`);
                await this.sleep(delay);
            }
        }
        throw lastError;
    }

    /**
     * Execute with automatic fallback to alternative models/providers.
     *
     * Tries the primary provider/model (with retries), then walks
     * `fallbackConfig.fallbackModels` ('provider/model' strings) up to
     * `maxFallbackAttempts` additional tries.
     *
     * @param {(provider: string, model: string) => Promise<any>} operation
     * @returns {Promise<{result: any, usedProvider: string, usedModel: string, attempts: number}>}
     * @throws when the primary and every attempted fallback fail
     */
    async executeWithFallback(operation, primaryProvider, primaryModel, stage, requestId) {
        // Try primary model first
        try {
            const result = await this.executeWithRetry(() => operation(primaryProvider, primaryModel), {
                provider: primaryProvider,
                model: primaryModel,
                stage,
                requestId
            });
            return { result, usedProvider: primaryProvider, usedModel: primaryModel, attempts: 1 };
        }
        catch (primaryError) {
            structuredLogger.warn('Primary model failed, attempting fallbacks', {
                provider: primaryProvider,
                model: primaryModel,
                stage,
                requestId,
                error: primaryError.message
            });
            console.warn(`❌ Primary model failed: ${primaryProvider}/${primaryModel}`);
            if (!this.fallbackConfig.enableModelFallback) {
                throw primaryError;
            }
            // Try fallback models
            let attempts = 1;
            for (const fallbackModel of this.fallbackConfig.fallbackModels) {
                if (attempts > this.fallbackConfig.maxFallbackAttempts)
                    break;
                // Skip if it's the same as primary
                if (fallbackModel === `${primaryProvider}/${primaryModel}`)
                    continue;
                const [fallbackProvider, fallbackModelName] = fallbackModel.split('/');
                attempts++;
                try {
                    console.warn(`🔄 Trying fallback: ${fallbackProvider}/${fallbackModelName}`);
                    const result = await this.executeWithRetry(() => operation(fallbackProvider, fallbackModelName), {
                        provider: fallbackProvider,
                        model: fallbackModelName,
                        stage,
                        requestId
                    });
                    structuredLogger.info('Fallback successful', {
                        provider: fallbackProvider,
                        model: fallbackModelName,
                        stage,
                        requestId,
                        attempts
                    });
                    console.warn(`✅ Fallback successful: ${fallbackProvider}/${fallbackModelName}`);
                    return { result, usedProvider: fallbackProvider, usedModel: fallbackModelName, attempts };
                }
                catch (fallbackError) {
                    console.warn(`❌ Fallback failed: ${fallbackProvider}/${fallbackModelName}`);
                    // Continue to next fallback
                }
            }
            // All fallbacks failed
            throw new Error(`All models failed. Primary: ${primaryProvider}/${primaryModel}, Fallbacks attempted: ${attempts - 1}`);
        }
    }

    /**
     * Classify error type and determine if it's retryable.
     *
     * Classification is message-based (substring matching on the lowercased
     * error message). Timeouts are checked FIRST: previously the
     * TIMEOUT_ERROR branch was unreachable because 'timeout' also matched
     * the generic network-error test, so timeouts were mislabelled as
     * NETWORK_ERROR.
     *
     * @returns {{type: string, message: string, statusCode?: number, retryable: boolean, provider: string, model: string, stage: string, timestamp: Date, requestId?: string}}
     */
    classifyError(error, context) {
        let errorType = ErrorType.UNKNOWN_ERROR;
        let retryable = false;
        let statusCode;
        const errorMessage = error.message.toLowerCase();
        // Timeout errors - retryable (checked before NETWORK_ERROR, see above)
        if (errorMessage.includes('timeout')) {
            errorType = ErrorType.TIMEOUT_ERROR;
            retryable = true;
        }
        // Network errors - usually retryable
        else if (errorMessage.includes('network') || errorMessage.includes('connection')) {
            errorType = ErrorType.NETWORK_ERROR;
            retryable = true;
        }
        // Rate limiting - retryable with backoff
        else if (errorMessage.includes('rate limit') || errorMessage.includes('429')) {
            errorType = ErrorType.RATE_LIMIT;
            retryable = true;
            statusCode = 429;
        }
        // Model unavailable - might be retryable
        else if (errorMessage.includes('model') &&
            (errorMessage.includes('unavailable') || errorMessage.includes('not found'))) {
            errorType = ErrorType.MODEL_UNAVAILABLE;
            retryable = false; // Model unavailable usually isn't fixed by retrying
        }
        // Authentication errors - not retryable
        else if (errorMessage.includes('auth') || errorMessage.includes('401') || errorMessage.includes('403')) {
            errorType = ErrorType.AUTHENTICATION_ERROR;
            retryable = false;
            statusCode = Number.parseInt(errorMessage.match(/\b(401|403)\b/)?.[0] ?? '0', 10);
        }
        // Validation errors - not retryable
        else if (errorMessage.includes('validation') || errorMessage.includes('400')) {
            errorType = ErrorType.VALIDATION_ERROR;
            retryable = false;
            statusCode = 400;
        }
        // Server errors - usually retryable
        else if (errorMessage.includes('500') || errorMessage.includes('502') ||
            errorMessage.includes('503') || errorMessage.includes('504')) {
            errorType = ErrorType.API_ERROR;
            retryable = true;
            statusCode = Number.parseInt(errorMessage.match(/\b(50[0-9])\b/)?.[0] ?? '500', 10);
        }
        return {
            type: errorType,
            message: error.message,
            statusCode,
            retryable,
            provider: context.provider,
            model: context.model,
            stage: context.stage,
            timestamp: new Date(),
            requestId: context.requestId
        };
    }

    /**
     * Calculate retry delay with exponential backoff and jitter.
     * @param {number} attempt zero-based attempt index
     * @returns {number} delay in ms, capped at retryConfig.maxDelay
     */
    calculateDelay(attempt) {
        const baseDelay = Math.min(
            this.retryConfig.initialDelay * Math.pow(this.retryConfig.backoffMultiplier, attempt),
            this.retryConfig.maxDelay
        );
        if (this.retryConfig.jitter) {
            // Add ±20% jitter to prevent thundering herd
            const jitterFactor = 0.8 + Math.random() * 0.4; // 0.8 to 1.2
            return Math.floor(baseDelay * jitterFactor);
        }
        return baseDelay;
    }

    /**
     * Get or create circuit breaker for provider/model combination.
     * @param {string} key `${provider}:${model}`
     */
    getCircuitBreaker(key) {
        if (!this.circuitBreakers.has(key)) {
            const config = {
                failureThreshold: 5, // Open circuit after 5 failures
                recoveryTimeout: 60000, // Wait 1 minute before trying again
                monitoringWindow: 300000 // 5 minute window
            };
            this.circuitBreakers.set(key, new CircuitBreaker(config));
        }
        return this.circuitBreakers.get(key);
    }

    /**
     * Log error for monitoring and debugging. Keeps a rolling buffer of the
     * last 1000 classified errors and mirrors each to the console.
     */
    logError(errorContext) {
        this.errorLog.push(errorContext);
        // Keep only recent errors (last 1000)
        if (this.errorLog.length > 1000) {
            this.errorLog = this.errorLog.slice(-1000);
        }
        // Log to console with structured format
        console.error(`❌ [${errorContext.type}] ${errorContext.stage} failed:`, {
            provider: errorContext.provider,
            model: errorContext.model,
            message: errorContext.message,
            statusCode: errorContext.statusCode,
            retryable: errorContext.retryable,
            requestId: errorContext.requestId,
            timestamp: errorContext.timestamp.toISOString()
        });
    }

    /**
     * Get error statistics for monitoring.
     * @param {number} timeWindow lookback window in ms (default 1 hour)
     */
    getErrorStats(timeWindow = 3600000) {
        const cutoff = new Date(Date.now() - timeWindow);
        const recentErrors = this.errorLog.filter(error => error.timestamp >= cutoff);
        const errorsByType = {};
        const errorsByProvider = {};
        const errorsByModel = {};
        recentErrors.forEach(error => {
            errorsByType[error.type] = (errorsByType[error.type] || 0) + 1;
            errorsByProvider[error.provider] = (errorsByProvider[error.provider] || 0) + 1;
            errorsByModel[error.model] = (errorsByModel[error.model] || 0) + 1;
        });
        return {
            totalErrors: recentErrors.length,
            errorsByType,
            errorsByProvider,
            errorsByModel,
            recentErrors: recentErrors.slice(-10) // Last 10 errors
        };
    }

    /**
     * Get circuit breaker status for all monitored services.
     * @returns {Record<string, object>} keyed by `${provider}:${model}`
     */
    getCircuitBreakerStatus() {
        const status = {};
        this.circuitBreakers.forEach((breaker, key) => {
            status[key] = breaker.getState();
        });
        return status;
    }

    /**
     * Reset circuit breaker for a specific provider/model.
     * @returns {boolean} true if a breaker existed and was removed
     */
    resetCircuitBreaker(provider, model) {
        const key = `${provider}:${model}`;
        if (this.circuitBreakers.has(key)) {
            this.circuitBreakers.delete(key);
            return true;
        }
        return false;
    }

    /** Update retry configuration (shallow-merged over current values). */
    updateRetryConfig(config) {
        this.retryConfig = { ...this.retryConfig, ...config };
    }

    /** Update fallback configuration (shallow-merged over current values). */
    updateFallbackConfig(config) {
        this.fallbackConfig = { ...this.fallbackConfig, ...config };
    }

    /** Sleep utility for delays. */
    sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }
}

// Singleton instance for global use
export const globalErrorHandler = new ErrorHandler();

/**
 * Decorator for automatic error handling: wraps the method in
 * globalErrorHandler.executeWithRetry with a generated requestId.
 */
export function withErrorHandling(provider, model, stage) {
    return function (target, propertyName, descriptor) {
        const method = descriptor.value;
        descriptor.value = async function (...args) {
            // slice() replaces deprecated String.prototype.substr
            const requestId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
            return await globalErrorHandler.executeWithRetry(() => method.apply(this, args), {
                provider,
                model,
                stage,
                requestId
            });
        };
        return descriptor;
    };
}

/**
 * Decorator for automatic fallback handling: the decorated method must take
 * (provider, model, ...args); failures cascade through fallback models via
 * globalErrorHandler.executeWithFallback.
 */
export function withFallback(stage) {
    return function (target, propertyName, descriptor) {
        const method = descriptor.value;
        descriptor.value = async function (provider, model, ...args) {
            const requestId = `${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
            return await globalErrorHandler.executeWithFallback(
                (fallbackProvider, fallbackModel) => method.apply(this, [fallbackProvider, fallbackModel, ...args]),
                provider, model, stage, requestId
            );
        };
        return descriptor;
    };
}
//# sourceMappingURL=error-handling.js.map