UNPKG

cmte

Version:

Design by Committee™ except it's just you and LLMs

322 lines (291 loc) 14.1 kB
import { logger } from '../../utils/logger.js';

/**
 * Reads an integer setting from an environment variable.
 *
 * An unset or empty variable yields `fallback`. A value that does not parse as
 * an integer also yields `fallback` (with a warning) instead of leaking `NaN`
 * into loop bounds and timer delays. A value below `min` is clamped to `min`.
 *
 * @param {string} name Environment variable name
 * @param {number} fallback Value used when the variable is unset or unparsable
 * @param {number} [min=0] Smallest accepted value
 * @returns {number} The validated integer
 */
function readIntEnv(name, fallback, min = 0) {
  const raw = process.env[name];
  if (!raw) {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    logger.warn(`Ignoring invalid ${name}="${raw}"; using default ${fallback}.`);
    return fallback;
  }
  if (parsed < min) {
    logger.warn(`${name}=${parsed} is below the minimum of ${min}; using ${min}.`);
    return min;
  }
  return parsed;
}

/**
 * Reads REQUEST_DELAY_SECONDS and converts it to milliseconds.
 *
 * @returns {number} Delay in ms; 100 when the variable is unset, negative or not a finite number
 */
function readMinInterRequestDelayMs() {
  const delaySeconds = Number.parseFloat(process.env.REQUEST_DELAY_SECONDS);
  if (Number.isFinite(delaySeconds) && delaySeconds >= 0) {
    return Math.round(delaySeconds * 1000);
  }
  return 100;
}

/**
 * Locates the raw Retry-After / Retry-After-Ms header values on an error.
 *
 * Headers are looked up under `error.error.headers`, then
 * `error.response.headers`, then `error.headers`. Both objects exposing a
 * `get()` method and plain objects (matched case-insensitively) are supported.
 *
 * @param {object} error The rate limit error
 * @returns {{retryAfter: *, retryAfterMs: *}} Raw header values, nullish when absent
 */
function readRetryAfterHeaders(error) {
  const headers = error.error?.headers || error.response?.headers || error.headers || null;
  if (!headers) {
    return { retryAfter: null, retryAfterMs: null };
  }
  if (typeof headers.get === 'function') {
    return {
      retryAfter: headers.get('retry-after') || headers.get('Retry-After'),
      retryAfterMs: headers.get('retry-after-ms') || headers.get('Retry-After-Ms'),
    };
  }
  if (typeof headers === 'object') {
    const lowerCaseHeaders = Object.fromEntries(
      Object.entries(headers).map(([key, value]) => [key.toLowerCase(), value]),
    );
    return {
      retryAfter: lowerCaseHeaders['retry-after'],
      retryAfterMs: lowerCaseHeaders['retry-after-ms'],
    };
  }
  return { retryAfter: null, retryAfterMs: null };
}

/**
 * Converts a Retry-After header value to a wait time in milliseconds.
 *
 * Accepts a number of seconds or a date string that starts with a weekday
 * name. The weekday check keeps arbitrary text away from `Date.parse`.
 *
 * @param {*} value Raw header value
 * @returns {number|null} Non-negative wait in ms, or null when unparsable
 */
function parseRetryAfterValue(value) {
  const text = String(value).trim();
  const seconds = Number.parseFloat(text);
  if (Number.isFinite(seconds)) {
    return Math.max(0, Math.round(seconds * 1000));
  }
  if (/^(mon|tue|wed|thu|fri|sat|sun)/i.test(text)) {
    const retryAt = Date.parse(text);
    if (!Number.isNaN(retryAt)) {
      return Math.max(0, retryAt - Date.now());
    }
  }
  return null;
}

/**
 * Works out how long the server asked us to wait after a rate limit error.
 *
 * Retry-After-Ms is preferred; if it is missing or unparsable, Retry-After is
 * tried next. Failures while reading headers are logged, never thrown.
 *
 * @param {object} error The rate limit error
 * @returns {number|null} Wait in ms, or null when no usable header was found
 */
function resolveRetryAfterMs(error) {
  try {
    const { retryAfter, retryAfterMs } = readRetryAfterHeaders(error);
    if (retryAfterMs) {
      const parsedMs = Number.parseInt(retryAfterMs, 10);
      if (!Number.isNaN(parsedMs)) {
        const waitMs = Math.max(0, parsedMs);
        logger.info(`Retry-After-Ms header found: waiting ${waitMs} ms.`);
        return waitMs;
      }
      logger.warn(`Could not parse Retry-After-Ms header value: ${retryAfterMs}`);
    }
    if (retryAfter) {
      const waitMs = parseRetryAfterValue(retryAfter);
      if (waitMs !== null) {
        logger.info(`Retry-After header found: waiting ${waitMs / 1000} seconds.`);
        return waitMs;
      }
      logger.warn(`Could not parse Retry-After header value: ${retryAfter}`);
    }
    logger.warn('Could not find or parse Retry-After/Retry-After-Ms headers on the rate limit error.');
  } catch (headerError) {
    logger.warn(`Error accessing headers on rate limit error object: ${headerError.message}`);
  }
  return null;
}

/**
 * Base class for LLM clients: retry with exponential backoff, a bounded
 * request queue, and adaptive throttling after rate limit (429) errors.
 */
export class BaseLLMClient {
  // Total number of attempts made by withExponentialBackoff (at least 1).
  maxRetries = readIntEnv('LLM_MAX_RETRIES', 20, 1);
  baseDelay = 1000; // 1 second
  maxExponentialDelay = readIntEnv('LLM_MAX_RETRY_DELAY_MS', 300000, 0);
  // At least 1: a limit of 0 or NaN would leave every queued request waiting forever.
  maxParallelRequests = readIntEnv('MAX_PARALLEL_REQUESTS', 10, 1);
  // Minimum delay before each queued request starts (REQUEST_DELAY_SECONDS, default 100ms).
  _minInterRequestDelayMs = readMinInterRequestDelayMs();
  // Delay applied to queued requests once a rate limit has been hit.
  _maxInterRequestDelayMs = 45000;
  // Time between successive attempts to recover concurrency after a rate limit.
  _recoveryIntervalMs = 60 * 1000;

  /**
   * @param config Client configuration; this class reads `model`, `savePrompts` and `outputPath` from it
   */
  constructor(config) {
    this.config = config;
    this.activeRequests = 0;
    this.requestQueue = [];
    // The live limit starts at the configured maximum and shrinks on rate limits.
    this._currentConcurrencyLimit = this.maxParallelRequests;
    this._increaseConcurrencyTimeout = null;
    this._interRequestDelayMs = this._minInterRequestDelayMs;
    logger.debug(`Initialized LLM client with max retries: ${this.maxRetries}, max parallel: ${this.maxParallelRequests}, current limit: ${this._currentConcurrencyLimit}, min request delay: ${this._minInterRequestDelayMs}ms, initial delay: ${this._interRequestDelayMs}ms, max exponential delay: ${this.maxExponentialDelay}ms`);
  }

  /**
   * Handles streaming response from LLM providers
   * @param response Fetch Response object containing a stream
   * @returns Concatenated response text
   * @throws {Error} When the response has no body
   */
  async handleStreamingResponse(response) {
    if (!response.body) {
      throw new Error('Response body is null');
    }
    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let responseText = '';
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) {
          break;
        }
        responseText += decoder.decode(value, { stream: true });
      }
      // Flush the decoder so bytes buffered from a trailing partial sequence are not dropped.
      responseText += decoder.decode();
    } finally {
      reader.releaseLock();
    }
    return responseText;
  }

  /**
   * Executes a function with exponential backoff retry logic.
   *
   * Errors carrying a numeric `status` in the 4xx range are rethrown at once,
   * except 429, which also triggers `_handleRateLimitHit()` and honours any
   * Retry-After / Retry-After-Ms header. Everything else is retried until
   * `maxRetries` attempts have been made.
   *
   * @param fn Function to execute
   * @returns Result of the function
   * @throws The last error raised by `fn` once retries are exhausted or the error is non-retriable
   */
  async withExponentialBackoff(fn) {
    // Guarantee at least one attempt even if a subclass set an invalid value.
    const maxAttempts = Number.isInteger(this.maxRetries) && this.maxRetries > 0 ? this.maxRetries : 1;
    logger.debug(`[Retry] Entering withExponentialBackoff. Max retries set to: ${maxAttempts}`);
    let lastError = null;

    for (let attempt = 0; attempt < maxAttempts; attempt++) {
      try {
        return await fn();
      } catch (error) {
        lastError = error;

        const hasStatus = error !== null && typeof error === 'object' && typeof error.status === 'number';
        const httpStatusCode = hasStatus ? error.status : null;
        const isRateLimitError = httpStatusCode === 429;

        // Client errors other than 429 will not succeed on retry.
        if (hasStatus && httpStatusCode >= 400 && httpStatusCode < 500 && !isRateLimitError) {
          logger.error(`Client error ${httpStatusCode}. Not retrying.`, { error });
          throw error;
        }

        let specificRetryAfterMs = null;
        if (isRateLimitError) {
          logger.warn('Rate limit exceeded (429). Checking for Retry-After header.');
          this._handleRateLimitHit();
          specificRetryAfterMs = resolveRetryAfterMs(error);
        }

        if (attempt >= maxAttempts - 1) {
          logger.error(`Max retries (${maxAttempts}) reached. Giving up on error:`, { error: { message: error?.message, status: httpStatusCode } });
          throw lastError;
        }

        let delay;
        if (specificRetryAfterMs !== null) {
          delay = specificRetryAfterMs;
        } else {
          // Exponential growth with +/-25% jitter, capped at maxExponentialDelay.
          const jittered = this.baseDelay * 2 ** attempt * (0.75 + Math.random() * 0.5);
          delay = Math.min(jittered, this.maxExponentialDelay);
        }
        if (!Number.isFinite(delay) || delay < 0) {
          // A non-finite delay would make setTimeout fire immediately; fall back to the base delay.
          delay = this.baseDelay;
        }

        const errorMsg = error instanceof Error
          ? error.message
          : (httpStatusCode ? `Status ${httpStatusCode}` : 'Unknown error');
        logger.warn(`Attempt ${attempt + 1}/${maxAttempts} failed (${errorMsg}). Retrying in ${Math.round(delay)}ms...`);
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }

    // Not expected to be reached: the loop either returns or throws.
    logger.error('Retry loop completed without success or throwing final error. Rethrowing last known error.');
    throw lastError || new Error('Operation failed after retries, but last error was lost.');
  }

  /**
   * Saves prompt content to a file if savePrompts is enabled.
   * Currently a stub: it returns without writing anything.
   * @param prompt Prompt content to save
   * @param type Type of prompt (e.g., 'thinking', 'response')
   */
  async savePromptToFile(prompt, type) {
    if (!this.config?.savePrompts || !this.config?.outputPath) {
      return;
    }
    // Implementation for saving prompts to be added
    // This will depend on the file system utilities available in the project
  }

  /**
   * Starts queued requests until the queue is empty or every slot allowed by
   * the current concurrency limit is in use. Each request waits the current
   * inter-request delay before running and holds its slot during that wait.
   */
  _tryProcessQueue() {
    while (this.activeRequests < this._currentConcurrencyLimit && this.requestQueue.length > 0) {
      const { requestFn, resolve, reject } = this.requestQueue.shift();
      this.activeRequests++;
      const requestId = `${Date.now()}${Math.random().toString(16).slice(2)}`;
      logger.debug(`[Parallelism] Starting request ${requestId}. Active: ${this.activeRequests} (Limit: ${this._currentConcurrencyLimit}, Delay: ${this._interRequestDelayMs}ms)`);

      const run = async () => {
        if (this._interRequestDelayMs > 0) {
          await new Promise((res) => setTimeout(res, this._interRequestDelayMs));
        }
        const startTime = Date.now();
        try {
          resolve(await requestFn());
        } catch (error) {
          reject(error);
        } finally {
          const duration = Date.now() - startTime;
          this.activeRequests--;
          logger.debug(`[Parallelism] Finished request ${requestId}. Duration: ${duration}ms. Active: ${this.activeRequests}. Queue size: ${this.requestQueue.length}`);
          this._tryProcessQueue();
        }
      };

      // Deferred by a microtask so requestFn never runs synchronously inside enqueueRequest.
      Promise.resolve().then(run).catch((error) => {
        logger.error(`[Parallelism] Unexpected failure while running request ${requestId}.`, { error });
      });
    }
  }

  /**
   * Enqueues a request and processes it when a slot is available
   * @param {Function} requestFn Function that returns a Promise for the request
   * @returns {Promise} Result of the request
   */
  async enqueueRequest(requestFn) {
    return new Promise((resolve, reject) => {
      this.requestQueue.push({ requestFn, resolve, reject });
      // Immediately try to process in case a slot is free.
      this._tryProcessQueue();
    });
  }

  /**
   * Validates and merges model configuration
   * @param config Partial model configuration to merge
   * @returns Complete model configuration
   */
  getMergedConfig(config) {
    return {
      temperature: config?.temperature ?? 0.7,
      maxTokens: config?.maxTokens ?? readIntEnv('MAX_TOKENS', 10000, 1),
      topP: config?.topP ?? 1,
      model: config?.model ?? this.config?.model,
      maxParallelRequests: config?.maxParallelRequests ?? this.maxParallelRequests,
    };
  }

  /**
   * Reacts to a rate limit error: halves the concurrency limit (never below 1),
   * raises the inter-request delay to its maximum, and schedules a recovery attempt.
   */
  _handleRateLimitHit() {
    const oldLimit = this._currentConcurrencyLimit;
    if (oldLimit > 1) {
      this._currentConcurrencyLimit = Math.max(1, Math.floor(oldLimit / 2));
      logger.warn(`[RATE_LIMIT_ADAPT] Rate limit hit. Reducing concurrency limit from ${oldLimit} to ${this._currentConcurrencyLimit}.`);
    } else {
      logger.warn('[RATE_LIMIT_ADAPT] Rate limit hit, but concurrency limit already at minimum (1).');
    }

    // Never go below the configured minimum, which may itself exceed the maximum delay.
    this._interRequestDelayMs = Math.max(this._maxInterRequestDelayMs, this._minInterRequestDelayMs);
    logger.warn(`[RATE_LIMIT_ADAPT] Setting fixed inter-request delay to ${this._interRequestDelayMs / 1000} seconds.`);

    this._scheduleRecoveryAttempt();
  }

  /**
   * Schedules `_tryRecoverConcurrencyAndDelay()` to run after
   * `_recoveryIntervalMs`, replacing any recovery attempt already pending.
   */
  _scheduleRecoveryAttempt() {
    if (this._increaseConcurrencyTimeout) {
      clearTimeout(this._increaseConcurrencyTimeout);
      this._increaseConcurrencyTimeout = null;
    }
    logger.info(`[RATE_LIMIT_ADAPT] Scheduling attempt to recover concurrency/delay in ${this._recoveryIntervalMs / 1000} seconds.`);
    const timer = setTimeout(() => {
      // Clear the handle first: the recovery step may schedule the next attempt.
      this._increaseConcurrencyTimeout = null;
      this._tryRecoverConcurrencyAndDelay();
    }, this._recoveryIntervalMs);
    // Where timers support it, do not let a pending recovery keep the process alive.
    timer.unref?.();
    this._increaseConcurrencyTimeout = timer;
  }

  /**
   * Raises the concurrency limit by one (up to `maxParallelRequests`) and
   * resets the inter-request delay to its minimum. While the limit is still
   * below the maximum another attempt is scheduled, so recovery continues
   * step by step. Queued requests are started if a slot opened up.
   */
  _tryRecoverConcurrencyAndDelay() {
    let changed = false;

    if (this._currentConcurrencyLimit < this.maxParallelRequests) {
      this._currentConcurrencyLimit = Math.min(this.maxParallelRequests, this._currentConcurrencyLimit + 1);
      changed = true;
    }
    if (this._interRequestDelayMs !== this._minInterRequestDelayMs) {
      this._interRequestDelayMs = this._minInterRequestDelayMs;
      changed = true;
    }

    if (changed) {
      logger.info(`[RATE_LIMIT_ADAPT] Attempting to recover. New limit: ${this._currentConcurrencyLimit}, Resetting delay to: ${this._interRequestDelayMs}ms.`);
    } else {
      logger.debug('[RATE_LIMIT_ADAPT] Concurrency limit at max and delay at min.');
    }

    if (this._currentConcurrencyLimit < this.maxParallelRequests) {
      this._scheduleRecoveryAttempt();
    }
    // A higher limit may have freed a slot for requests that are already queued.
    this._tryProcessQueue();
  }
}