qerrors
Version:
Intelligent error handling middleware with AI-powered analysis, environment validation, caching, and production-ready logging. Provides OpenAI-based error suggestions, queue management, retry mechanisms, and comprehensive configuration options for Node.js
462 lines (395 loc) • 29.3 kB
JavaScript
/**
* Core qerrors module - provides intelligent error analysis using OpenAI's API
*
* This module implements a sophisticated error handling system that not only logs errors
* but also provides AI-powered analysis and suggestions for resolution. The design balances
* practical error handling needs with advanced AI capabilities.
*
* Key design decisions:
* - Uses OpenAI GPT models for error analysis to provide contextual debugging help
* - Implements graceful degradation when AI services are unavailable
* - Generates unique error identifiers for tracking and correlation
* - Supports both Express middleware usage and standalone error handling
*/
; //(enable strict mode for improved error detection)
const config = require('./config'); //load default environment variables and helpers
const logger = require('./logger'); //centralized winston logger configuration promise
const axios = require('axios'); //HTTP client used for OpenAI API calls
const http = require('http'); //node http for agent keep alive
const https = require('https'); //node https for agent keep alive
const crypto = require('crypto'); //node crypto for hashing cache keys
const { randomUUID } = require('crypto'); //import UUID generator for unique names
const Denque = require('denque'); //double ended queue for O(1) dequeue
const escapeHtml = require('escape-html'); //secure HTML escaping library
const util = require('util'); //node util to stringify circular context safely
/**
* Creates a custom concurrency limiter for controlling OpenAI API calls
*
* This implementation replaces the p-limit npm package to reduce dependencies
* while providing exactly the functionality needed for qerrors. The design
* prioritizes simplicity and reliability over feature completeness.
*
* Design rationale:
* - Custom implementation reduces npm dependency footprint
* - Simple queue structure using Denque provides O(1) operations
* - Direct control over queuing logic enables specific behavior needed for error analysis
* - Exposed metrics allow monitoring of queue health in production
*
* @param {number} max - Maximum number of concurrent operations
* @returns {Function} Limiter function that accepts async operations
*/
function createLimiter(max) { //(local concurrency limiter to avoid p-limit dependency while providing identical functionality)
let active = 0; //count currently running tasks
const queue = new Denque(); //queued functions waiting for a slot with O(1) shift
const next = () => { //execute next when possible
if (active >= max || queue.length === 0) return; //respect limit
const { fn, resolve, reject } = queue.shift(); //get next job
active++; //increase active before run
Promise.resolve().then(fn).then(val => { //run job then resolve
active--; //decrement active after success
resolve(val); //pass value
next(); //run following job
}).catch(err => { //handle rejection
active--; //decrement active on failure
reject(err); //propagate error
next(); //continue queue
});
};
const limiter = fn => new Promise((resolve, reject) => { //limiter wrapper
queue.push({ fn, resolve, reject }); //add to queue
next(); //attempt run
});
Object.defineProperties(limiter, { //expose counters like p-limit
activeCount: { get: () => active },
pendingCount: { get: () => queue.length }
});
return limiter; //return throttle function
}
const { LRUCache } = require('lru-cache'); //LRU cache class used for caching advice
/**
* Conditional logging utility for debugging and development feedback
*
* This function provides optional verbose output that can be enabled via environment
* variable. It's designed to help developers understand qerrors behavior without
* impacting production performance when disabled.
*
* Design rationale:
* - Environment-controlled logging prevents performance impact in production
* - Direct console.log usage avoids logger dependency cycles
* - Simple boolean check minimizes overhead when disabled
* - Centralized control allows easy debugging toggle across entire module
*/
function verboseLog(msg) { //conditional console output helper for debugging without logger dependency
if (config.getEnv('QERRORS_VERBOSE') === 'true') console.log(msg); //only log when enabled to avoid production noise
}
function stringifyContext(ctx) { //safely stringify context without errors
console.log(`stringifyContext is running with ${typeof ctx}`); //trace helper entry
try { const out = typeof ctx === 'string' ? ctx : JSON.stringify(ctx); console.log(`stringifyContext is returning ${out}`); return out; } catch { const out = util.inspect(ctx, { depth: 5 }); console.log(`stringifyContext is returning ${out}`); return out; } //fallback to util.inspect on circular data
}
const rawConc = config.getInt('QERRORS_CONCURRENCY'); //(raw concurrency from env)
const rawQueue = config.getInt('QERRORS_QUEUE_LIMIT'); //(raw queue limit from env)
const SAFE_THRESHOLD = config.getInt('QERRORS_SAFE_THRESHOLD'); //limit considered safe for concurrency and queue without enforced minimum //(configurable)
const CONCURRENCY_LIMIT = Math.min(rawConc, SAFE_THRESHOLD); //(clamp concurrency to safe threshold)
const QUEUE_LIMIT = Math.min(rawQueue, SAFE_THRESHOLD); //(clamp queue limit to safe threshold)
if (rawConc > SAFE_THRESHOLD || rawQueue > SAFE_THRESHOLD) { logger.then(l => l.warn(`High qerrors limits clamped conc ${rawConc} queue ${rawQueue}`)); } //(warn when original limits exceed threshold)
const rawSockets = config.getInt('QERRORS_MAX_SOCKETS'); //raw sockets from env
const MAX_SOCKETS = Math.min(rawSockets, SAFE_THRESHOLD); //clamp sockets to safe threshold
if (rawSockets > SAFE_THRESHOLD) { logger.then(l => l.warn(`max sockets clamped ${rawSockets}`)); } //warn on clamp when limit exceeded
const rawFreeSockets = config.getInt('QERRORS_MAX_FREE_SOCKETS'); //raw free socket count from env //(new env)
const MAX_FREE_SOCKETS = Math.min(rawFreeSockets, SAFE_THRESHOLD); //clamp free sockets to safe threshold //(new const)
if (rawFreeSockets > SAFE_THRESHOLD) { logger.then(l => l.warn(`max free sockets clamped ${rawFreeSockets}`)); } //warn when clamped //(new warn)
const parsedLimit = config.getInt('QERRORS_CACHE_LIMIT', 0); //parse limit with zero allowed
const ADVICE_CACHE_LIMIT = parsedLimit === 0 ? 0 : Math.min(parsedLimit, SAFE_THRESHOLD); //clamp to safe threshold when >0
if (parsedLimit > SAFE_THRESHOLD) { logger.then(l => l.warn(`cache limit clamped ${parsedLimit}`)); } //warn after logger ready
const CACHE_TTL_SECONDS = config.getInt('QERRORS_CACHE_TTL', 0); //expire advice after ttl seconds when nonzero //(new ttl env)
const adviceCache = new LRUCache({ max: ADVICE_CACHE_LIMIT || 0, ttl: CACHE_TTL_SECONDS * 1000 }); //create cache with ttl and max settings
let warnedMissingToken = false; //track if missing token message already logged
const axiosInstance = axios.create({ //axios instance with keep alive agents
httpAgent: new http.Agent({ keepAlive: true, maxSockets: MAX_SOCKETS, maxFreeSockets: MAX_FREE_SOCKETS }), //reuse http connections with max free limit //(updated agent)
httpsAgent: new https.Agent({ keepAlive: true, maxSockets: MAX_SOCKETS, maxFreeSockets: MAX_FREE_SOCKETS }), //reuse https connections with max free limit //(updated agent)
timeout: config.getInt('QERRORS_TIMEOUT') //abort request after timeout
});
const limit = createLimiter(CONCURRENCY_LIMIT); //create limiter with stored concurrency without external module
let queueRejectCount = 0; //track how many analyses the queue rejects
let cleanupHandle = null; //hold interval id for periodic cache purge
let metricHandle = null; //store interval id for queue metric logging
const METRIC_INTERVAL_MS = config.getInt('QERRORS_METRIC_INTERVAL_MS', 0); //interval for metrics, zero disables
function startAdviceCleanup() { //(kick off periodic advice cleanup)
if (CACHE_TTL_SECONDS === 0 || ADVICE_CACHE_LIMIT === 0 || cleanupHandle) { return; } //(skip when ttl or cache disabled or already scheduled)
cleanupHandle = setInterval(purgeExpiredAdvice, CACHE_TTL_SECONDS * 1000); //(run purge at ttl interval)
cleanupHandle.unref(); //(allow process exit without clearing interval)
}
function stopAdviceCleanup() { //(stop periodic purge when needed)
if (!cleanupHandle) { return; } //(do nothing when no interval)
clearInterval(cleanupHandle); //(cancel interval)
cleanupHandle = null; //(reset handle state)
}
function logQueueMetrics() { //(write queue metrics to logger)
logger.then(l => l.info(`metrics queueLength=${getQueueLength()} queueRejects=${getQueueRejectCount()}`));
//(info level ensures operators can monitor queue health without triggering qerrors recursion on logging errors)
}
function startQueueMetrics() { //(begin periodic queue metric logging)
if (metricHandle || METRIC_INTERVAL_MS === 0) { return; } //(avoid multiple intervals or disabled)
metricHandle = setInterval(logQueueMetrics, METRIC_INTERVAL_MS); //(schedule logging every interval)
metricHandle.unref(); //(allow process exit without manual cleanup)
}
function stopQueueMetrics() { //(halt metric emission)
if (!metricHandle) { return; } //(no-op when not running)
clearInterval(metricHandle); //(cancel metrics interval)
metricHandle = null; //(reset handle state)
}
async function scheduleAnalysis(err, ctx) { //limit analyzeError concurrency
startAdviceCleanup(); //(ensure cleanup interval scheduled once)
const idle = limit.activeCount === 0 && limit.pendingCount === 0; //track if queue idle before scheduling
const total = limit.pendingCount + limit.activeCount; //sum queued and active analyses
if (total >= QUEUE_LIMIT) { queueRejectCount++; (await logger).warn(`analysis queue full pending ${limit.pendingCount} active ${limit.activeCount}`); return Promise.reject(new Error('queue full')); } //(reject when queue limit reached)
const run = limit(() => analyzeError(err, ctx)); //queue via limiter and get promise
if (idle) startQueueMetrics(); //(start metrics when queue transitions from idle)
await run.finally(() => { if (limit.activeCount === 0 && limit.pendingCount === 0) stopQueueMetrics(); }); //(await finally to ensure proper cleanup timing)
return run; //return scheduled promise
}
function getQueueRejectCount() { return queueRejectCount; } //expose reject count
function clearAdviceCache() { adviceCache.clear(); if (adviceCache.size === 0) { stopAdviceCleanup(); } } //empty cache and stop interval when empty
function purgeExpiredAdvice() { //trigger lru-cache cleanup cycle
if (CACHE_TTL_SECONDS === 0 || ADVICE_CACHE_LIMIT === 0) { return; } //skip when ttl or cache disabled
adviceCache.purgeStale(); if (adviceCache.size === 0) { stopAdviceCleanup(); } //remove expired entries and stop interval when empty
} //lru-cache handles its own batch logic
function getQueueLength() { return limit.pendingCount; } //expose queue length
async function postWithRetry(url, data, opts, capMs) { //post wrapper with retry logic and cap
const retries = config.getInt('QERRORS_RETRY_ATTEMPTS'); //default retry count
const base = config.getInt('QERRORS_RETRY_BASE_MS'); //base delay ms
const cap = capMs !== undefined ? capMs : config.getInt('QERRORS_RETRY_MAX_MS', 0); //choose cap
for (let i = 0; i <= retries; i++) { //attempt request with retries
try { return await axiosInstance.post(url, data, opts); } //(try post once)
catch (err) { //handle failure and compute wait
if (i >= retries) throw err; //throw when out of retries
const jitter = Math.random() * base; //random jitter added to delay
let wait = base * 2 ** i + jitter; //compute exponential delay with jitter
if (err.response && (err.response.status === 429 || err.response.status === 503)) { //detect rate limit
const retryAfter = err.response.headers?.['retry-after']; //header with wait seconds
if (retryAfter) { //parse header when provided
const secs = Number(retryAfter); //numeric seconds when parsed
if (!Number.isNaN(secs)) { wait = secs * 1000; } //use parsed seconds
else {
const date = Date.parse(retryAfter); //parse HTTP date string
if (!Number.isNaN(date)) { wait = date - Date.now(); } //ms until retry date
}
} else { wait *= 2; } //double delay when header missing
}
if (cap > 0 && wait > cap) { wait = cap; } //enforce cap when provided
await new Promise(r => setTimeout(r, wait)); //pause before next attempt
}
}
}
/**
* Analyzes an error using OpenAI's API to provide intelligent debugging suggestions
*
* This function represents the core AI-powered feature of qerrors. It sends error details
* to OpenAI's API and returns actionable advice for developers.
*
* Design rationale:
* - Early return for AxiosErrors prevents infinite loops when network issues occur
* - Environment variable check ensures graceful degradation without API keys
* - Prompt engineering optimizes for practical, console-readable advice
* - Response validation handles various API response formats safely
* - Temperature=1 provides creative but relevant suggestions
* - Max tokens=2048 balances detail with cost considerations
*
* @param {Error} error - The error object containing name, message, and stack trace
* @param {string} contextString - Contextual information already stringified by qerrors
* @returns {Promise<Object|null>} - AI-generated advice object or null if analysis fails or is skipped for Axios errors //(update return description)
*/
async function analyzeError(error, contextString) {
if (typeof error.name === 'string' && error.name.includes('AxiosError')) { //(skip axios error objects early to prevent infinite loops when our API calls fail)
verboseLog(`Axios Error`); //(log axios detection for analysis skip)
return null; //(avoid API call when axios error encountered)
};
verboseLog(`qerrors error analysis is running for
error name: "${error.uniqueErrorName}",
error message: "${error.message}",
with context: "${contextString}"`); //(log analysis attempt for debugging with pre-stringified context)
if (ADVICE_CACHE_LIMIT !== 0 && !error.qerrorsKey) { //generate hash key when caching
error.qerrorsKey = crypto.createHash('sha256').update(`${error.message}${error.stack}`).digest('hex'); //create cache key from message and stack
}
if (ADVICE_CACHE_LIMIT !== 0) { //lookup cached advice when enabled
const cached = adviceCache.get(error.qerrorsKey); //fetch entry from lru-cache
if (cached) { verboseLog(`cache hit for ${error.uniqueErrorName}`); return cached; } //return when present and valid
}
if (!process.env.OPENAI_TOKEN) { //(graceful degradation when API token unavailable)
if (!warnedMissingToken) { //(check if warning already logged to avoid console spam)
console.error(`Missing OPENAI_TOKEN in environment variables.`); //(inform developer about missing token)
warnedMissingToken = true; //(set flag so we do not warn again on subsequent calls)
}
return null; //(skip analysis when token absent)
}
const truncatedStack = (error.stack || '').split('\n').slice(0, 20).join('\n'); //(limit stack trace to 20 lines for smaller API payloads and faster processing)
const errorPrompt = `Analyze this error and provide debugging advice. You must respond with a valid JSON object containing an "advice" field with a concise solution:
Error: ${error.name} - ${error.message}
Context: ${contextString}
Stack: ${truncatedStack}`; //(JSON format prompt for structured response with explicit instruction)
// OpenAI API call with optimized parameters for error analysis
// Model choice balances capability with cost and response time
// Parameters tuned for creative but focused debugging suggestions
let response; //(will hold axios response from OpenAI API call)
const openaiBody = { //(payload for OpenAI API structured for optimal error analysis)
model: 'gpt-4o', //(GPT-4o model for optimal error analysis quality and speed)
messages: [{ role: 'user', content: errorPrompt }], //(user role prompt to analyze error with context)
response_format: { type: 'json_object' }, //(structured JSON response expected for consistent parsing)
temperature: 1, //(creative but focused suggestions, balanced for debugging advice)
max_tokens: config.getInt('QERRORS_MAX_TOKENS'), //(limit response length using configurable env setting)
top_p: 1, //(full vocabulary access for technical terminology)
frequency_penalty: 0, //(allow repetition when useful for error analysis)
presence_penalty: 0 //(permit technical term usage without penalty)
};
try {
response = await module.exports.postWithRetry(config.getEnv('QERRORS_OPENAI_URL'), openaiBody, { //use env url for retry //(use new env)
headers: {
'Authorization': `Bearer ${process.env.OPENAI_TOKEN}`,
'Content-Type': 'application/json'
}
});
} catch (apiErr) {
verboseLog(`OpenAI request failed after retries`); //(log final failure after all retry attempts exhausted)
return null; //(abort analysis when request fails entirely to prevent blocking)
}
let advice = response?.data?.choices?.[0]?.message?.content || null; //(capture raw advice which may be JSON string from API response)
if (typeof advice === 'string') { //(convert string to object when possible for structured access)
try { advice = JSON.parse(advice); } catch { advice = null; }; //(ignore parse errors gracefully to prevent crashes on malformed JSON)
}
if (advice && typeof advice === 'object') { //(validate response structure and handle different API response formats with defensive programming)
verboseLog(`qerrors is returning advice for
the error name: "${error.uniqueErrorName}",
with the error message: "${error.message}",
with context: "${contextString}"`); //(log successful advice return with pre-stringified context)
//(cache and return the parsed advice object directly)
verboseLog(`${error.uniqueErrorName} ${JSON.stringify(advice)}`); //(stringify advice for consistent logging)
if (ADVICE_CACHE_LIMIT !== 0) { adviceCache.set(error.qerrorsKey, advice); startAdviceCleanup(); } //cache advice and ensure cleanup interval
return advice;
} else {
verboseLog(`Problem in analyzeError function of qerrors for ${error.uniqueErrorName}: ${error.message}`); //(log analysis failure for debugging and monitoring)
return null; //(graceful failure allows application to continue without API dependency)
}
}
/**
* Main qerrors function - comprehensive error handling with AI analysis and smart response handling
*
* This is the primary entry point for error processing. It handles the complete error lifecycle:
* logging, analysis, response generation, and middleware chain continuation.
*
* Design philosophy:
* - Works as both Express middleware and standalone error handler
* - Generates unique identifiers for error tracking and correlation
* - Provides intelligent response format detection (HTML vs JSON)
* - Maintains Express middleware contract while adding AI capabilities
* - Implements defensive programming to prevent secondary errors
*
* @param {Error} error - The error object to process
* @param {string|Object} context - Descriptive context about where/when error occurred; objects are JSON stringified
* @param {Object} [req] - Express request object (optional, enables middleware features)
* @param {Object} [res] - Express response object (optional, enables automatic responses)
* @param {Function} [next] - Express next function (optional, enables middleware chaining)
* @returns {Promise<void>}
*/
async function qerrors(error, context, req, res, next) {
// Input validation - prevent processing null/undefined errors
// Early return prevents downstream errors and provides clear feedback
if (!error) {
console.warn('qerrors called without an error object');
return;
}
// Context defaulting ensures we always have meaningful error context
// This helps with debugging and error correlation across logs
context = context || 'unknown context';
const contextString = stringifyContext(context); //normalize context using helper to avoid circular errors
// Generate unique error identifier for tracking and correlation
// Format: "ERROR: " + errorType + timestamp + randomString
// This allows linking related log entries and tracking error resolution
const uniqueErrorName = `ERROR:${error.name}_${randomUUID()}`; //generate identifier via crypto uuid
// Log error processing start with full context
// Multi-line format improves readability in log aggregation systems
verboseLog(`qerrors is running for error message: "${error.message}",
with context: "${contextString}",
assigning it the unique error name: "${uniqueErrorName}"`);
// Generate ISO timestamp for consistent log timing across time zones
// This is critical for distributed systems and log correlation
const timestamp = new Date().toISOString(); //create standardized timestamp for logs
// Destructure error properties with sensible defaults
// This pattern handles custom error objects that may lack standard properties
// Default values prevent undefined fields in logs and responses
const {
message = 'An error occurred', // Generic fallback message
statusCode = 500, // HTTP 500 for unspecified server errors
isOperational = true, // Assume operational error unless specified otherwise
} = error;
// Create comprehensive error log object
// Structure designed for JSON logging systems and error tracking services
// Includes all essential debugging information in a standardized format
const errorLog = {
uniqueErrorName, // For correlation and tracking
timestamp, // For chronological analysis
message, // Human-readable error description
statusCode, // HTTP status for web context
isOperational, // Distinguishes expected vs unexpected errors
context: contextString, // Contextual information for debugging now stringified
stack: error.stack // Full stack trace for technical debugging
};
// Augment original error object with unique identifier
// This allows downstream code to reference this specific error instance
error.uniqueErrorName = uniqueErrorName;
// Log error through winston logger for persistent storage and processing
// Uses structured logging format compatible with log aggregation systems
(await logger).error(errorLog);
// HTTP response handling - only if Express response object is available
// Check headersSent prevents "Cannot set headers after they are sent" errors
if (res && !res.headersSent) { //(send response only if headers not already sent to prevent double response errors)
const acceptHeader = req?.headers?.['accept'] || null; //(inspect client preference for HTML via content negotiation for appropriate response format)
if (acceptHeader && acceptHeader.includes('text/html')) { //(browser client detected via Accept header)
const safeMsg = escapeHtml(message); //(escape message for safe HTML display preventing XSS)
const safeStack = escapeHtml(error.stack || 'No stack trace available'); //(escape stack trace for safe HTML rendering)
const htmlErrorPage = `
<!DOCTYPE html>
<html>
<head>
<title>Error: ${statusCode}</title>
<style>
body { font-family: sans-serif; padding: 2em; }
.error { color: #d32f2f; }
pre { background: #f5f5f5; padding: 1em; border-radius: 4px; overflow: auto; }
</style>
</head>
<body>
<h1 class="error">Error: ${statusCode}</h1>
<h2>${safeMsg}</h2>
<pre>${safeStack}</pre>
</body>
</html>
`; //(generate HTML error page for browser requests with inline CSS to avoid external dependencies)
res.status(statusCode).send(htmlErrorPage); //(send user-friendly HTML error page with technical details for developers)
} else {
res.status(statusCode).json({ error: errorLog }); //(JSON response for API clients and AJAX requests with structured format for programmatic error handling)
}
}
if (next) { //(Express middleware chain continuation when next function provided)
if (!res || !res.headersSent) { //(only call next if headers not sent to prevent response conflicts)
next(error); //(pass error to next middleware for additional processing while maintaining Express contract)
}
}
Promise.resolve() //(start async analysis without blocking response to maintain fast error handling)
.then(() => scheduleAnalysis(error, contextString)) //(invoke queued analysis after sending response with context string)
.catch(async (analysisErr) => (await logger).error(analysisErr)); //(log any scheduleAnalysis failures to prevent silent errors)
verboseLog(`qerrors ran`); //(log completion after scheduling analysis for debugging flow)
}
module.exports = qerrors; //(export main qerrors function as default export providing primary interface most users interact with)
module.exports.analyzeError = analyzeError; //(expose analyzeError for testing and advanced usage scenarios allowing unit testing of AI analysis in isolation)
module.exports.axiosInstance = axiosInstance; //export axios instance for testing
module.exports.postWithRetry = postWithRetry; //export retry helper for tests
module.exports.getQueueRejectCount = getQueueRejectCount; //export queue reject count
module.exports.clearAdviceCache = clearAdviceCache; //export cache clearing function
module.exports.purgeExpiredAdvice = purgeExpiredAdvice; //export ttl cleanup function
module.exports.startAdviceCleanup = startAdviceCleanup; //export cleanup scheduler
module.exports.stopAdviceCleanup = stopAdviceCleanup; //export cleanup canceller
module.exports.startQueueMetrics = startQueueMetrics; //export metrics scheduler
module.exports.stopQueueMetrics = stopQueueMetrics; //export metrics canceller
module.exports.getQueueLength = getQueueLength; //export queue length
function getAdviceCacheLimit() { return ADVICE_CACHE_LIMIT; } //expose clamped cache limit for tests
module.exports.getAdviceCacheLimit = getAdviceCacheLimit; //export clamp accessor