UNPKG

ak-fetch

Version:

Production-ready HTTP client for bulk operations with connection pooling, exponential backoff, streaming, and comprehensive error handling

1,490 lines (1,348 loc) 55.3 kB
#! /usr/bin/env node // @ts-check /** * ak-fetch - Modern HTTP client for bulk operations * 2025 production-ready version with advanced features */ import RunQueue from "run-queue"; import { json, isJSONStr, comma, makeExist } from "ak-tools"; import cli from './cli.js'; import { execSync } from 'child_process'; import { Readable } from 'stream'; import path from 'path'; import { createReadStream, existsSync } from 'fs'; import { config as dotenvConfig } from 'dotenv'; dotenvConfig({ debug: false, override: false }); // Import new modular components import HttpClient from './lib/http-client.js'; import CircularBuffer from './lib/circular-buffer.js'; import StreamProcessors from './lib/stream-processors.js'; import { createLogger } from './lib/logger.js'; import { getPresetTransform, applyPresetTransform } from './lib/presets.js'; import { AkFetchError, NetworkError, TimeoutError, ValidationError, ConfigurationError, MemoryError } from './lib/errors.js'; /** @typedef {import("./types").BatchRequestConfig} BatchRequestConfig */ /** @typedef {import("./types").Result} Result */ /** * Main ak-fetch function for HTTP request processing * * @description * Primary entry point for ak-fetch. Handles both single configurations and arrays of * configurations for multiple endpoints. Provides intelligent batching, retry logic, * connection pooling, and comprehensive error handling. * * @param {BatchRequestConfig|BatchRequestConfig[]} PARAMS - Configuration object or array * @description * - Single config: Process one endpoint with data batching * - Array of configs: Process multiple endpoints with concurrency control * * @returns {Promise<Result|Result[]>} Promise resolving to results * @description * - Single config: Returns Result object * - Array of configs: Returns array of Result objects or processed results from hook * * @throws {ValidationError} When required parameters are missing or invalid * @throws {ConfigurationError} When configuration values are invalid * @throws {NetworkError} When network connectivity issues occur * @throws {TimeoutError} When requests exceed timeout limits * @throws {RetryError} When all retry attempts are exhausted * @throws {MemoryError} When memory usage exceeds configured limits * * @example * // Simple POST request with data array * const result = await akFetch({ * url: 'https://api.example.com/users', * data: [{ name: 'John' }, { name: 'Jane' }], * batchSize: 10, * concurrency: 5 * }); * console.log(`Processed ${result.rowCount} records in ${result.clockTime}`); * * @example * // File upload with multipart form data * const result = await akFetch({ * url: 'https://api.example.com/upload', * method: 'POST', * data: [{ * name: 'document', * file: './important-file.pdf', * description: 'Important document' * }], * headers: { 'Content-Type': 'multipart/form-data' } * }); * * @example * // High-performance streaming from large file * const result = await akFetch({ * url: 'https://api.example.com/events', * data: './million-records.jsonl', * batchSize: 1000, * concurrency: 20, * enableConnectionPooling: true, * verbose: true, * logFile: './results.json' * }); * * @example * // Multiple endpoints with custom processing * const results = await akFetch([ * { * url: 'https://api1.example.com/data', * data: dataset1, * method: 'POST' * }, * { * url: 'https://api2.example.com/sync', * data: dataset2, * method: 'PUT', * retries: 5 * } * ]); * * @example * // Advanced configuration with all features * const result = await akFetch({ * url: 'https://api.example.com/analytics', * data: './events.jsonl', * method: 'POST', * batchSize: 500, * concurrency: 10, * retries: 3, * retryDelay: 2000, * useStaticRetryDelay: false, // Use exponential backoff * enableCookies: true, * enableConnectionPooling: true, * maxResponseBuffer: 1000, * verbose: true, * logFile: './analytics-results.json', * headers: { * 'Authorization': 'Bearer token123', * 'X-API-Version': '2.0' * }, * transform: (item) => ({ * ...item, * timestamp: Date.now(), * processed: true * }), * responseHandler: (response) => { * console.log(`Batch completed: ${response.status}`); * }, * errorHandler: (error) => { * console.error(`Batch failed: ${error.message}`); * } * }); * * @since 1.0.0 * @version 2.0.0 */ async function main(PARAMS) { validateInput(PARAMS); // Handle array of configurations (multiple endpoints) if (Array.isArray(PARAMS)) { return await processMultipleConfigs(PARAMS); } // Process single configuration return await processSingleConfig(PARAMS); } /** * Validate input parameters for ak-fetch operations * * @description * Validates the input parameters to ensure they meet the basic requirements. * Checks for URL presence and validates array configurations. * * @param {any} PARAMS - Parameters to validate * @throws {ValidationError} When parameters are invalid or missing required fields * * @example * validateInput({ url: 'https://api.example.com' }); // Valid * validateInput([{ url: 'https://api1.com' }, { url: 'https://api2.com' }]); // Valid array * validateInput({}); // Throws ValidationError - no URL * * @since 2.0.0 */ function validateInput(PARAMS) { if (!PARAMS) { throw new ValidationError('No parameters provided'); } if (Array.isArray(PARAMS)) { if (PARAMS.length === 0) { throw new ValidationError('Empty configuration array provided'); } PARAMS.forEach((config, index) => { if (!config.url) { throw new ValidationError(`No URL provided for config at index ${index}`); } }); } else { if (!PARAMS.url) { throw new ValidationError('No URL provided'); } } } /** * Process multiple configurations with concurrency control * * @description * Handles multiple endpoint configurations concurrently. Each configuration * is processed independently with shared concurrency limits and timing. * Results can be post-processed with a hook function. * * @throws {ValidationError} When configurations are invalid * @throws {NetworkError} When network issues occur * @throws {TimeoutError} When operations exceed timeout limits * * @example * // Process multiple API endpoints * const results = await processMultipleConfigs([ * { * url: 'https://api1.example.com/users', * data: userData, * method: 'POST' * }, * { * url: 'https://api2.example.com/events', * data: eventData, * method: 'PUT' * } * ]); * * @example * // With custom hook processing * const configs = [...]; * configs[0].hook = (results) => results.map(r => r.responses).flat(); * const flatResults = await processMultipleConfigs(configs); * * @since 2.0.0 * @param {BatchRequestConfig[]} configs - Array of configurations to process ITS RIGHT HERE!!!!! * @returns {Promise<Result>} */ async function processMultipleConfigs(configs) { const startTime = Date.now(); /** @type {BatchRequestConfig} */ const firstConfig = configs[0]; const concurrency = firstConfig?.concurrency || 10; const delay = firstConfig?.delay || 0; const verbose = firstConfig?.verbose !== false; const logFile = firstConfig?.logFile; const format = firstConfig?.format || 'json'; const hook = firstConfig?.hook; // Create logger for this operation const logger = createLogger({ verbose, showThroughput: true, logPrefix: '🌍' }); logger.start('Processing multiple endpoints', { // @ts-ignore count: configs.length, concurrency, delay: delay ? `${delay}ms` : 'none' }); const queue = new RunQueue({ maxConcurrency: concurrency }); /** @type {Result[]} */ const results = []; let reqCount = 0; let errorCount = 0; const totalCount = configs.length; for (const config of configs) { queue.add(0, async () => { if (delay > 0) { await new Promise(resolve => setTimeout(resolve, delay)); } try { reqCount++; const result = await processSingleConfig(config, false); if (Array.isArray(result)) { results.push(...result); } else { results.push(result); } logger.progress(reqCount, totalCount); } catch (error) { errorCount++; // @ts-ignore logger.error(`Config ${reqCount} failed:`, error.message); // @ts-ignore results.push({ error: error.message, config: sanitizeConfig(config) }); logger.progress(reqCount, totalCount); } }); } await queue.run(); const endTime = Date.now(); const duration = endTime - startTime; const finalResults = typeof hook === 'function' ? hook(results) : results; // Write log file if specified if (logFile) { logger.fileOperation('Writing', logFile, format); await writeLogFile(logFile, finalResults, format, verbose); } const finalStats = { responses: finalResults, duration, clockTime: prettyTime(duration), reqCount, rowCount: reqCount, rps: Math.floor(reqCount / (duration / 1000)), errors: errorCount, configCount: totalCount }; logger.complete(finalStats); // Return comprehensive stats for multiple configurations return { responses: finalResults, duration, clockTime: prettyTime(duration), reqCount, rowCount: totalCount, rps: Math.floor(reqCount / (duration / 1000)), errors: errorCount, configCount: totalCount }; } /** * Process a single configuration with streaming and batching * * @description * Main processing function for a single endpoint configuration. Handles * data streaming, batching, HTTP requests, and result aggregation. * Supports both batch and single request modes. * * @param {BatchRequestConfig} config - Configuration object defining the operation * @param {boolean} [isMainJob=true] - Whether this is the primary operation (affects logging) * @description When false, reduces logging output for multi-config operations * * @returns {Promise<Result>} Promise resolving to result object with metrics * @description Contains responses, timing, throughput, and error statistics * * @throws {ValidationError} When configuration is invalid * @throws {ConfigurationError} When configuration values are invalid * @throws {NetworkError} When network issues occur * @throws {TimeoutError} When requests exceed timeout * @throws {RetryError} When all retry attempts fail * @throws {MemoryError} When memory limits are exceeded * * @example * // Standard batch processing * const result = await processSingleConfig({ * url: 'https://api.example.com/bulk', * data: largeDataset, * batchSize: 100, * concurrency: 5 * }); * console.log(`Processed ${result.rowCount} records in ${result.clockTime}`); * * @example * // Single request mode * const result = await processSingleConfig({ * url: 'https://api.example.com/single', * data: { id: 123, name: 'test' }, * noBatch: true, * method: 'PUT' * }); * * @since 2.0.0 */ async function processSingleConfig(config, isMainJob = true) { const startTime = Date.now(); // Set defaults and validate const processedConfig = setDefaults(config); validateConfig(processedConfig); // Create logger for this operation const logger = createLogger({ verbose: processedConfig.verbose, showThroughput: true, showMemory: processedConfig.verbose && isMainJob }); // Handle shell commands for dynamic headers (execute even in dry run for testing) if (processedConfig.shell) { // @ts-ignore processedConfig.headers = await executeShellCommand(processedConfig.shell, processedConfig.headers); } if (isMainJob) { logger.start('Starting HTTP request job', sanitizeConfig(processedConfig)); } // Handle dry run mode if (processedConfig.dryRun) { if (processedConfig.dryRun === 'curl') { return await handleCurlGeneration(processedConfig, logger, isMainJob); } else { return await handleDryRun(processedConfig, logger, isMainJob); } } // Handle no-batch mode (single request) if (processedConfig.noBatch) { const result = await executeSingleRequest(processedConfig, logger); if (isMainJob) { // @ts-ignore logger.complete(result); } return result; } // Process with streaming and batching const stream = await createDataStream(processedConfig); const [responses, reqCount, rowCount, errorCount] = await processDataStream(stream, processedConfig, logger); const endTime = Date.now(); const duration = endTime - startTime; const rps = Math.floor(reqCount / (duration / 1000)); const result = { responses, duration, clockTime: prettyTime(duration), reqCount, rowCount, rps, errors: errorCount || 0, stats: getMemoryStats() }; // Write log file if specified if (processedConfig.logFile && isMainJob) { logger.fileOperation('Writing', processedConfig.logFile, processedConfig.format); if (processedConfig.verbose === undefined) processedConfig.verbose = false; await writeLogFile( processedConfig.logFile, responses, processedConfig.format, processedConfig.verbose ); } if (isMainJob) { logger.complete(result); } return result; } /** * Set default values for configuration object * * @description * Applies sensible defaults to configuration options while preserving * user-specified values. Ensures all required properties exist with * appropriate default values for production use. * * @param {BatchRequestConfig} config - Input configuration object * @description User-provided configuration that may have partial settings * * @returns {BatchRequestConfig} Complete configuration with defaults applied * @description All optional properties will have default values set * * @example * const config = setDefaults({ url: 'https://api.example.com' }); * // Returns config with batchSize: 1, concurrency: 10, retries: 3, etc. * * @example * const config = setDefaults({ * url: 'https://api.example.com', * concurrency: 20, // User override * verbose: true // User override * }); * // Returns config with user values preserved and other defaults applied * * @since 2.0.0 */ function setDefaults(config) { const defaults = { batchSize: 1, concurrency: 10, maxTasks: 25, delay: 0, verbose: false, retries: 3, retryDelay: 1000, retryOn: [408, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524], timeout: 60000, keepAlive: true, method: 'POST', debug: false, highWaterMark: 16384, storeResponses: true, responseHeaders: false, forceGC: false, clone: false, noBatch: false, format: /** @type {"json"|"csv"|"ndjson"} */ ('json'), enableCookies: false, maxResponseBuffer: 1000, useStaticRetryDelay: false, enableConnectionPooling: true, dryRun: false, headers: {}, searchParams: null, bodyParams: null, transform: null, shell: null, ...config }; // Fire-and-forget mode: automatically disable response storage when retries is null // (unless explicitly overridden by user) if (defaults.retries === null && !config.hasOwnProperty('storeResponses')) { defaults.storeResponses = false; } return defaults; } /** * Validate configuration object for correctness and consistency * * @description * Performs comprehensive validation of configuration values to ensure * they are within acceptable ranges and logically consistent. Prevents * runtime errors by catching invalid configurations early. * * @param {BatchRequestConfig} config - Configuration object to validate * @description Must be a complete configuration object (after defaults applied) * * @throws {ConfigurationError} When configuration values are invalid * @throws {ValidationError} When required fields are missing * * @example * validateConfig({ * url: 'https://api.example.com', * method: 'POST', * data: [{ test: true }], * batchSize: 10, * concurrency: 5 * }); // Valid - no error thrown * * @example * validateConfig({ * url: 'https://api.example.com', * method: 'POST', * // No data provided * batchSize: -1, // Invalid * concurrency: 0 // Invalid * }); // Throws ConfigurationError * * @since 2.0.0 */ function validateConfig(config) { if (!config.url) { throw new ConfigurationError('URL is required'); } // Use optional chaining and default values for type safety const method = config.method || 'POST'; const batchSize = config.batchSize ?? 1; const concurrency = config.concurrency ?? 10; const timeout = config.timeout ?? 60000; if (!config.data && ['POST', 'PUT', 'PATCH'].includes(method.toUpperCase())) { throw new ConfigurationError(`${method} request requires data`); } if (batchSize < 0) { throw new ConfigurationError('batchSize must be non-negative'); } if (concurrency < 1) { throw new ConfigurationError('concurrency must be at least 1'); } if (timeout < 1000) { throw new ConfigurationError('timeout must be at least 1000ms'); } if (config.transform !== null && config.transform !== undefined && typeof config.transform !== 'function') { throw new ConfigurationError('transform must be a function or null'); } } /** * Execute shell command for dynamic header generation * * @description * Executes a shell command to generate dynamic header values, typically * for authentication tokens that need to be refreshed. Commonly used * with AWS CLI, gcloud, or other authentication tools. * * @param {Object} shellConfig - Shell command configuration * @param {string} shellConfig.command - Shell command to execute * @param {string} [shellConfig.header='Authorization'] - Header name to set * @param {string} [shellConfig.prefix='Bearer'] - Prefix for the header value * * @param {Object} [headers={}] - Existing headers to merge with * @description New header will be added to existing headers * * @returns {Promise<Object>} Promise resolving to updated headers object * @description Contains all original headers plus the new dynamic header * * @throws {ConfigurationError} When shell command fails or returns invalid output * * @example * // AWS token generation * const headers = await executeShellCommand({ * command: 'aws sts get-session-token --query "Credentials.SessionToken" --output text', * header: 'Authorization', * prefix: 'AWS4-HMAC-SHA256' * }, { 'Content-Type': 'application/json' }); * // Returns: { 'Content-Type': 'application/json', 'Authorization': 'AWS4-HMAC-SHA256 token...' } * * @example * // Google Cloud token * const headers = await executeShellCommand({ * command: 'gcloud auth print-access-token', * header: 'Authorization', * prefix: 'Bearer' * }); * * @since 2.0.0 */ async function executeShellCommand(shellConfig, headers = {}) { try { const commandOutput = execSync(shellConfig.command, { encoding: 'utf8' }).trim(); const headerName = shellConfig.header || 'Authorization'; const prefix = shellConfig.prefix || 'Bearer'; return { ...headers, [headerName]: `${prefix} ${commandOutput}` }; } catch (error) { // @ts-ignore throw new ConfigurationError(`Shell command failed: ${error.message}`); } } /** * Execute a single HTTP request without batching * * @description * Performs a single HTTP request without any batching logic. Used when * noBatch is true or for simple single-item operations. Creates and * destroys HTTP client for the single request. * * @param {BatchRequestConfig} config - Configuration object for the request * @description Must include url, method, and any data to send * * @param {any} logger - Logger instance for output * @description Used for verbose logging and error reporting * * @returns {Promise<Result>} Promise resolving to result object * @description Contains single response, timing, and memory statistics * * @throws {NetworkError} When network issues occur * @throws {TimeoutError} When request exceeds timeout * @throws {RetryError} When all retry attempts fail * * @example * const result = await executeSingleRequest({ * url: 'https://api.example.com/item/123', * method: 'PUT', * data: { name: 'Updated Item' }, * noBatch: true * }, logger); * console.log(result.responses[0]); // Single response object * * @since 2.0.0 */ async function executeSingleRequest(config, logger) { // Create a type-safe configuration with proper defaults const httpClientConfig = { timeout: config.timeout ?? 60000, enableConnectionPooling: config.enableConnectionPooling ?? true, keepAlive: config.keepAlive ?? true, concurrency: config.concurrency ?? 10, retries: config.retries ?? 3, retryDelay: config.retryDelay ?? 1000, retryOn: config.retryOn ?? [408, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524], retryHandler: config.retryHandler || undefined, useStaticRetryDelay: config.useStaticRetryDelay ?? false, enableCookies: config.enableCookies ?? false, maxFileSize: config.maxFileSize ?? undefined, // Include required properties for BatchRequestConfig compatibility url: config.url, data: config.data, method: config.method ?? 'POST', headers: config.headers ?? {} }; const httpClient = createHttpClient(httpClientConfig); try { if (logger && logger.isVerbose()) { logger.info('Executing single request (no batching)'); } const response = await httpClient.request(config); const endTime = Date.now(); const startTime = endTime; // Single request, minimal duration const duration = endTime - startTime; const result = { responses: [response], duration, clockTime: prettyTime(duration), reqCount: 1, rowCount: Array.isArray(config.data) ? config.data.length : 1, rps: 0, errors: 0, stats: getMemoryStats() }; return result; } finally { httpClient.destroy(); } } /** * Create data stream from various input types * * @description * Converts different data input types (arrays, files, strings, streams) * into a standardized readable stream for processing. Handles JSON, JSONL, * file paths, and existing streams with appropriate transformations. * * @param {BatchRequestConfig} config - Configuration containing data source * * @returns {Promise<import('stream').Readable>} Promise resolving to readable stream * @description Stream will emit data objects ready for batching and HTTP requests * * @throws {ValidationError} When data format is invalid or unsupported * * @example * // From array * const stream = await createDataStream({ * data: [{ id: 1 }, { id: 2 }], * highWaterMark: 16384 * }); * * @example * // From file path * const stream = await createDataStream({ * data: './data.jsonl', * method: 'POST' * }); * * @example * // From existing stream * const stream = await createDataStream({ * data: fs.createReadStream('./large-file.jsonl'), * highWaterMark: 32768 * }); * * @since 2.0.0 */ async function createDataStream(config) { const { data, highWaterMark } = config; const streamProcessors = new StreamProcessors({ highWaterMark }); if (data instanceof Readable) { return data.readableObjectMode ? data : data.pipe(streamProcessors.createJSONLTransform()); } if (typeof data === 'string') { if (existsSync(path.resolve(data))) { // File path - check extension to determine if it's JSON or JSONL const filePath = path.resolve(data); const fileExt = path.extname(filePath).toLowerCase(); if (fileExt === '.json') { // Regular JSON file - read and parse as array const fileContent = require('fs').readFileSync(filePath, 'utf8'); const jsonData = JSON.parse(fileContent); return Readable.from(Array.isArray(jsonData) ? jsonData : [jsonData]); } else { // JSONL file or other text file return createReadStream(filePath, { highWaterMark }) .pipe(streamProcessors.createJSONLTransform()); } } else if (isJSONStr(data)) { // JSON string const parsed = JSON.parse(data); return Readable.from(Array.isArray(parsed) ? parsed : [parsed]); } else if (data.split('\n').every(line => line.trim() === '' || isJSONStr(line))) { // JSONL string return Readable.from(data.split('\n').filter(line => line.trim()).map(line => JSON.parse(line))); } else { throw new ValidationError('Invalid data format'); } } if (Array.isArray(data)) { return Readable.from(data); } if (typeof data === 'object' && data !== null) { return Readable.from([data]); } // For GET/HEAD/OPTIONS requests without data const method = config.method || 'POST'; if (['GET', 'HEAD', 'OPTIONS'].includes(method.toUpperCase())) { return Readable.from([{}]); // Use empty object instead of null for object mode streams } throw new ValidationError('No valid data source provided'); } /** * Process data stream with batching and HTTP requests * * @description * Main stream processing engine that handles batching, concurrency control, * HTTP requests, retry logic, and response collection. Manages memory usage * and provides backpressure control for large datasets. * * @param {import('stream').Readable} stream - Data stream to process * @description Stream should emit objects ready for HTTP requests * * @param {BatchRequestConfig} config - Configuration for processing * * * @param {any} logger - Logger instance for progress tracking * @description Used for progress updates and error reporting * * @returns {Promise<Array<any>>} Promise resolving to processing results * @description Returns [responses, reqCount, rowCount, errorCount] tuple * * @throws {MemoryError} When memory usage exceeds configured limits * @throws {NetworkError} When network issues occur during processing * @throws {TimeoutError} When requests exceed timeout limits * * @example * const [responses, reqCount, rowCount, errors] = await processDataStream( * dataStream, * { * batchSize: 100, * concurrency: 10, * maxTasks: 25, * delay: 0, * maxResponseBuffer: 1000, * storeResponses: true * }, * logger * ); * console.log(`Processed ${rowCount} records in ${reqCount} requests with ${errors} errors`); * * @since 2.0.0 */ async function processDataStream(stream, config, logger) { const { batchSize = 1, concurrency = 10, maxTasks = 25, delay = 0, maxResponseBuffer = 1000, maxMemoryUsage, storeResponses = true, forceGC = false } = config; let queue = new RunQueue({ maxConcurrency: concurrency }); const responseBuffer = storeResponses ? new CircularBuffer(maxResponseBuffer) : null; const streamProcessors = new StreamProcessors({ maxMemoryUsage }); // Create type-safe http client config with defaults const httpClientConfig = { timeout: config.timeout ?? 60000, enableConnectionPooling: config.enableConnectionPooling ?? true, keepAlive: config.keepAlive ?? true, concurrency: config.concurrency ?? 10, retries: config.retries ?? 3, retryDelay: config.retryDelay ?? 1000, retryOn: config.retryOn ?? [408, 429, 500, 502, 503, 504, 520, 521, 522, 523, 524], retryHandler: config.retryHandler || undefined, useStaticRetryDelay: config.useStaticRetryDelay ?? false, enableCookies: config.enableCookies ?? false, maxFileSize: config.maxFileSize ?? undefined, // Required properties for BatchRequestConfig compatibility url: config.url, data: config.data, method: config.method ?? 'POST', headers: config.headers ?? {} }; const httpClient = createHttpClient(httpClientConfig); let reqCount = 0; let rowCount = 0; let errorCount = 0; let batch = []; let isStreamPaused = false; // Add memory monitoring if specified if (maxMemoryUsage) { stream = stream.pipe(streamProcessors.createMemoryMonitorTransform()); } stream.on('error', error => { logger.error('Stream error:', error.message); }); stream.on('end', () => { if (logger.isVerbose()) { logger.info(`Stream ended: processed ${comma(rowCount)} records, ${comma(reqCount)} requests`); } }); // Process stream data for await (const data of stream) { const isGetRequest = ['GET', 'HEAD', 'OPTIONS'].includes((config.method || 'POST').toUpperCase()); const isEmptyGetData = isGetRequest && typeof data === 'object' && data !== null && Object.keys(data).length === 0; if (data !== null && !isEmptyGetData) { // Skip null data and empty objects for GET requests let processedData = data; // Clone data if requested to avoid mutations (do this before any transforms) if (config.clone) { processedData = typeof data === 'object' && data !== null ? JSON.parse(JSON.stringify(data)) : data; } // Apply preset transform first (if specified) if (config.preset) { try { processedData = applyPresetTransform(processedData, config.preset, config.errorHandler); } catch (error) { // @ts-ignore logger.error(`Preset transform error (${config.preset}): ${error.message}`); if (config.errorHandler && typeof config.errorHandler === 'function') { config.errorHandler(error, processedData); } else { throw error; // Preset transform errors should fail the operation } } } // Apply user transform function (if provided) - runs AFTER preset transform if (typeof config.transform === 'function') { try { const transformed = config.transform(processedData); if (transformed !== undefined) { processedData = transformed; // When not cloning, also update the original data object (but only for objects, not arrays) if (!config.clone && typeof data === 'object' && data !== null && !Array.isArray(data) && typeof transformed === 'object' && !Array.isArray(transformed)) { Object.assign(data, transformed); } } } catch (error) { // @ts-ignore logger.error(`Transform error: ${error.message}`); if (config.errorHandler && typeof config.errorHandler === 'function') { config.errorHandler(error, processedData); } else { throw error; // Transform errors should fail the operation } } } batch.push(processedData); rowCount++; } // Process batch when it reaches batchSize or handle GET requests if (batch.length >= batchSize || (data === null && batch.length > 0) || (isEmptyGetData && batch.length === 0)) { if (isEmptyGetData && batch.length === 0) { // For GET requests with no data, create a single empty batch to trigger the request addBatchToQueue([]); } else { addBatchToQueue(batch); batch = []; } } // Pause stream if queue is full if (queue.queued >= maxTasks && !isStreamPaused) { // Suppressed: Pausing stream message to avoid jittery output stream.pause(); isStreamPaused = true; await queue.run(); queue = new RunQueue({ maxConcurrency: concurrency }); // Suppressed: Resuming stream message to avoid jittery output stream.resume(); isStreamPaused = false; } } // Process any remaining batch if (batch.length > 0) { addBatchToQueue(batch); } // Wait for all requests to complete if (queue.queued > 0) { await queue.run(); } // Cleanup cleanup(); httpClient.destroy(); const responses = responseBuffer ? responseBuffer.toArray() : []; // Force garbage collection if enabled if (forceGC && global.gc) { global.gc(); } return [responses, reqCount, rowCount, errorCount]; /** * @param {Array<any>} currentBatch */ function addBatchToQueue(currentBatch) { const batchData = currentBatch.slice(); // Create copy queue.add(0, async () => { try { const requestConfig = { ...config, data: batchData.length === 0 ? undefined : (batchData.length === 1 ? batchData[0] : batchData) }; const response = await httpClient.request(requestConfig); if (responseBuffer) { responseBuffer.push(response); } reqCount++; if (delay > 0) { await new Promise(resolve => setTimeout(resolve, delay)); } // Update progress logger.progress(reqCount, Math.ceil(rowCount / (batchSize || 1)), rowCount); // Handle response callback if (config.responseHandler && typeof config.responseHandler === 'function') { config.responseHandler(response); } } catch (error) { errorCount++; // @ts-ignore logger.error('Batch processing failed:', error.message); if (config.errorHandler && typeof config.errorHandler === 'function') { config.errorHandler(error); } else { // Store error in response buffer if enabled if (responseBuffer) { // Include full error details including response body const errorDetails = { error: error.message, batch: batchData }; // Add optional error properties if they exist if (error.statusCode) errorDetails.statusCode = error.statusCode; if (error.body) errorDetails.body = error.body; if (error.url) errorDetails.url = error.url; if (error.method) errorDetails.method = error.method; if (error.timestamp) errorDetails.timestamp = error.timestamp; // @ts-ignore responseBuffer.push(errorDetails); } } } }); } function cleanup() { if (stream && typeof stream.destroy === 'function') { stream.removeAllListeners(); stream.destroy(); } } } /** * Create HTTP client with configuration * * @description * Factory function that creates and configures an HTTP client with * connection pooling, retry strategies, cookie handling, and form data * support based on the provided configuration. * * @param {BatchRequestConfig} config - Configuration for HTTP client * * @returns {HttpClient} Configured HTTP client instance * @description Client with connection pooling, retry logic, and session management * * @example * const client = createHttpClient({ * timeout: 30000, * enableConnectionPooling: true, * concurrency: 10, * retries: 3, * retryDelay: 1000, * enableCookies: true * }); * * @since 2.0.0 */ function createHttpClient(config) { const concurrency = config.concurrency ?? 10; const clientOptions = { timeout: config.timeout, keepAlive: config.enableConnectionPooling ? config.keepAlive : false, maxSockets: concurrency * 2, maxFreeSockets: concurrency, retry: { maxRetries: config.retries, baseDelay: config.retryDelay, retryOn: config.retryOn, retryHandler: config.retryHandler, useStaticDelay: config.useStaticRetryDelay, staticRetryDelay: config.retryDelay }, cookies: { enabled: config.enableCookies }, formData: { maxFileSize: config.maxFileSize } }; return new HttpClient(clientOptions); } /** * Write results to log file in specified format * * @description * Writes operation results to a file in the specified format (JSON, CSV, NDJSON). * Creates parent directories if needed and handles streaming for large datasets. * * @param {string} filePath - Destination file path * @description File will be created if it doesn't exist, overwritten if it does * * @param {any} data - Data to write to file * @description Can be array of responses, single response, or any serializable data * * @param {string} format - Output format * @description Supported: 'json', 'csv', 'ndjson' * * @param {boolean} verbose - Enable verbose logging * @description Controls whether success/error messages are displayed * * @throws {Error} When file write operations fail * * @example * await writeLogFile('./results.json', responses, 'json', true); * // Writes responses array as JSON with success logging * * @example * await writeLogFile('./output.csv', responses, 'csv', false); * // Writes responses as CSV without logging * * @example * await writeLogFile('./stream.ndjson', largeDataset, 'ndjson', true); * // Writes as newline-delimited JSON for streaming * * @since 2.0.0 */ async function writeLogFile(filePath, data, format, verbose) { try { await makeExist(filePath); const streamProcessors = new StreamProcessors(); const dataStream = Readable.from(Array.isArray(data) ? data : [data]); await streamProcessors.streamToFile(dataStream, filePath, format); if (verbose) { const logger = createLogger({ verbose }); logger.success(`Log written to ${filePath}`); } } catch (error) { const logger = createLogger({ verbose: true }); // @ts-ignore logger.error(`Failed to write log file: ${error.message}`); } } /** * Sanitize configuration for logging (remove sensitive data) * * @description * Creates a copy of configuration object with sensitive information removed * or masked. Prevents secrets from appearing in logs while preserving * useful debugging information. * * @param {BatchRequestConfig} config - Configuration object to sanitize * @description Original object is not modified * * @returns {BatchRequestConfig} Sanitized configuration object * @description Copy with sensitive headers masked and data removed * * @example * const clean = sanitizeConfig({ * url: 'https://api.example.com', * headers: { * 'Authorization': 'Bearer secret123', * 'Content-Type': 'application/json', * 'X-API-Key': 'apikey456' * }, * data: [{ sensitive: 'info' }] * }); * // Returns: * // { * // url: 'https://api.example.com', * // headers: { * // 'Authorization': '[REDACTED]', * // 'Content-Type': 'application/json', * // 'X-API-Key': '[REDACTED]' * // } * // // data property removed * // } * * @since 2.0.0 */ function sanitizeConfig(config) { const sanitized = { ...config }; delete sanitized.data; // Mask sensitive headers if (sanitized.headers) { const sensitiveHeaders = ['authorization', 'cookie', 'x-api-key']; sanitized.headers = { ...sanitized.headers }; Object.keys(sanitized.headers).forEach(key => { if (sensitiveHeaders.includes(key.toLowerCase())) { // @ts-ignore sanitized.headers[key] = '[REDACTED]'; } }); } return sanitized; } /** * Get current memory usage statistics * * @description * Retrieves current Node.js process memory usage and formats it * into readable megabyte values for monitoring and reporting. * * @returns {Object} Memory statistics object * @property {number} heapUsed - Used heap memory in MB * @property {number} heapTotal - Total heap memory in MB * @property {number} external - External memory in MB * @property {number} rss - Resident set size in MB * * @example * const stats = getMemoryStats(); * console.log(`Heap: ${stats.heapUsed}/${stats.heapTotal}MB, RSS: ${stats.rss}MB`); * // Output: "Heap: 25.5/50.2MB, RSS: 75.8MB" * * @since 2.0.0 */ function getMemoryStats() { const memUsage = process.memoryUsage(); return { heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024 * 100) / 100, // MB heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024 * 100) / 100, // MB external: Math.round(memUsage.external / 1024 / 1024 * 100) / 100, // MB rss: Math.round(memUsage.rss / 1024 / 1024 * 100) / 100 // MB }; } /** * Handle curl generation mode - generate curl commands instead of making requests * * @description * Processes configuration to generate curl commands that would be equivalent * to the HTTP requests that would be made. Useful for debugging, sharing, * and understanding what requests would be sent. * * @param {BatchRequestConfig} config - Configuration object for curl generation * @param {any} logger - Logger instance for output * @param {boolean} isMainJob - Whether this is the primary operation * @returns {Promise<Result>} Promise resolving to result with curl commands * * @example * const result = await handleCurlGeneration(config, logger, true); * console.log(result.responses[0]); // "curl -X POST https://api.example.com/data ..." * * @since 2.0.0 */ async function handleCurlGeneration(config, logger, isMainJob) { const startTime = Date.now(); if (logger.isVerbose()) { logger.info('Generating curl commands - no actual requests will be made'); } // Create data stream to process items const stream = await createDataStream(config); let rowCount = 0; const curlCommands = []; // Process data items and generate curl commands for await (const data of stream) { if (data !== null) { rowCount++; let processedData = data; // Apply transform for curl generation if provided if (typeof config.transform === 'function') { try { if (config.clone) { processedData = typeof data === 'object' && data !== null ? JSON.parse(JSON.stringify(data)) : data; } const transformed = config.transform(processedData); if (transformed !== undefined) { processedData = transformed; } } catch (error) { logger.error(`Transform error in curl generation: ${error.message}`); throw error; } } // Generate curl command for this data item const curlCommand = generateCurlCommand(config, processedData); curlCommands.push(curlCommand); } } const endTime = Date.now(); const duration = endTime - startTime; const result = { responses: curlCommands, duration, clockTime: prettyTime(duration), reqCount: 0, // No actual requests made rowCount, rps: 0, errors: 0, stats: getMemoryStats() }; if (isMainJob) { logger.complete(result); } return result; } /** * Generate a curl command for a given configuration and data * * @description * Creates a curl command string that represents the HTTP request * that would be made with the given configuration and data. * * @param {BatchRequestConfig} config - Request configuration * @param {any} data - Data payload for the request * @returns {string} Curl command string * * @since 2.0.0 */ function generateCurlCommand(config, data) { const parts = ['curl']; // Add method if (config.method && config.method !== 'GET') { parts.push(`-X ${config.method}`); } // Add headers if (config.headers) { Object.entries(config.headers).forEach(([key, value]) => { parts.push(`-H "${key}: ${value}"`); }); } // Add data payload for POST/PUT/PATCH methods if (['POST', 'PUT', 'PATCH'].includes(config.method?.toUpperCase()) && data) { const payload = typeof data === 'string' ? data : JSON.stringify(data); parts.push(`-d '${payload}'`); // Add content-type if not already specified const hasContentType = config.headers && Object.keys(config.headers).some(key => key.toLowerCase() === 'content-type'); if (!hasContentType) { parts.push(`-H "Content-Type: application/json"`); } } // Add URL with search params let url = config.url; if (config.searchParams) { const params = new URLSearchParams(config.searchParams); url += `?${params.toString()}`; } parts.push(`"${url}"`); return parts.join(' '); } /** * Handle dry run mode - simulate operations without making actual requests * * @description * Processes configuration in dry run mode, counting data items and simulating * request processing without making actual HTTP calls. Useful for testing * configuration validity and estimating request counts. * * @param {BatchRequestConfig} config - Configuration object for dry run * @param {any} logger - Logger instance for output * @param {boolean} isMainJob - Whether this is the primary operation * @returns {Promise<Result>} Promise resolving to simulated result * * @example * const result = await handleDryRun(config, logger, true); * console.log(`Would make ${result.reqCount} requests for ${result.rowCount} items`); * * @since 2.0.0 */ async function handleDryRun(config, logger, isMainJob) { const startTime = Date.now(); if (logger.isVerbose()) { logger.info('Running in dry run mode - no actual requests will be made'); } // Create data stream to count items const stream = await createDataStream(config); let rowCount = 0; const responses = []; // Count data items and simulate batching for await (const data of stream) { if (data !== null) { rowCount++; let processedData = data; // Clone data if requested to avoid mutations (do this before any transforms) if (config.clone) { processedData = typeof data === 'object' && data !== null ? JSON.parse(JSON.stringify(data)) : data; } // Apply preset transform first (if specified) if (config.preset) { try { processedData = applyPresetTransform(processedData, config.preset, config.errorHandler); } catch (error) { logger.error(`Preset transform error in dry run (${config.preset}): ${error.message}`); if (config.errorHandler && typeof config.errorHandler === 'function') { config.errorHandler(error, processedData); } else { throw error; // Preset transform errors should fail the operation } } }