UNPKG

arehs

Version:

Arehs aims to provide the best possible large-batch processing, built around event-driven chunk processing.

215 lines (214 loc) 7.74 kB
import { EventEmitter } from 'events';
import { ProcessStatus } from './types';

/**
 * Runs an asynchronous processor over an array of items while capping how many
 * items are processed at the same time.
 *
 * Coordination is event-driven: every finished task emits
 * ProcessStatus.TASK_COMPLETED, a failed task emits ProcessStatus.ERROR, and
 * ProcessStatus.FINISH is emitted once no task is in flight and either every
 * item has been processed or the run was stopped by a failure.
 */
export class Arehs {
    data;
    results;
    concurrency;
    inFlightTasks;
    processedEntries;
    processor;
    eventEmitter;
    promiseExecution;
    timeout;
    error;
    allowStopOnFailure;
    retryLimitCount;
    isStopped;

    /**
     * Constructor that initializes an instance of the class.
     *
     * @param data Array of items to process. It is read, never modified.
     * @param concurrency Maximum number of tasks allowed in flight at once.
     * @param processor Function called with one item; its result is awaited.
     * @param timeout Per-task timeout in milliseconds; 0 disables the timeout.
     */
    constructor(data, concurrency, processor, timeout) {
        this.data = data;
        this.results = [];
        this.concurrency = concurrency;
        this.inFlightTasks = 0;
        this.processedEntries = 0;
        this.processor = processor;
        this.eventEmitter = new EventEmitter();
        this.promiseExecution = null;
        this.timeout = timeout;
        this.error = null;
        this.allowStopOnFailure = false;
        this.retryLimitCount = 0;
        // Set once a failure should halt dispatching of further items.
        this.isStopped = false;
    }

    /**
     * Creates an Arehs instance for the given data with a concurrency of 10,
     * a placeholder processor that resolves to an empty object, and no timeout.
     *
     * @param data Array of items to process.
     * @returns A new instance of the class this method is called on.
     */
    static create(data) {
        return new this(data, 10, () => Promise.resolve({}), 0);
    }

    /**
     * Sets the concurrency limit and returns the current instance.
     *
     * @param concurrency Maximum number of tasks allowed in flight at once.
     * @returns This instance, for chaining.
     */
    withConcurrency(concurrency) {
        this.concurrency = concurrency;
        return this;
    }

    /**
     * Sets the per-task timeout.
     * With the default of 0 no timeout is applied. With a value greater than 0,
     * a task that takes longer than that many milliseconds fails with an error.
     *
     * @param ms Timeout in milliseconds.
     * @returns This instance, for chaining.
     * @throws Error when ms is negative.
     */
    timeoutLimit(ms = 0) {
        if (ms < 0) {
            throw new Error('The parameter for timeoutLimit must be set to a value greater than 0.');
        }
        this.timeout = ms;
        return this;
    }

    /**
     * Sets whether processing stops dispatching new items after a failure.
     *
     * @param stopOnFailure True to stop on failure.
     * @returns This instance, for chaining.
     */
    stopOnFailure(stopOnFailure) {
        this.allowStopOnFailure = stopOnFailure;
        return this;
    }

    /**
     * Sets a limit on the number of retries on failure.
     * Retries are only performed when stopOnFailure is enabled (see mapAsync).
     *
     * @param retryLimit Maximum number of retries per item.
     * @returns This instance, for chaining.
     */
    retryLimit(retryLimit) {
        this.retryLimitCount = retryLimit;
        return this;
    }

    /**
     * Starts processing the input data asynchronously and returns the results.
     *
     * When stopOnFailure is enabled the processor is wrapped so that a failing
     * item is retried up to retryLimit times; once the retries are exhausted no
     * further items are dispatched and the returned promise rejects with the
     * error. When stopOnFailure is disabled the processor is used as-is, without
     * retries.
     *
     * @param processor The function responsible for processing each data item.
     * @returns A Promise that resolves to the array of results. Results are
     * appended as tasks complete, so their order can differ from the input order.
     * The Promise rejects with the first task error that occurs.
     */
    mapAsync(processor) {
        const retryableProcessor = async (data) => {
            let attempts = 0;
            while (true) {
                try {
                    return await processor(data);
                } catch (error) {
                    if (this.retryLimitCount > 0 && attempts < this.retryLimitCount) {
                        attempts++;
                        console.error(`Error occurred (${attempts}/${this.retryLimitCount}):`, error);
                        continue;
                    }
                    if (this.allowStopOnFailure) {
                        // Only raise the stop flag here. _executeTask records the
                        // error, emits the events and keeps the counters
                        // consistent, and the caller's data array is left intact.
                        this.isStopped = true;
                    }
                    throw error;
                }
            }
        };
        this.processor = this.allowStopOnFailure ? retryableProcessor : processor;
        return this._executeProcess();
    }

    /**
     * Returns a promise that resolves immediately while the number of in-flight
     * tasks is below the concurrency limit, and otherwise resolves on the next
     * TASK_COMPLETED event.
     *
     * @private
     */
    _waitForTaskCompletion() {
        if (this.inFlightTasks < this.concurrency) {
            return Promise.resolve();
        }
        return new Promise((resolve) => {
            this.eventEmitter.once(ProcessStatus.TASK_COMPLETED, resolve);
        });
    }

    /**
     * Processes one data item and appends its result to the results array.
     * On failure the error is stored in this.error, logged, and an ERROR event
     * is emitted. In every case a TASK_COMPLETED event is emitted afterwards,
     * followed by FINISH when this was the last outstanding task.
     *
     * @param data The item to process.
     * @private
     */
    async _executeTask(data) {
        let timer;
        try {
            this.inFlightTasks++;
            const operations = [this.processor(data)];
            if (this.timeout > 0) {
                operations.push(new Promise((_, reject) => {
                    timer = setTimeout(() => {
                        reject(new Error(`The current task has exceeded the ${this.timeout}ms. `));
                    }, this.timeout);
                }));
            }
            const result = await Promise.race(operations);
            this.results.push(result);
        } catch (error) {
            // Store the error before emitting so the ERROR listener rejects with it.
            this.error = error;
            console.error('_processRecord error:', error);
            this.eventEmitter.emit(ProcessStatus.ERROR, () => Promise.reject(error));
        } finally {
            // Without this the timer would stay pending after the task settled.
            clearTimeout(timer);
            this.inFlightTasks--;
            this.processedEntries++;
            this.eventEmitter.emit(ProcessStatus.TASK_COMPLETED);
            const allProcessed = this.processedEntries === this.data.length;
            if (this.inFlightTasks === 0 && (this.isStopped || allProcessed)) {
                this.eventEmitter.emit(ProcessStatus.FINISH);
            }
        }
    }

    /**
     * Dispatches every data item subject to the concurrency limit and returns a
     * promise for the collected results.
     * Empty data resolves to an empty array right away. If a run was already
     * started, the promise of that run is returned instead of starting again.
     *
     * @returns A Promise that resolves to this.results on FINISH and rejects
     * with this.error on ERROR, or rejects when the concurrency is not greater
     * than 0.
     * @private
     */
    _executeProcess() {
        if (this.data.length === 0) {
            this.eventEmitter.emit(ProcessStatus.TASK_COMPLETED);
            this.eventEmitter.emit(ProcessStatus.FINISH);
            return Promise.resolve([]);
        }
        if (this.promiseExecution !== null) {
            return this.promiseExecution;
        }
        if (this.concurrency <= 0) {
            // With such a limit the first wait would never be released, so fail
            // explicitly instead of returning a promise that never settles.
            return Promise.reject(new Error('The concurrency must be set to a value greater than 0.'));
        }
        this.promiseExecution = new Promise((resolve, reject) => {
            // Listeners are attached before any task starts so that no event is
            // missed. ERROR uses on() rather than once() so every emission during
            // the run has a listener; reject() after the first call is a no-op.
            // NOTE(review): if ProcessStatus.ERROR is the string 'error', emitting
            // it without a listener would make EventEmitter throw - confirm.
            this.eventEmitter.once(ProcessStatus.FINISH, () => resolve(this.results));
            this.eventEmitter.on(ProcessStatus.ERROR, () => reject(this.error));

            const dispatchAll = async () => {
                try {
                    for (const element of this.data) {
                        await this._waitForTaskCompletion();
                        if (this.isStopped) {
                            break;
                        }
                        // Deliberately not awaited: tasks run concurrently and
                        // _executeTask reports its own failures through events.
                        void this._executeTask(element);
                    }
                } catch (error) {
                    console.error('_executeProcess: ', error);
                    reject(error);
                }
            };
            void dispatchAll();
        });
        return this.promiseExecution;
    }
}