arehs
Version:
arehs provides efficient large-batch processing built around event-driven chunk processing.
215 lines (214 loc) • 7.74 kB
JavaScript
import { EventEmitter } from 'events';
import { ProcessStatus } from './types';
export class Arehs {
  /** Input items to process. This class only iterates it; it is never modified. */
  data;
  /** Values returned by successful tasks, appended in completion order. */
  results;
  /** Maximum number of tasks allowed in flight at the same time. */
  concurrency;
  /** Number of tasks that have started but not yet settled. */
  inFlightTasks;
  /** Number of tasks that have settled (successfully or not). */
  processedEntries;
  /** Function invoked for every item; installed by mapAsync. */
  processor;
  /** Internal event bus carrying the ProcessStatus events. */
  eventEmitter;
  /** Promise of the current run; cached so a run is started only once per instance. */
  promiseExecution;
  /** Per-task timeout in milliseconds; values <= 0 disable the timeout. */
  timeout;
  /** Most recent error raised by a task (null when none occurred). */
  error;
  /** When true, a terminal processor failure stops the dispatch of further items. */
  allowStopOnFailure;
  /** Maximum number of retries per item (only honoured when allowStopOnFailure is true). */
  retryLimitCount;
  /** Set once a terminal failure occurred in stop-on-failure mode. */
  stopRequested;

  /**
   * Constructor that initializes an instance of the class.
   *
   * @param data array of items to process
   * @param concurrency maximum number of tasks running at the same time
   * @param processor function called with each item; may return a value or a promise
   * @param timeout per-task timeout in milliseconds (0 disables it)
   */
  constructor(data, concurrency, processor, timeout) {
    this.data = data;
    this.results = [];
    this.concurrency = concurrency;
    this.inFlightTasks = 0;
    this.processedEntries = 0;
    this.processor = processor;
    this.eventEmitter = new EventEmitter();
    this.promiseExecution = null;
    this.timeout = timeout;
    this.error = null;
    this.allowStopOnFailure = false;
    this.retryLimitCount = 0;
    this.stopRequested = false;
  }

  /**
   * Creates an instance for the given array with the defaults:
   * concurrency 10, a processor resolving to an empty object, and no timeout.
   *
   * @param data array of items to process
   * @returns a new instance (of the class `create` was called on)
   */
  static create(data) {
    return new this(data, 10, () => Promise.resolve({}), 0);
  }

  /**
   * Sets the maximum number of tasks running at the same time.
   *
   * @param concurrency parallelism limit
   * @returns the current instance, for chaining
   */
  withConcurrency(concurrency) {
    this.concurrency = concurrency;
    return this;
  }

  /**
   * Sets the per-task timeout.
   * The default value is 0, which disables the timeout. With a value greater than 0,
   * a task that takes longer than `ms` milliseconds is treated as failed.
   *
   * @param ms timeout in milliseconds
   * @returns the current instance, for chaining
   * @throws Error when `ms` is negative
   */
  timeoutLimit(ms = 0) {
    if (ms < 0) {
      throw new Error('The parameter for timeoutLimit must be set to a value greater than 0.');
    }
    this.timeout = ms;
    return this;
  }

  /**
   * Sets whether processing stops after a terminal failure.
   *
   * @param stopOnFailure true to stop dispatching new items once an item has failed
   * @returns the current instance, for chaining
   */
  stopOnFailure(stopOnFailure) {
    this.allowStopOnFailure = stopOnFailure;
    return this;
  }

  /**
   * Sets a limit on the number of retries on failure.
   * The limit is only applied when stop-on-failure is enabled (see mapAsync).
   *
   * @param retryLimit maximum number of retries per item
   * @returns the current instance, for chaining
   */
  retryLimit(retryLimit) {
    this.retryLimitCount = retryLimit;
    return this;
  }

  /**
   * Starts processing the input data asynchronously and returns the results.
   *
   * When stop-on-failure is enabled the processor is wrapped so that a failing item is
   * retried up to `retryLimitCount` times; once the retries are exhausted no further
   * items are dispatched. Tasks already in flight are not cancelled.
   * Without stop-on-failure the processor is used as-is (no retries) and every item is
   * still attempted, but the returned promise rejects on the first failure.
   *
   * Calling this again on the same instance returns the promise of the first run.
   *
   * @param processor The function responsible for processing each data item.
   * @returns A Promise that resolves to the array of results (in completion order) once
   *          all items are processed, or rejects with the error of the first failed task.
   */
  mapAsync(processor) {
    const retryableProcessor = async (item) => {
      let attempts = 0;
      while (true) {
        try {
          return await processor(item);
        }
        catch (error) {
          if (this.retryLimitCount > 0 && attempts < this.retryLimitCount) {
            attempts++;
            console.error(`Error occurred (${attempts}/${this.retryLimitCount}):`, error);
            continue;
          }
          if (this.allowStopOnFailure) {
            // Signal the dispatch loop to stop. A flag is used instead of emptying
            // `data`, which belongs to the caller and also feeds the FINISH check.
            this.stopRequested = true;
          }
          // _executeTask records the error and emits the events exactly once.
          throw error;
        }
      }
    };
    this.processor = this.allowStopOnFailure ? retryableProcessor : processor;
    return this._executeProcess();
  }

  /**
   * Returns a promise that settles once another task may be started.
   * If the concurrency limit is reached it waits for the next TASK_COMPLETED event.
   *
   * @private
   */
  _waitForTaskCompletion() {
    // Only wait when a task is actually running: with a limit below 1 nothing would
    // ever emit TASK_COMPLETED and the run would deadlock.
    const hasRunningTasks = this.inFlightTasks > 0;
    if (hasRunningTasks && this.inFlightTasks >= this.concurrency) {
      return new Promise(resolve => {
        this.eventEmitter.once(ProcessStatus.TASK_COMPLETED, resolve);
      });
    }
    return Promise.resolve();
  }

  /**
   * Runs the processor for one item, racing it against the timeout when one is set,
   * and stores the result in the results array.
   * A failure is recorded in `error` and announced with an ERROR event.
   * When the task settles, TASK_COMPLETED is emitted; FINISH follows when no task is
   * in flight and either every item was processed or a stop was requested.
   *
   * @param data the item to process
   * @private
   */
  async _executeTask(data) {
    let timer = null;
    try {
      this.inFlightTasks++;
      const operations = [this.processor(data)];
      if (this.timeout > 0) {
        operations.push(new Promise((_, reject) => {
          timer = setTimeout(() => {
            reject(new Error(`The current task has exceeded the ${this.timeout}ms. `));
          }, this.timeout);
        }));
      }
      const result = await Promise.race(operations);
      this.results.push(result);
    }
    catch (error) {
      // Record the error before emitting: the ERROR listener rejects with `this.error`.
      this.error = error;
      console.error('_processRecord error:', error);
      this.eventEmitter.emit(ProcessStatus.ERROR, () => Promise.reject(error));
    }
    finally {
      // Release the timer so a finished task does not keep the event loop alive.
      if (timer !== null) {
        clearTimeout(timer);
      }
      this.inFlightTasks--;
      this.processedEntries++;
      this.eventEmitter.emit(ProcessStatus.TASK_COMPLETED);
      const drained = this.inFlightTasks === 0;
      const allProcessed = this.processedEntries === this.data.length;
      if (drained && (allProcessed || this.stopRequested)) {
        this.eventEmitter.emit(ProcessStatus.FINISH);
      }
    }
  }

  /**
   * Executes the run and collects the results.
   * For empty input it emits TASK_COMPLETED and FINISH and resolves to a new empty array.
   * If a promiseExecution already exists, it returns that promise;
   * otherwise, it creates a new promise and starts dispatching the items.
   *
   * @returns promise resolving to `results`, or rejecting with the first task error
   * @private
   */
  _executeProcess() {
    if (this.data.length === 0) {
      this.eventEmitter.emit(ProcessStatus.TASK_COMPLETED);
      this.eventEmitter.emit(ProcessStatus.FINISH);
      return Promise.resolve([]);
    }
    if (this.promiseExecution !== null) {
      return this.promiseExecution;
    }
    this.promiseExecution = new Promise((resolve, reject) => {
      // Listen before the first task starts, so that failures raised while items are
      // still being dispatched (or raised synchronously) are never missed.
      this.eventEmitter.once(ProcessStatus.FINISH, () => resolve(this.results));
      // Persistent on purpose: later failures must still find a listener. Node's
      // EventEmitter throws when an event named 'error' is emitted without one
      // (whether ProcessStatus.ERROR is that name is not visible from this file).
      this.eventEmitter.on(ProcessStatus.ERROR, () => reject(this.error));
      const dispatchAll = async () => {
        for (const element of this.data) {
          await this._waitForTaskCompletion();
          if (this.stopRequested) {
            break;
          }
          // Deliberately not awaited: tasks run concurrently. Anything escaping the
          // task itself (e.g. a throwing event listener) fails the whole run.
          this._executeTask(element).catch(reject);
        }
      };
      dispatchAll().catch(error => {
        console.error('_executeProcess: ', error);
        // Settle the promise instead of leaving the caller waiting forever.
        reject(error);
      });
    });
    return this.promiseExecution;
  }
}