/**
 * @naturalcycles/nodejs-lib
 * Standard library for Node.js
 */
import { Transform } from 'node:stream';
import { Worker } from 'node:worker_threads';
import { _range } from '@naturalcycles/js-lib/array/range.js';
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
const workerProxyFilePath = `${import.meta.dirname}/workerClassProxy.js`;
/**
 * Spawns a pool of Workers (threads).
 * Distributes (using round-robin, equally) all inputs over Workers.
 * Workers emit 1 output for each 1 input.
 * Output of Workers is passed down the stream. Order is RANDOM (since it's a multi-threaded environment).
 *
 * opt:
 * - workerFile: path to the worker implementation (loaded by workerClassProxy)
 * - poolSize: number of Worker threads to spawn (default 2)
 * - concurrency: max chunks in flight at once (default: poolSize)
 * - workerData: extra data passed to each Worker
 */
export function transformMultiThreaded(opt) {
  const { workerFile, poolSize = 2, workerData } = opt;
  const maxConcurrency = opt.concurrency || poolSize;
  // Stream buffers should be at least as large as the concurrency window
  const highWaterMark = Math.max(16, maxConcurrency);
  console.log({
    poolSize,
    maxConcurrency,
    highWaterMark,
  });
  const workerDonePromises = [];
  // Pending per-chunk deferred promises, keyed by chunk index.
  // Map (not plain object): dynamic integer keys with frequent add/delete.
  const messageDonePromises = new Map();
  let index = -1; // input chunk index, will start from 0
  // Concurrency control
  let inFlight = 0;
  let blockedCallback = null;
  let flushBlocked = null;

  // Reject all still-pending chunks that were assigned (round-robin) to the
  // given worker. Without this, a worker that errors/dies would leave its
  // in-flight `await`s unsettled forever and deadlock the stream.
  function rejectPendingForWorker(workerIndex, err) {
    for (const [chunkIndex, promise] of messageDonePromises) {
      if (chunkIndex % poolSize === workerIndex) {
        promise.reject(err);
      }
    }
  }

  const workers = _range(0, poolSize).map(workerIndex => {
    workerDonePromises.push(pDefer());
    const worker = new Worker(workerProxyFilePath, {
      workerData: {
        workerIndex,
        workerFile, // pass it, so workerProxy can require() it
        ...workerData,
      },
    });
    worker.on('error', err => {
      console.error(`Worker ${workerIndex} error`, err);
      workerDonePromises[workerIndex].reject(err);
      // Fail this worker's in-flight chunks, otherwise transform() hangs on them
      rejectPendingForWorker(workerIndex, err);
    });
    worker.on('exit', exitCode => {
      // A worker that exits with chunks still pending will never answer them
      rejectPendingForWorker(
        workerIndex,
        new Error(`Worker ${workerIndex} exited (code=${exitCode}) with pending messages`),
      );
      workerDonePromises[workerIndex].resolve(undefined);
    });
    worker.on('message', out => {
      const promise = messageDonePromises.get(out.index);
      // Guard: entry may already be settled/removed (e.g. after a worker error)
      if (!promise) return;
      if (out.error) {
        promise.reject(out.error);
      } else {
        promise.resolve(out.payload);
      }
    });
    return worker;
  });
  return new Transform({
    objectMode: true,
    readableHighWaterMark: highWaterMark,
    writableHighWaterMark: highWaterMark,
    async transform(chunk, _, cb) {
      const currentIndex = ++index;
      inFlight++;
      // Apply backpressure if at capacity, otherwise request more input
      if (inFlight < maxConcurrency) {
        cb();
      } else {
        blockedCallback = cb;
      }
      // Create the unresolved promise (to await)
      const promise = pDefer();
      messageDonePromises.set(currentIndex, promise);
      const worker = workers[currentIndex % poolSize]; // round-robin
      worker.postMessage({
        index: currentIndex,
        payload: chunk,
      });
      try {
        const out = await promise;
        this.push(out);
      } catch (err) {
        // Currently we only support ErrorMode.SUPPRESS
        // Error is logged and output continues
        console.error(err);
      } finally {
        messageDonePromises.delete(currentIndex);
        inFlight--;
        // Release blocked callback if we now have capacity
        if (blockedCallback && inFlight < maxConcurrency) {
          const pendingCb = blockedCallback;
          blockedCallback = null;
          pendingCb();
        }
        // Trigger flush completion if all done
        if (inFlight === 0 && flushBlocked) {
          flushBlocked.resolve();
        }
      }
    },
    async flush(cb) {
      // Wait for all in-flight operations to complete
      if (inFlight > 0) {
        flushBlocked = pDefer();
        await flushBlocked;
      }
      try {
        // Push null (complete) to all workers
        for (const worker of workers) {
          worker.postMessage(null);
        }
        console.log(`transformMultiThreaded.flush is waiting for all workers to be done`);
        await Promise.all(workerDonePromises);
        console.log(`transformMultiThreaded.flush all workers done`);
        cb();
      } catch (err) {
        cb(err);
      }
    },
  });
}