@naturalcycles/nodejs-lib
Version:
Standard library for Node.js
190 lines (189 loc) • 7.04 kB
JavaScript
import { Transform } from 'node:stream';
import { _hc } from '@naturalcycles/js-lib';
import { _since } from '@naturalcycles/js-lib/datetime';
import { _anyToError, _assert, ErrorMode } from '@naturalcycles/js-lib/error';
import { createCommonLoggerAtLevel } from '@naturalcycles/js-lib/log';
import { pDefer } from '@naturalcycles/js-lib/promise/pDefer.js';
import { _stringify } from '@naturalcycles/js-lib/string';
import { END, SKIP } from '@naturalcycles/js-lib/types';
import { yellow } from '../../colors/colors.js';
import { PIPELINE_GRACEFUL_ABORT } from '../stream.util.js';
// Minimum time (in milliseconds) between two recalculations of the warmup
// concurrency; updateConcurrency() returns early when called more often than this.
const WARMUP_CHECK_INTERVAL_MS = 1000;
/**
 * Creates a Transform stream that passes every chunk through `mapper`, running up to
 * `concurrency` mapper calls at the same time, with native concurrency control
 * (no through2-concurrent dependency) and support for gradual warmup.
 *
 * Results are pushed as soon as their mapper call settles, so output order is not
 * guaranteed to match input order when `concurrency` is greater than 1.
 *
 * Special mapper results:
 * - `SKIP` - nothing is pushed for this chunk.
 * - `END` - the stream is marked as settled and `opt.signal` is aborted with a
 *   `PIPELINE_GRACEFUL_ABORT` error (`opt.signal` is required in this case).
 *
 * @param mapper called as `mapper(chunk, index)`; may return a value or a Promise.
 * @param opt options:
 * - `concurrency` (default 16): max number of mapper calls in flight. Values below 1
 *   (or NaN) are treated as 1.
 * - `warmupSeconds` (default 0): when > 0, concurrency starts at 1 and grows linearly
 *   up to `concurrency` over this many seconds.
 * - `predicate` / `asyncPredicate`: called as `(result, index)`; the result is pushed only
 *   when it returns a truthy value. `predicate` wins when both are given.
 * - `errorMode` (default `ErrorMode.THROW_IMMEDIATELY`): `THROW_IMMEDIATELY` destroys the
 *   stream with the first error; `THROW_AGGREGATED` collects errors and fails in `flush`
 *   with an `AggregateError`; any other mode only counts and logs errors.
 * - `onError(err, chunk)`: called for every mapper/predicate error.
 * - `onDone(stats)`: called at most once, either from `flush` or right before the stream
 *   is destroyed in `THROW_IMMEDIATELY` mode.
 * - `metric` (default 'stream'): prefix used when logging error stats.
 * - `signal`: AbortController-like object, aborted when mapper returns `END`.
 * - `objectMode` (default true), `highWaterMark` (default 64): passed to the Transform.
 * - `logger`, `logLevel`: passed to `createCommonLoggerAtLevel`.
 * @returns the configured Transform stream.
 *
 * @experimental
 */
export function transformMap(mapper, opt = {}) {
  const {
    concurrency: requestedConcurrency = 16,
    warmupSeconds = 0,
    predicate,
    asyncPredicate,
    errorMode = ErrorMode.THROW_IMMEDIATELY,
    onError,
    onDone,
    metric = 'stream',
    signal,
    objectMode = true,
    highWaterMark = 64,
  } = opt;
  // A limit of 0, a negative value or NaN would park the very first write callback in
  // `blockedCallback` forever (`inFlight < concurrency` never becomes true), so the
  // stream would hang. Fall back to serial processing instead.
  const maxConcurrency = requestedConcurrency >= 1 ? requestedConcurrency : 1;
  const warmupMs = warmupSeconds * 1000;
  const logger = createCommonLoggerAtLevel(opt.logger, opt.logLevel);
  // Stats
  let started = 0;
  let index = -1;
  let countOut = 0;
  let isSettled = false;
  let ok = true;
  let errors = 0;
  const collectedErrors = [];
  // Guards onDone against being invoked more than once
  let onDoneCalled = false;
  // Concurrency control - single counter, single callback for backpressure
  let inFlight = 0;
  let blockedCallback = null;
  let flushBlocked = null;
  // Warmup - cached concurrency to reduce Date.now() syscalls
  let warmupComplete = warmupSeconds <= 0 || maxConcurrency <= 1;
  let concurrency = warmupComplete ? maxConcurrency : 1;
  let lastWarmupCheck = 0;
  return new Transform({
    objectMode,
    readableHighWaterMark: highWaterMark,
    writableHighWaterMark: highWaterMark,
    async transform(chunk, _, cb) {
      // Initialize start time on first item
      if (started === 0) {
        started = Date.now();
        lastWarmupCheck = started;
      }
      // Once settled (END received or fatal error), drain remaining input without mapping
      if (isSettled) return cb();
      const currentIndex = ++index;
      inFlight++;
      if (!warmupComplete) {
        updateConcurrency();
      }
      // Apply backpressure if at capacity, otherwise request more input
      if (inFlight < concurrency) {
        cb();
      } else {
        // Held until an in-flight mapper call finishes (see `finally` below)
        blockedCallback = cb;
      }
      try {
        const res = await mapper(chunk, currentIndex);
        // The stream may have settled while this mapper call was pending
        if (isSettled) return;
        if (res === END) {
          isSettled = true;
          logger.log(`transformMap2 END received at index ${currentIndex}`);
          _assert(signal, 'signal is required when using END');
          signal.abort(new Error(PIPELINE_GRACEFUL_ABORT));
          return;
        }
        if (res === SKIP) return;
        let shouldPush = true;
        if (predicate) {
          shouldPush = predicate(res, currentIndex);
        } else if (asyncPredicate) {
          // Re-check isSettled, since the stream may have settled during the await
          shouldPush = (await asyncPredicate(res, currentIndex)) && !isSettled;
        }
        if (shouldPush) {
          countOut++;
          this.push(res);
        }
      } catch (err) {
        logger.error(err);
        errors++;
        logErrorStats();
        if (onError) {
          try {
            onError(_anyToError(err), chunk);
          } catch (onErrorErr) {
            // onError stays best-effort (it must not break the stream),
            // but its failure is logged instead of being silently dropped
            logger.error(onErrorErr);
          }
        }
        if (errorMode === ErrorMode.THROW_IMMEDIATELY) {
          isSettled = true;
          ok = false;
          // Several in-flight mapper calls can reject and reach this branch;
          // callOnDone is idempotent, so onDone still fires only once
          await callOnDone();
          this.destroy(_anyToError(err));
          return;
        }
        if (errorMode === ErrorMode.THROW_AGGREGATED) {
          collectedErrors.push(_anyToError(err));
        }
      } finally {
        // Runs on every exit path above, including the early returns
        inFlight--;
        // Release blocked callback if we now have capacity
        if (blockedCallback && inFlight < concurrency) {
          const pendingCb = blockedCallback;
          blockedCallback = null;
          pendingCb();
        }
        // Trigger flush completion if all done
        if (inFlight === 0 && flushBlocked) {
          flushBlocked.resolve();
        }
      }
    },
    async flush(cb) {
      // Wait for all in-flight operations to complete
      if (inFlight > 0) {
        flushBlocked = pDefer();
        await flushBlocked;
      }
      logErrorStats(true);
      await callOnDone();
      if (collectedErrors.length) {
        cb(
          new AggregateError(
            collectedErrors,
            `transformMap2 resulted in ${collectedErrors.length} error(s)`,
          ),
        );
      } else {
        cb();
      }
    },
  });

  /**
   * Recomputes the allowed concurrency during warmup: linear growth from 1 to
   * maxConcurrency over warmupMs, re-evaluated at most once per WARMUP_CHECK_INTERVAL_MS.
   */
  function updateConcurrency() {
    const now = Date.now();
    if (now - lastWarmupCheck < WARMUP_CHECK_INTERVAL_MS) return;
    lastWarmupCheck = now;
    const elapsed = now - started;
    if (elapsed >= warmupMs) {
      warmupComplete = true;
      concurrency = maxConcurrency;
      logger.log(`transformMap2: warmup complete in ${_since(started)}`);
      return;
    }
    const progress = elapsed / warmupMs;
    concurrency = Math.max(1, Math.floor(1 + (maxConcurrency - 1) * progress));
  }

  /**
   * Logs the current error count (nothing is logged while there are no errors).
   */
  function logErrorStats(final = false) {
    if (!errors) return;
    logger.log(`${metric} ${final ? 'final ' : ''}errors: ${yellow(errors)}`);
  }

  /**
   * Invokes opt.onDone with the collected stats, at most once.
   * Errors thrown by onDone are logged and not propagated.
   */
  async function callOnDone() {
    if (onDoneCalled) return;
    onDoneCalled = true;
    try {
      await onDone?.({
        ok: collectedErrors.length === 0 && ok,
        collectedErrors,
        countErrors: errors,
        countIn: index + 1,
        countOut,
        started,
      });
    } catch (err) {
      logger.error(err);
    }
  }
}
/**
 * Formats a TransformMapStatsSummary as a short multi-line text block
 * (a `###` heading followed by one line per stat),
 * suitable for e.g. a Github Actions summary or a Slack message.
 *
 * The error line is included only when `countErrors` is truthy;
 * every entry of `extra` is appended as its own `key: value` line.
 */
export function transformMapStatsSummary(summary) {
  const { countIn, countOut, countErrors, started, name = 'Transform', extra = {} } = summary;
  const lines = [`### ${name} summary\n`];
  lines.push(`${_since(started)} spent`);
  lines.push(`${_hc(countIn)} / ${_hc(countOut)} row(s) in / out`);
  if (countErrors) {
    lines.push(`${countErrors} error(s)`);
  }
  for (const [key, value] of Object.entries(extra)) {
    lines.push(`${key}: ${_stringify(value)}`);
  }
  return lines.join('\n');
}