@cloudant/couchbackup
Version:
CouchBackup - command-line backup utility for Cloudant/CouchDB
390 lines (368 loc) • 13.9 kB
JavaScript
// Copyright © 2023, 2025 IBM Corp. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
const { Duplex, PassThrough, Writable, getDefaultHighWaterMark, setDefaultHighWaterMark } = require('node:stream');
const debug = require('debug');
/**
* Input: stream of elements
* Output: stream of arrays of batchSize elements each
*/
class BatchingStream extends Duplex {
// The buffer of elements to batch
elementsToBatch = [];
// The current batch ID
batchId = 0;
// Flag whether the Readable side is currently draining
isReadableDraining = true;
// Flag whether the writable side is complete
isWritableComplete = false;
/**
* Make a new BatchingStream with the given output batch
* size and whether it is accepting arrays for rebatching
* or single elements and buffering the set number of batches.
*
* @param {number} batchSize output batch (array) size
* @param {boolean} rebatch true to accept arrays and resize them (defaults to false to accept single items)
* @param {number} batchHighWaterMark the number of batches to buffer before applying upstream back-pressure
*/
constructor(batchSize, rebatch = false, batchHighWaterMark = 1) {
// This Duplex stream is always objectMode and doesn't use the stream
// buffers. It does use an internal buffer of elements for batching, which
// holds up to 1 batch in memory.
// The Writable side of this Duplex is written to by the element supplier.
// The Readable side of this Duplex is read by the batch consumer.
super({ objectMode: true, readableHighWaterMark: 0, writableHighWaterMark: 0 });
// Logging config
this.log = debug((`couchbackup:transform:${rebatch ? 're' : ''}batch`));
this.log(`Batching to size ${batchSize}`);
this.batchSize = batchSize;
this.rebatch = rebatch;
this.elementHighWaterMark = batchHighWaterMark * this.batchSize;
}
/**
* Check the available elementsToBatch and if the downstream consumer is
* accepting make and push as many batches as possible.
*
* This will not push if the Readable is not draining (downstream back-pressure).
* Batches will be pushed if:
* 1. The Readable is draining and there are at least batch size elements
* 2. The Readable is draining and there will be no new elements (the Writable is complete)
* and there are any elements available.
* Condition 2 allows for a smaller sized partial final batch.
*/
tryPushingBatches() {
this.log('Try to push batches.',
`Available elements:${this.elementsToBatch.length}`,
`Readable draining:${this.isReadableDraining}`,
`Writable complete:${this.isWritableComplete}`);
while (this.isReadableDraining &&
(this.elementsToBatch.length >= this.batchSize ||
(this.isWritableComplete && this.elementsToBatch.length > 0))) {
// Splice up to batchSize elements from the available elements
const batch = this.elementsToBatch.splice(0, this.batchSize);
this.log(`Writing batch ${this.batchId} with ${batch.length} elements.`);
// Increment the batch ID ready for the next batch
this.batchId++;
// push the batch downstream
if (!this.push(batch)) {
// There was back-pressure from downstream.
// Unset the draining flag and break the loop.
this.isReadableDraining = false;
break;
}
}
if (this.elementsToBatch.length < this.batchSize) {
// We've drained the buffer, release upstream.
if (this.pendingCallback) {
this.log('Unblocking after downstream reads.');
this.pendingCallback();
this.pendingCallback = null;
}
}
if (this.elementsToBatch.length === 0 && this.isWritableComplete) {
this.log('No further elements, signalling EOF.');
this.push(null);
}
}
/**
* Implementation of _read.
* The Duplex.read is called when downstream can accept more data.
* That in turn calls this _read implementation.
*
* @param {number} size ignored for objectMode
*/
_read(size) {
// Downstream asked for data set the draining flag.
this.isReadableDraining = true;
// Push any available batches.
this.tryPushingBatches();
}
/**
* Implementation of _write that accepts elements and
* adds them to an array of elementsToBatch.
* If the size of elementsToBatch exceeds the configured
* high water mark then the element supplier receives back-pressure
* via a delayed callback.
*
* @param {*} element the element to add to a batch
* @param {string} encoding ignored (objects are passed as-is)
* @param {function} callback called back when elementsToBatch is not too full
*/
_write(element, encoding, callback) {
if (!this.rebatch) {
// If we're not rebatching we're dealing with a single element
// but the push is cleaner if we can spread, so wrap in an array.
element = [element];
}
if (this.elementsToBatch.push(...element) >= this.elementHighWaterMark) {
// Delay callback as we have more than 1 batch buffered
// Downstream cannot accept more batches, delay the
// callback to back-pressure our element supplier until
// after the next downstream read.
this.log(`Back pressure after batch ${this.batchId}`);
this.pendingCallback = callback;
// If there are enough elements we must try to push batches
// to satisfy the Readable contract which will not call read
// again until after a push in the event that no data was available.
this.tryPushingBatches();
} else {
// Callback immediately if there are fewer elements
callback();
}
}
/**
* Implementation of _final to ensure that any partial batches
* remaining when the supplying stream ends are pushed downstream.
*
* @param {function} callback
*/
_final(callback) {
this.log('Flushing batch transform.');
// Set the writable complete flag
this.isWritableComplete = true;
// Try to push batches
this.tryPushingBatches();
callback();
}
}
class DelegateWritable extends Writable {
/**
* A Writable that delegates to another writable wrapping it in some
* helpful operations and handling "ending" of special streams like
* process.stdout.
*
* @param {string} name - the name of this DelegateWritable for logging
* @param {Writable} targetWritable - the Writable stream to write to
* @param {function} lastChunkFn - a no-args function to call to get a final chunk to write
* @param {function} chunkMapFn - a function(chunk) that can transform/map a chunk before writing
* @param {function} postWriteFn - a function(chunk) that can perform an action after a write completes
*/
constructor(name, targetWritable, lastChunkFn, chunkMapFn, postWriteFn) {
super({ objectMode: true });
this.name = name;
this.targetWritable = targetWritable;
this.lastChunkFn = lastChunkFn;
this.chunkMapFn = chunkMapFn;
this.postWriteFn = postWriteFn;
this.log = debug((`couchbackup:transform:delegate:${name}`));
}
_write(chunk, encoding, callback) {
const toWrite = (this.chunkMapFn) ? this.chunkMapFn(chunk) : chunk;
if (!this.targetWritable.destroyed) {
this.targetWritable.write(toWrite, encoding, (err) => {
if (!err) {
this.log('completed target chunk write');
if (this.postWriteFn) {
this.postWriteFn(chunk);
}
}
callback(err);
});
} else {
// Avoid write after destroy errors
this.log('supressing write after destroy error');
callback();
}
}
_final(callback) {
this.log('Finalizing');
const lastChunk = (this.lastChunkFn && this.lastChunkFn()) || null;
if (!this.targetWritable.destroyed) {
// We can't 'end' stdout, so use a final write instead for that case
if (this.targetWritable === process.stdout) {
// we can't 'write' null, so don't do anything if there is no last chunk
if (lastChunk) {
this.targetWritable.write(lastChunk, 'utf-8', (err) => {
if (!err) {
this.log('wrote last chunk to stdout');
} else {
this.log('error writing last chunk to stdout');
}
callback(err);
});
} else {
this.log('no last chunk to write to stdout');
callback();
}
} else {
this.targetWritable.end(lastChunk, 'utf-8', (err) => {
if (!err) {
this.log('wrote last chunk and ended target writable');
} else {
this.log('error ending target writable');
}
callback(err);
});
}
} else {
// Avoid write after destroy errors
this.log('supressing write after destroy error');
callback();
}
}
}
/**
* A helper PassThrough class that is used in our custom
* Duplex streams.
*/
class DuplexPassThrough extends PassThrough {
constructor(opts) {
super({ objectMode: true, readableHighWaterMark: 0, writableHighWaterMark: 0, ...opts });
}
// The destroy call on this PassThrough stream
// gets ahead of real errors reaching the
// callback/promise at the end of the pipeline.
// This allows an AbortError to get propagated
// from the micro-task queue instead because the
// real error is blocked behind a callback somewhere.
// That in turn masks the real cause of the failure,
// so we defer the _destroy in a setImmediate.
_destroy(err, cb) {
setImmediate(() => {
cb(err);
});
}
}
class DuplexMapper extends Duplex {
constructor(fn, style, concurrency = 1) {
const operatorOpts = { concurrency, highWaterMark: concurrency };
const inputStream = new DuplexPassThrough();
let outputStream;
switch (style) {
case 'map':
outputStream = inputStream.map(fn, operatorOpts);
break;
case 'filter':
outputStream = inputStream.filter(fn, operatorOpts);
break;
default:
throw new Error('Invalid style.');
}
// Workaround the fact that Duplex.from doesn't allow customizing the HWM
// Set a new objectMode default value while we create the stream, then reset it.
const originalHWM = getDefaultHighWaterMark(true);
// Use concurrency as the highWaterMark to allow one item on deck for each "thread"
setDefaultHighWaterMark(true, concurrency);
try {
return Duplex.from({ readable: outputStream, writable: inputStream });
} finally {
setDefaultHighWaterMark(true, originalHWM);
}
}
}
/**
* Input: stream of x
* Output: stream of x with elements not passing the filter removed
*/
class FilterStream extends DuplexMapper {
constructor(filterFunction) {
super(filterFunction, 'filter');
}
}
/**
* Input: stream of x
* Output: stream of mappingFunction(x)
*/
class MappingStream extends DuplexMapper {
constructor(mappingFunction, concurrency = 1) {
super(mappingFunction, 'map', concurrency);
}
}
/**
* PassThrough stream that calls another function
* to perform a side effect.
*/
class SideEffect extends DuplexPassThrough {
constructor(fn, options) {
super(options);
this.fn = fn;
this.log = debug(('couchbackup:transform:sideeffect'));
}
async doSideEffect(chunk, encoding) {
return await this.fn(chunk, encoding);
}
_transform(chunk, encoding, callback) {
this.log('Performing side effect');
this.doSideEffect(chunk, encoding)
.then(() => {
this.log('Passing through');
super._transform(chunk, encoding, callback);
}).catch((err) => {
this.log(`Caught error ${err}`);
callback(err);
});
}
}
class WritableWithPassThrough extends SideEffect {
/**
* A Writable that passes through the original chunk.
* The chunk is also passed to a SideEffect which behaves as DelegateWritable does.
*
* @param {string} name - the name of the DelegateWritable for logging
* @param {Writable} targetWritable - the Writable stream to write to
* @param {function} lastChunkFn - a no-args function to call to get a final chunk to write
* @param {function} chunkMapFn - a function(chunk) that can transform/map a chunk before writing
* @param {function} postWriteFn - a function(chunk) that can perform an action after a write completes
*/
constructor(name, targetWritable, lastChunkFn, chunkMapFn, postWriteFn) {
// Initialize super without a side effect fn because we need to set some
// properties before we can define it.
super(null, { objectMode: true });
this.log = debug(`couchbackup:transform:writablepassthrough:${name}`);
this.delegateWritable = new DelegateWritable(name, targetWritable, lastChunkFn, chunkMapFn, postWriteFn);
// Now set the side effect fn we omitted earlier
this.fn = (chunk, encoding) => {
return new Promise((resolve, reject) => {
this.delegateWritable.write(chunk, encoding, (err) => {
if (err) {
reject(err);
} else {
resolve();
}
});
});
};
}
_flush(callback) {
this.log('Flushing writable passthrough');
this.delegateWritable.end(callback);
}
}
module.exports = {
BatchingStream,
DelegateWritable,
FilterStream,
MappingStream,
SideEffect,
WritableWithPassThrough
};