UNPKG

@cloudant/couchbackup

Version:

CouchBackup - command-line backup utility for Cloudant/CouchDB

179 lines (170 loc) 6.7 kB
// Copyright © 2017, 2025 IBM Corp. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. const { createInterface } = require('node:readline'); const { Duplex, PassThrough, Transform } = require('node:stream'); const debug = require('debug'); /** * A Duplex stream that converts the input stream to a stream * of line objects using the built-in readline interface. * * The new stream line objects have the form * {lineNumber: #, line: content} * * Note that it uses the `line` event and not `for await...of` * for performance reasons. See Node Readline module docs for * details. */ class Liner extends Duplex { // Configure logging log = debug('couchbackup:liner'); // Flag for whether the readline interface is running isRunning = true; // Flag for whether the readline interface is closed isClosed = false; // Line number state lineNumber = 0; // Buffer of processed lines lines = []; constructor(sanitize = false) { // Configuration of this Duplex: // objectMode: false on the writable input (file chunks), true on the readable output (line objects) // The readableHighWaterMark controls the number of lines buffered after this implementation calls // "push". Backup lines are potentially large (default 500 documents - i.e. potentially MBs). Since // there is additional buffering downstream and file processing is faster than the network ops // we don't bottleneck here even without a large buffer. super({ readableObjectMode: true, readableHighWaterMark: 0, writableObjectMode: false }); // Set up the stream of bytes that will be processed to lines. if (sanitize) { // Handle unescaped unicode "newlines" by escaping them before passing to readline this.inStream = new Transform({ objectMode: false, transform(chunk, encoding, callback) { try { this.push(chunk.toString('utf-8').replaceAll('\u2028', '\\u2028').replaceAll('\u2029', '\\u2029'), 'utf-8'); callback(); } catch (e) { callback(e); } } }); } else { this.inStream = new PassThrough({ objectMode: false }); } // if there is an error destroy this Duplex with it this.inStream.on('error', e => this.destroy(e)); // Built-in readline interface over the inStream this.readlineInterface = createInterface({ input: this.inStream, // the writable side of Liner, passed through terminal: false, // expect to read from files crlfDelay: Infinity // couchbackup files should only use "/n" EOL, but allow for all "/r/n" to be single EOL }).on('line', (line) => { // Wrap the line in the object format and store it an array waiting to be pushed // when downstream is ready to receive. const bufferedLines = this.lines.push(this.wrapLine(line)); this.log(`Liner processed line ${this.lineNumber}. Buffered lines available: ${bufferedLines}.`); this.pushAvailable(); }).once('close', () => { this.isClosed = true; this.log('Liner readline interface closed.'); // Push null onto our lines buffer to signal EOF to downstream consumers. this.lines.push(null); this.pushAvailable(); }); } /** * Helper function to wrap a line in the object format that Liner * pushes to downstream consumers. * * @param {string} line * @returns {object} {"lineNumber: #, line"} */ wrapLine(line) { // For each line wrapped, increment the line number return { lineNumber: ++this.lineNumber, line }; } /** * Function that pushes any available lines downstream. */ pushAvailable() { // Check readline is running flag and whether there is content to push. while (this.isRunning && this.lines.length > 0) { if (!this.push(this.lines.shift())) { this.log(`Back-pressure from push. Buffered lines available: ${this.lines.length}.`); // Push returned false, this indicates downstream back-pressure. // Pause the readline interface to stop pushing more lines downstream. // Resumption is triggered by downstream calling _read which happens // when it is ready for more data. this.isRunning = false; if (!this.isClosed) { this.log('Liner pausing.'); this.readlineInterface.pause(); } break; } else { this.log(`Liner pushed. Buffered lines available: ${this.lines.length}.`); } } } /** * Implementation of the Readable side of the Duplex. * * * @param {number} size - ignored as the Readable side is objectMode: true */ _read(size) { // As per the Readable contract if read has been called it won't be called // again until after there has been a call to push. // As part of flow control if we are not running we must resume when read // is called to ensure that pushes are able to happen (and thereby trigger) // subsequent reads. if (!this.isRunning) { this.isRunning = true; if (!this.isClosed) { this.log('Liner resuming after read.'); this.readlineInterface.resume(); } } this.pushAvailable(); } /** * Implementation for the Writable side of the Duplex. * Delegates to the inStream PassThrough. * * @param {*} chunk * @param {string} encoding * @param {function} callback */ _write(chunk, encoding, callback) { // Note that the passed callback function controls flow from upstream. // When the readable side is paused by downstream the inStream buffer // will fill and then the callback will be delayed until that buffer // is drained by the readline interface starting up again. this.inStream.write(chunk, encoding, callback); } /** * Cleanup after the last write to the Duplex. * * @param {function} callback */ _final(callback) { this.log('Finalizing liner.'); // Nothing more will be written, end our inStream which will // cause the readLineInterface to emit 'close' and signal EOF // to our readers after the line buffer is emptied. this.inStream.end(callback); } } module.exports = { Liner };