UNPKG

@cloudant/couchbackup

Version:

CouchBackup - command-line backup utility for Cloudant/CouchDB

410 lines (386 loc) 15 kB
// Copyright © 2017, 2024 IBM Corp. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /** * CouchBackup module. * @module couchbackup * @see module:couchbackup */ const events = require('node:events'); const fs = require('node:fs'); const URL = require('node:url').URL; const backup = require('./includes/backup.js'); const defaults = require('./includes/config.js').apiDefaults; const { convertError, BackupError, OptionError } = require('./includes/error.js'); const { newClient } = require('./includes/request.js'); const restoreInternal = require('./includes/restore.js'); const debug = require('debug')('couchbackup:app'); const pkg = require('./package.json'); const { RESUME_COMMENT } = require('./includes/restoreMappings.js'); /** * Test for a positive, safe integer. * * @param {any} x - Object under test. */ function isSafePositiveInteger(x) { // https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER const MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || 9007199254740991; return (typeof x === 'number' || typeof x === 'bigint') && // Is it an integer? x % 1 === 0 && // Is it positive? x > 0 && // Is it less than the maximum safe integer? x <= MAX_SAFE_INTEGER; } /** * Validate URL. * * @param {string} url - URL of database. * @param {boolean} isIAM - A flag if IAM authentication been used. * @returns Boolean true if all checks are passing. */ async function validateURL(url, isIAM) { if (typeof url !== 'string') { throw new OptionError('Invalid URL, must be type string'); } // Validate URL and ensure no auth if using key try { const urlObject = new URL(url); // We require a protocol, host and path (for db), fail if any is missing. if (urlObject.protocol !== 'https:' && urlObject.protocol !== 'http:') { throw new OptionError('Invalid URL protocol.'); } if (!urlObject.pathname || urlObject.pathname === '/') { throw new OptionError('Invalid URL, missing path element (no database).'); } if (isIAM && (urlObject.username || urlObject.password)) { throw new OptionError('URL user information must not be supplied when using IAM API key.'); } } catch (err) { throw convertError(err); } return true; } /** * Validate options. * * @param {object} opts - Options. * @returns Boolean true if all checks are passing. */ async function validateOptions(opts) { // if we don't have opts then we'll be using defaults if (!opts) { return true; } const rules = [ { key: 'iamApiKey', type: 'string' }, { key: 'log', type: 'string' }, { key: 'output', type: 'string' }, { key: 'bufferSize', type: 'number' }, { key: 'parallelism', type: 'number' }, { key: 'requestTimeout', type: 'number' }, { key: 'mode', type: 'enum', values: ['full', 'shallow'] }, { key: 'resume', type: 'boolean' }, { key: 'quiet', type: 'boolean' }, { key: 'attachments', type: 'boolean' } ]; for (const rule of rules) { const val = opts[rule.key]; switch (rule.type) { case 'string': if (typeof val !== 'undefined' && typeof val !== 'string') { throw new OptionError(`Invalid ${rule.key} option, must be type string`); } break; case 'number': if (typeof val !== 'undefined' && !isSafePositiveInteger(val)) { const humanized = rule.key.replace(/[A-Z]/g, l => ` ${l.toLowerCase()}`); throw new OptionError(`Invalid ${humanized} option, must be a positive integer in the range (0, MAX_SAFE_INTEGER]`); } break; case 'enum': if (typeof val !== 'undefined' && rule.values.indexOf(val) === -1) { const humanized = rule.values .map(w => `"${w}"`) .reduce((acc, w, i, arr) => { return acc + (i < arr.length - 1 ? ', ' : ' or ') + w; }); throw new OptionError(`Invalid mode option, must be either ${humanized}`); } break; case 'boolean': if (typeof val !== 'undefined' && typeof val !== 'boolean') { throw new OptionError(`Invalid ${rule.key} option, must be type boolean`); } } } return true; } /** * Show warning on invalid params in shallow mode. * * @param {object} opts - Options. */ async function shallowModeWarnings(opts) { if (!opts || opts.mode !== 'shallow') { return; } // Perform validation of invalid options for shallow mode and WARN // We don't error for backwards compatibility with scripts that may have been // written passing complete sets of options through if (opts.log || opts.resume) { console.warn('WARNING: the options "log" and "resume" are invalid when using shallow mode.'); } if (opts.parallelism) { console.warn('WARNING: the option "parallelism" has no effect when using shallow mode.'); } } /** * Additional checks for log on resume. * * @param {object} opts - Options. * @returns Boolean true if all checks are passing. */ async function validateLogOnResume(opts) { const logFileExists = opts && opts.log && fs.existsSync(opts.log); if (!opts || opts.mode === 'shallow') { // No opts specified, defaults will be populated. // In shallow mode log/resume are irrelevant and we'll have warned already. return true; } else if (opts.resume) { // Expecting to resume if (!opts.log) { // This is the second place we check for the presence of the log option in conjunction with resume // It has to be here for the API case throw new BackupError('NoLogFileName', 'To resume a backup, a log file must be specified'); } else if (!logFileExists) { throw new BackupError('LogDoesNotExist', 'To resume a backup, the log file must exist'); } if (opts.bufferSize) { // Warn that the bufferSize is already fixed console.warn('WARNING: the original backup "bufferSize" applies when resuming a backup.'); } } else { // Not resuming if (logFileExists) { throw new BackupError('LogFileExists', `The log file ${opts.log} exists. ` + 'Use the resume option if you want to resume a backup from an existing log file.'); } } return true; } async function attachmentWarnings(opts) { if (opts && opts.attachments) { console.warn('WARNING: The "attachments" option is provided as-is and is not supported. ' + 'This option is for Apache CouchDB only and is experimental. ' + 'Do not use this option with IBM Cloudant.'); } } /** * Validate arguments. * * @param {string} url - URL of database. * @param {object} opts - Options. * @param {boolean} backup - true for backup, false for restore * @returns Boolean true if all checks are passing. */ async function validateArgs(url, opts, isBackup = true) { const isIAM = opts && typeof opts.iamApiKey === 'string'; const validations = [ validateURL(url, isIAM), validateOptions(opts), attachmentWarnings(opts) ]; if (isBackup) { validations.push( shallowModeWarnings(opts), validateLogOnResume(opts) ); } return Promise.all(validations); } /** * Check the backup database exists and that the credentials used have * visibility. Throw a fatal error if there is a problem with the DB. * * @param {object} dbClient - database client object * @returns Passed in database client object */ async function validateBackupDb(dbClient) { try { await dbClient.service.headDatabase({ db: dbClient.dbName }); return dbClient; } catch (err) { const e = parseDbResponseError(dbClient, err); if (e.name === 'DatabaseNotFound') { e.message = `${err.message} Ensure the backup source database exists.`; } // maybe convert it to HTTPError throw convertError(e); } } /** * Check that the restore database exists, is new and is empty. Also verify that the credentials used have * visibility. Callback with a fatal error if there is a problem with the DB. * * @param {object} dbClient - database client object * @returns Passed in database client object */ async function validateRestoreDb(dbClient) { try { const response = await dbClient.service.getDatabaseInformation({ db: dbClient.dbName }); const { docCount, docDelCount } = response.result; // The system databases can have a validation ddoc(s) injected in them on creation. // This sets the doc count off, so we just complitely exclude the system databases from this check. // The assumption here is that users restoring system databases know what they are doing. if (!dbClient.dbName.startsWith('_') && (docCount !== 0 || docDelCount !== 0)) { throw new BackupError('DatabaseNotEmpty', `Target database ${dbClient.url}${dbClient.dbName} is not empty. A target database must be a new and empty database.`); } // good to use return dbClient; } catch (err) { const e = parseDbResponseError(dbClient, err); if (e.name === 'DatabaseNotFound') { e.message = `${e.message} Create the target database before restoring.`; } // maybe convert it to HTTPError throw convertError(e); } } /** * Convert the database validation response error to a special DatabaseNotFound error * in case the database is missing. Otherwise returns an original error. * @param {object} dbClient - database client object * @param {object} err - HTTP response error * @returns {Error} - DatabaseNotFound error or passed in err */ function parseDbResponseError(dbClient, err) { if (err && err.status === 404) { // Override the error type and message for the DB not found case const msg = `Database ${dbClient.url}` + `${dbClient.dbName} does not exist. ` + 'Check the URL and database name have been specified correctly.'; return new BackupError('DatabaseNotFound', msg); } return err; } module.exports = { /** * Backup a Cloudant database to a stream. * * @param {string} srcUrl - URL of database to backup. * @param {stream.Writable} targetStream - Stream to write content to. * @param {object} opts - Backup options. * @param {number} [opts.parallelism=5] - Number of parallel HTTP requests to use. * @param {number} [opts.bufferSize=500] - Number of documents per batch request. * @param {number} [opts.requestTimeout=120000] - Milliseconds to wait before retrying a HTTP request. * @param {string} [opts.iamApiKey] - IAM API key to use to access Cloudant database. * @param {string} [opts.log] - Log file name. Default uses a temporary file. * @param {boolean} [opts.resume] - Whether to resume from existing log. * @param {string} [opts.mode=full] - Use `full` or `shallow` mode. * @param {backupRestoreCallback} callback - Called on completion. */ backup: function(srcUrl, targetStream, opts, callback) { if (typeof callback === 'undefined' && typeof opts === 'function') { callback = opts; opts = {}; } const ee = new events.EventEmitter(); validateArgs(srcUrl, opts) // Set up the DB client .then(() => { opts = Object.assign({}, defaults(), opts); return newClient(srcUrl, opts); }) // Validate the DB exists, before proceeding to backup .then(backupDbClient => validateBackupDb(backupDbClient)) .then(backupDbClient => { // Write either a file header or a resume marker. let metadataToWrite; if (opts.mode === 'full' && opts.resume) { // resume is valid in full mode only // Write the resume marker and a newline as it's possible one would be missing from // an interruption of the previous backup. If the backup was clean this // will cause an empty line that will be gracefully handled by the restore. debug('Will write resume marker.'); metadataToWrite = `${RESUME_COMMENT}\n`; } else { // Write a file header including the name, version and mode debug('Will write backup file header.'); metadataToWrite = `${JSON.stringify({ name: pkg.name, version: pkg.version, mode: opts.mode, attachments: opts.attachments })}\n`; } return new Promise((resolve, reject) => { targetStream.write(metadataToWrite, 'utf-8', (err) => { if (err) { reject(err); } else { resolve(backupDbClient); } }); }); }) .then((backupDbClient) => backup(backupDbClient, opts, targetStream, ee) ) .then((total) => { debug(`Finished backup with total doc count of ${total}`); ee.emit('finished', total); callback(null, total); }) .catch(e => callback(convertError(e))); return ee; }, /** * Restore a backup from a stream. * * @param {stream.Readable} srcStream - Stream containing backed up data. * @param {string} targetUrl - Target database. * @param {object} opts - Restore options. * @param {number} opts.parallelism - Number of parallel HTTP requests to use. Default 5. * @param {number} opts.bufferSize - Number of documents per batch request. Default 500. * @param {number} opts.requestTimeout - Milliseconds to wait before retrying a HTTP request. Default 120000. * @param {string} opts.iamApiKey - IAM API key to use to access Cloudant database. * @param {backupRestoreCallback} callback - Called on completion. */ restore: function(srcStream, targetUrl, opts, callback) { if (typeof callback === 'undefined' && typeof opts === 'function') { callback = opts; opts = {}; } const ee = new events.EventEmitter(); validateArgs(targetUrl, opts, false) // Set up the DB client .then(() => { opts = Object.assign({}, defaults(), opts); return newClient(targetUrl, opts); }) // Validate the DB exists, before proceeding to restore .then((restoreDbClient) => validateRestoreDb(restoreDbClient)) .then((restoreDbClient) => { return restoreInternal( restoreDbClient, opts, srcStream, ee); }) .then((total) => { ee.emit('finished', total); callback(null, total); }) .catch(e => callback(convertError(e))); return ee; } }; /** * Backup/restore callback * @callback backupRestoreCallback * @param {Error} err - Error object if operation failed. * @param {object} data - summary data for backup/restore */