// @naturalcycles/db-lib
// Version: (unspecified)
// Lowest Common Denominator API to supported Databases
// 85 lines (84 loc) • 4 kB
// JavaScript
import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
import { AppError, ErrorMode } from '@naturalcycles/js-lib/error';
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
import { _passthroughMapper } from '@naturalcycles/js-lib/types';
import { boldWhite, dimWhite, grey, yellow } from '@naturalcycles/nodejs-lib/colors';
import { fs2 } from '@naturalcycles/nodejs-lib/fs2';
import { NDJsonStats } from '@naturalcycles/nodejs-lib/stream';
import { DBQuery } from '../query/dbQuery.js';
/**
 * Pipeline from input stream(s) to a NDJSON file (optionally gzipped).
 * File is overwritten (by default).
 * Input stream can be a stream from CommonDB.streamQuery()
 * Allows to define a mapper and a predicate to map/filter objects between input and output.
 * Handles backpressure.
 *
 * Optionally you can provide `mapperPerTable` and `transformMapOptions`
 * (one for all mappers) - it will run for each table.
 *
 * Returns combined NDJsonStats over all processed tables.
 */
export async function dbPipelineBackup(opt) {
  const {
    db,
    concurrency = 16,
    limit = 0,
    outputDirPath,
    protectFromOverwrite = false,
    mapperPerTable = {},
    queryPerTable = {},
    logEveryPerTable = {},
    transformMapOptions,
    errorMode = ErrorMode.SUPPRESS,
    emitSchemaFromDB = false,
    zst = true,
  } = opt;
  console.log(`>> ${dimWhite('dbPipelineBackup')} started in ${grey(outputDirPath)}...`);
  fs2.ensureDir(outputDirPath);
  // Fall back to every table known to the DB when none were explicitly passed
  const tables = opt.tables || (await db.getTables());
  console.log(`${yellow(tables.length)} ${boldWhite('table(s)')}:\n` + tables.join('\n'));
  const statsPerTable = {};
  await pMap(
    tables,
    async table => {
      const sinceUpdated = opt.sinceUpdatedPerTable?.[table] ?? opt.sinceUpdated;
      let query = DBQuery.create(table).limit(limit);
      if (sinceUpdated) {
        query = query.filter('updated', '>=', sinceUpdated);
      }
      const overrideQuery = queryPerTable[table];
      if (overrideQuery) {
        // Override the Query with this Query, completely ignoring any of the other query-related options
        query = overrideQuery;
        console.log(`>> ${grey(table)} ${query.pretty()}`);
      } else {
        const sinceUpdatedStr = sinceUpdated
          ? ' since ' + grey(localTime(sinceUpdated).toPretty())
          : '';
        console.log(`>> ${grey(table)}${sinceUpdatedStr}`);
      }
      const filePath = `${outputDirPath}/${table}.ndjson${zst ? '.zst' : ''}`;
      if (protectFromOverwrite && fs2.pathExists(filePath)) {
        throw new AppError(`dbPipelineBackup: output file exists: ${filePath}`);
      }
      const startedAt = Date.now();
      let rowCount = 0;
      fs2.ensureFile(filePath);
      // console.log(`>> ${grey(filePath)} started...`)
      if (emitSchemaFromDB) {
        // Persist the table schema (as reported by the DB) next to the data file
        const schemaFilePath = `${outputDirPath}/${table}.schema.json`;
        const schema = await db.getTableSchema(table);
        await fs2.writeJsonAsync(schemaFilePath, schema, { spaces: 2 });
        console.log(`>> ${grey(schemaFilePath)} saved (generated from DB)`);
      }
      await db
        .streamQuery(query)
        .logProgress({
          ...opt,
          logEvery: logEveryPerTable[table] ?? opt.logEvery ?? 1000,
          metric: table,
        })
        .map(mapperPerTable[table] || _passthroughMapper, {
          errorMode,
          ...transformMapOptions,
          metric: table,
        })
        .flattenIfNeeded()
        .tapSync(() => rowCount++)
        .toNDJsonFile(filePath);
      const { size: sizeBytes } = await fs2.statAsync(filePath);
      const stats = NDJsonStats.create({
        tookMillis: Date.now() - startedAt,
        rows: rowCount,
        sizeBytes,
      });
      console.log(`>> ${grey(filePath)}\n` + stats.toPretty());
      statsPerTable[table] = stats;
    },
    { concurrency, errorMode },
  );
  const statsTotal = NDJsonStats.createCombined(Object.values(statsPerTable));
  console.log(statsTotal.toPretty('total'));
  return statsTotal;
}