@naturalcycles/db-lib

Lowest Common Denominator API to supported Databases

import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js';
import { AppError, ErrorMode } from '@naturalcycles/js-lib/error';
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js';
import { _passthroughMapper } from '@naturalcycles/js-lib/types';
import { boldWhite, dimWhite, grey, yellow } from '@naturalcycles/nodejs-lib/colors';
import { fs2 } from '@naturalcycles/nodejs-lib/fs2';
import { NDJsonStats } from '@naturalcycles/nodejs-lib/stream';
import { DBQuery } from '../query/dbQuery.js';

/**
 * Pipeline from input stream(s) to an NDJSON file (optionally zstd-compressed).
 * File is overwritten (by default).
 * Input stream can be a stream from CommonDB.streamQuery().
 * Allows you to define a mapper and a predicate to map/filter objects between input and output.
 * Handles backpressure.
 *
 * Optionally you can provide mapperPerTable and transformMapOptions (one shared by all mappers);
 * the mapper will run for each table.
 */
export async function dbPipelineBackup(opt) {
  const {
    db,
    concurrency = 16,
    limit = 0,
    outputDirPath,
    protectFromOverwrite = false,
    mapperPerTable = {},
    queryPerTable = {},
    logEveryPerTable = {},
    transformMapOptions,
    errorMode = ErrorMode.SUPPRESS,
    emitSchemaFromDB = false,
    zst = true,
  } = opt;
  let { tables } = opt;

  console.log(`>> ${dimWhite('dbPipelineBackup')} started in ${grey(outputDirPath)}...`);
  fs2.ensureDir(outputDirPath);
  tables ||= await db.getTables();
  console.log(`${yellow(tables.length)} ${boldWhite('table(s)')}:\n` + tables.join('\n'));

  const statsPerTable = {};

  await pMap(
    tables,
    async (table) => {
      let q = DBQuery.create(table).limit(limit);
      const sinceUpdated = opt.sinceUpdatedPerTable?.[table] ?? opt.sinceUpdated;

      if (sinceUpdated) {
        q = q.filter('updated', '>=', sinceUpdated);
      }

      if (queryPerTable[table]) {
        // Override the Query with this Query, completely ignoring any of the other query-related options
        q = queryPerTable[table];
        console.log(`>> ${grey(table)} ${q.pretty()}`);
      } else {
        const sinceUpdatedStr = sinceUpdated
          ? ' since ' + grey(localTime(sinceUpdated).toPretty())
          : '';
        console.log(`>> ${grey(table)}${sinceUpdatedStr}`);
      }

      const filePath = `${outputDirPath}/${table}.ndjson` + (zst ? '.zst' : '');
      const schemaFilePath = `${outputDirPath}/${table}.schema.json`;

      if (protectFromOverwrite && fs2.pathExists(filePath)) {
        throw new AppError(`dbPipelineBackup: output file exists: ${filePath}`);
      }

      const started = Date.now();
      let rows = 0;

      fs2.ensureFile(filePath);
      // console.log(`>> ${grey(filePath)} started...`)

      if (emitSchemaFromDB) {
        const schema = await db.getTableSchema(table);
        await fs2.writeJsonAsync(schemaFilePath, schema, { spaces: 2 });
        console.log(`>> ${grey(schemaFilePath)} saved (generated from DB)`);
      }

      await db
        .streamQuery(q)
        .logProgress({
          ...opt,
          logEvery: logEveryPerTable[table] ?? opt.logEvery ?? 1000,
          metric: table,
        })
        .map(mapperPerTable[table] || _passthroughMapper, {
          errorMode,
          ...transformMapOptions,
          metric: table,
        })
        .flattenIfNeeded()
        .tapSync(() => rows++)
        .toNDJsonFile(filePath);

      const { size: sizeBytes } = await fs2.statAsync(filePath);

      const stats = NDJsonStats.create({
        tookMillis: Date.now() - started,
        rows,
        sizeBytes,
      });

      console.log(`>> ${grey(filePath)}\n` + stats.toPretty());
      statsPerTable[table] = stats;
    },
    { concurrency, errorMode },
  );

  const statsTotal = NDJsonStats.createCombined(Object.values(statsPerTable));
  console.log(statsTotal.toPretty('total'));
  return statsTotal;
}
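
For orientation, here is a minimal usage sketch. The option names (db, outputDirPath, tables, concurrency, sinceUpdated, errorMode) come straight from the destructuring above, and ErrorMode is imported from the same '@naturalcycles/js-lib/error' path the file itself uses. The db-lib import paths and the InMemoryDB class are assumptions about the package's public exports, not verified here; swap in whichever CommonDB implementation you actually use.

// Hypothetical imports: adjust paths to the package's actual public exports.
import { dbPipelineBackup, InMemoryDB } from '@naturalcycles/db-lib';
import { ErrorMode } from '@naturalcycles/js-lib/error';

const db = new InMemoryDB(); // any CommonDB implementation works here

// Writes ./backup/<table>.ndjson.zst (one file per table, zst defaults to true)
// and returns the combined NDJsonStats.
const stats = await dbPipelineBackup({
  db,
  outputDirPath: './backup',
  tables: ['users', 'sessions'], // omit to back up every table from db.getTables()
  concurrency: 4, // tables processed in parallel (default 16)
  sinceUpdated: 1704067200, // only rows whose `updated` field is >= this timestamp
  errorMode: ErrorMode.THROW_IMMEDIATELY, // fail fast instead of the default SUPPRESS
});

console.log(stats.toPretty('backup'));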