@naturalcycles/db-lib

Lowest Common Denominator API to supported Databases

import { localTime } from '@naturalcycles/js-lib/datetime/localTime.js'
import { ErrorMode } from '@naturalcycles/js-lib/error/errorMode.js'
import { pMap } from '@naturalcycles/js-lib/promise/pMap.js'
import type { AsyncMapper, BaseDBEntity, UnixTimestamp } from '@naturalcycles/js-lib/types'
import { _passthroughMapper } from '@naturalcycles/js-lib/types'
import { boldWhite, dimWhite, grey, yellow } from '@naturalcycles/nodejs-lib/colors'
import type {
  TransformLogProgressOptions,
  TransformMapOptions,
} from '@naturalcycles/nodejs-lib/stream'
import {
  _pipeline,
  NDJsonStats,
  transformChunk,
  transformLogProgress,
  transformMap,
  transformTap,
  writableForEach,
} from '@naturalcycles/nodejs-lib/stream'
import type { CommonDB } from '../commondb/common.db.js'
import type { CommonDBSaveOptions } from '../db.model.js'
import { DBQuery } from '../query/dbQuery.js'

export interface DBPipelineCopyOptions extends TransformLogProgressOptions {
  dbInput: CommonDB
  dbOutput: CommonDB

  /**
   * List of tables to dump. If undefined, CommonDB.getTables() is called and ALL returned tables are dumped.
   */
  tables?: string[]

  /**
   * How many tables to dump in parallel.
   *
   * @default 16
   * Set to `1` for serial (one at a time) processing or debugging.
   */
  concurrency?: number

  /**
   * @default 100
   *
   * Determines the size of .saveBatch()
   */
  chunkSize?: number

  /**
   * @default ErrorMode.SUPPRESS
   *
   * Used in the high-level pMap(tables, ...).
   * Also used as the default for TransformMapOptions.
   */
  errorMode?: ErrorMode

  /**
   * @default undefined
   * If set, dumps at most that number of rows per table.
   */
  limit?: number

  /**
   * If set, performs an "incremental backup" (not full): only entities with `updated` >= `sinceUpdated` are copied.
   *
   * @default undefined
   */
  sinceUpdated?: UnixTimestamp

  /**
   * Optionally provide a mapper to run for each table.
   *
   * @default `{}`
   * The default mapper is "passthroughMapper" (passes all data as-is).
   */
  mapperPerTable?: Record<string, AsyncMapper>

  /**
   * You can override the default `transformMapOptions` here.
   *
   * @default (see the code)
   * The goal is to have default values that are reasonable for such a job and produce resilient output (forgiving individual errors).
   * `metric` will be set to the table name.
   */
  transformMapOptions?: TransformMapOptions

  saveOptionsPerTable?: Record<string, CommonDBSaveOptions<any>>
}

/**
 * Pipeline from input stream(s) to CommonDB .saveBatch().
 * The input stream can be a stream from CommonDB.streamQuery().
 * Allows defining a mapper and a predicate to map/filter objects between input and output.
 * Handles backpressure.
 */
export async function dbPipelineCopy(opt: DBPipelineCopyOptions): Promise<NDJsonStats> {
  const {
    chunkSize = 100,
    dbInput,
    dbOutput,
    concurrency = 16,
    limit = 0,
    sinceUpdated,
    mapperPerTable = {},
    saveOptionsPerTable = {},
    transformMapOptions,
    errorMode = ErrorMode.SUPPRESS,
  } = opt
  let { tables } = opt

  const sinceUpdatedStr = sinceUpdated ? ' since ' + grey(localTime(sinceUpdated).toPretty()) : ''

  console.log(`>> ${dimWhite('dbPipelineCopy')} started...${sinceUpdatedStr}`)

  tables ||= await dbInput.getTables()

  console.log(`${yellow(tables.length)} ${boldWhite('table(s)')}:\n` + tables.join('\n'))

  const statsPerTable: Record<string, NDJsonStats> = {}

  await pMap(
    tables,
    async table => {
      let q = DBQuery.create<BaseDBEntity>(table).limit(limit)

      if (sinceUpdated) {
        q = q.filter('updated', '>=', sinceUpdated)
      }

      const saveOptions: CommonDBSaveOptions<any> = saveOptionsPerTable[table] || {}
      const mapper = mapperPerTable[table] || _passthroughMapper

      const stream = dbInput.streamQuery(q)

      const started = Date.now()
      let rows = 0

      await _pipeline([
        stream,
        transformLogProgress({
          logEvery: 1000,
          ...opt,
          metric: table,
        }),
        transformMap(mapper, {
          errorMode,
          flattenArrayOutput: true,
          ...transformMapOptions,
          metric: table,
        }),
        transformTap(() => rows++),
        transformChunk({ chunkSize }),
        writableForEach(async dbms => {
          await dbOutput.saveBatch(table, dbms, saveOptions)
        }),
      ])

      const stats = NDJsonStats.create({
        tookMillis: Date.now() - started,
        rows,
        sizeBytes: 0, // n/a
      })

      console.log(`>> ${grey(table)}\n` + stats.toPretty())

      statsPerTable[table] = stats
    },
    { concurrency, errorMode },
  )

  const statsTotal = NDJsonStats.createCombined(Object.values(statsPerTable))
  console.log(statsTotal.toPretty('total'))

  return statsTotal
}
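Example usage (a minimal sketch, not part of the file above): the import path and the `sourceDb` / `targetDb` instances are assumptions — construct them with whichever CommonDB implementation you use (Datastore, MySQL, in-memory, etc.), and adjust the import to however your version of the package exposes `dbPipelineCopy`.

import type { UnixTimestamp } from '@naturalcycles/js-lib/types'
import type { CommonDB } from '@naturalcycles/db-lib'
import { dbPipelineCopy } from '@naturalcycles/db-lib'

// Hypothetical CommonDB instances; their construction is omitted here.
declare const sourceDb: CommonDB
declare const targetDb: CommonDB

const stats = await dbPipelineCopy({
  dbInput: sourceDb,
  dbOutput: targetDb,
  tables: ['users', 'accounts'], // omit to copy ALL tables from getTables()
  concurrency: 4, // copy 4 tables at a time (default is 16)
  // Incremental copy: only rows with `updated` >= this timestamp
  sinceUpdated: 1700000000 as UnixTimestamp,
  mapperPerTable: {
    // Hypothetical mapper: redact a field from `users` rows while copying
    users: async row => {
      const { secret, ...rest } = row as any
      return rest
    },
  },
})

console.log(stats.toPretty('total'))

Note that errorMode defaults to ErrorMode.SUPPRESS, so individual failing rows (or tables) are logged and skipped rather than aborting the whole copy; pass ErrorMode.THROW_IMMEDIATELY if you want the pipeline to fail fast instead.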