UNPKG

@naturalcycles/db-lib

Version:

Lowest Common Denominator API to supported Databases

99 lines (98 loc) 4.72 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.dbPipelineRestore = dbPipelineRestore; const js_lib_1 = require("@naturalcycles/js-lib"); const nodejs_lib_1 = require("@naturalcycles/nodejs-lib"); /** * Pipeline from NDJSON files in a folder (optionally gzipped) to CommonDB. * Allows to define a mapper and a predicate to map/filter objects between input and output. * Handles backpressure. * * Optionally you can provide mapperPerTable and @param transformMapOptions (one for all mappers) - it will run for each table. */ async function dbPipelineRestore(opt) { const { db, concurrency = 16, chunkSize = 100, limit, sinceUpdated, inputDirPath, mapperPerTable = {}, saveOptionsPerTable = {}, transformMapOptions, errorMode = js_lib_1.ErrorMode.SUPPRESS, recreateTables = false, } = opt; const onlyTables = opt.tables && new Set(opt.tables); const sinceUpdatedStr = sinceUpdated ? ' since ' + (0, nodejs_lib_1.grey)((0, js_lib_1.localTime)(sinceUpdated).toPretty()) : ''; console.log(`>> ${(0, nodejs_lib_1.dimWhite)('dbPipelineRestore')} started in ${(0, nodejs_lib_1.grey)(inputDirPath)}...${sinceUpdatedStr}`); nodejs_lib_1.fs2.ensureDir(inputDirPath); const tablesToGzip = new Set(); const sizeByTable = {}; const statsPerTable = {}; const tables = []; nodejs_lib_1.fs2.readdir(inputDirPath).forEach(f => { let table; let gzip = false; if (f.endsWith('.ndjson')) { table = f.slice(0, f.length - '.ndjson'.length); } else if (f.endsWith('.ndjson.gz')) { table = f.slice(0, f.length - '.ndjson.gz'.length); gzip = true; } else { return; } if (onlyTables && !onlyTables.has(table)) return; // skip table tables.push(table); if (gzip) tablesToGzip.add(table); sizeByTable[table] = nodejs_lib_1.fs2.stat(`${inputDirPath}/${f}`).size; }); const sizeStrByTable = (0, js_lib_1._mapValues)(sizeByTable, (_k, b) => (0, js_lib_1._hb)(b)); console.log(`${(0, nodejs_lib_1.yellow)(tables.length)} ${(0, nodejs_lib_1.boldWhite)('table(s)')}:\n`, sizeStrByTable); // const schemaByTable: Record<string, CommonSchema> = {} if (recreateTables) { await (0, js_lib_1.pMap)(tables, async (table) => { const schemaFilePath = `${inputDirPath}/${table}.schema.json`; if (!nodejs_lib_1.fs2.pathExists(schemaFilePath)) { console.warn(`${schemaFilePath} does not exist!`); return; } const schema = await nodejs_lib_1.fs2.readJsonAsync(schemaFilePath); await db.createTable(table, schema, { dropIfExists: true }); }); } await (0, js_lib_1.pMap)(tables, async (table) => { const gzip = tablesToGzip.has(table); const filePath = `${inputDirPath}/${table}.ndjson` + (gzip ? '.gz' : ''); const saveOptions = saveOptionsPerTable[table] || {}; const started = Date.now(); let rows = 0; const sizeBytes = sizeByTable[table]; console.log(`<< ${(0, nodejs_lib_1.grey)(filePath)} ${(0, nodejs_lib_1.dimWhite)((0, js_lib_1._hb)(sizeBytes))} started...`); await (0, nodejs_lib_1._pipeline)([ nodejs_lib_1.fs2.createReadStreamAsNDJSON(filePath).take(limit || Number.POSITIVE_INFINITY), (0, nodejs_lib_1.transformTap)(() => rows++), (0, nodejs_lib_1.transformLogProgress)({ logEvery: 1000, ...opt, metric: table, }), ...(sinceUpdated ? [(0, nodejs_lib_1.transformFilterSync)(r => r.updated >= sinceUpdated)] : []), (0, nodejs_lib_1.transformMap)(mapperPerTable[table] || js_lib_1._passthroughMapper, { errorMode, flattenArrayOutput: true, ...transformMapOptions, metric: table, }), (0, nodejs_lib_1.transformChunk)({ chunkSize }), (0, nodejs_lib_1.writableForEach)(async (dbms) => { await db.saveBatch(table, dbms, saveOptions); }), ]); const stats = nodejs_lib_1.NDJsonStats.create({ tookMillis: Date.now() - started, rows, sizeBytes, }); console.log(`<< ${(0, nodejs_lib_1.grey)(filePath)}\n` + stats.toPretty()); statsPerTable[table] = stats; }, { concurrency, errorMode }); const statsTotal = nodejs_lib_1.NDJsonStats.createCombined(Object.values(statsPerTable)); console.log(statsTotal.toPretty('total')); return statsTotal; }