iterparse
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.csvWrite = exports.csvRead = void 0; const tslib_1 = require("tslib"); const Papa = tslib_1.__importStar(require("papaparse")); const fs_extra_1 = require("fs-extra"); const P = tslib_1.__importStar(require("ts-prime")); const helpers_1 = require("./helpers"); const ts_prime_1 = require("ts-prime"); const types_1 = require("./types"); /** * Read CSV file. In memory efficient way. * @include ./CSVReadOptions.md * @example * import { csvRead } from 'iterparse' * csvRead({ filePath: 'path/to/file' }) * .map((q)=> console.log(q)) * .count() * * @example * import { csvRead } from 'iterparse' * for await (const item of csvRead({ filePath: 'path/to/file' })) { * console.log(item) * } * @category CSV */ function csvRead(options) { const { progressFrequency = 3000 } = options || {}; function iter() { return tslib_1.__asyncGenerator(this, arguments, function* iter_1() { const fileStats = fs_extra_1.statSync(options.filePath); const progress = new helpers_1.Progress(options.filePath, fileStats.size, Date.now()); const log = () => { var _a; (_a = options === null || options === void 0 ? void 0 : options.progress) === null || _a === void 0 ? void 0 : _a.call(options, progress); }; const logTh = P.throttle(log, progressFrequency); let obj = []; let done = false; const source = fs_extra_1.createReadStream(options.filePath); source.on('data', (q) => { if (q instanceof Buffer) { progress.add(q.byteLength); return; } progress.add(Buffer.from(q).byteLength); }); Papa.parse(source, Object.assign(Object.assign({ header: true, skipEmptyLines: true, dynamicTyping: true, transformHeader: (parsed) => parsed.trim().replace(/^"/, "").replace(/"$/, ""), transform: (value) => { return value.trim().replace(/^"/, "").replace(/"$/, ""); } }, options), { step: function (row) { obj.push(row); if (obj.length === 100) { source.pause(); } }, complete: function () { done = true; } })); while (!done || obj.length !== 0) { logTh(); const item = obj.shift(); if (item == null) { source.resume(); yield tslib_1.__await(ts_prime_1.delay(0)); continue; } yield yield tslib_1.__await(item); progress.addItem(); } log(); }); } return types_1.IX.from(iter()); } exports.csvRead = csvRead; function _csvWrite(data, options) { return tslib_1.__asyncGenerator(this, arguments, function* _csvWrite_1() { var e_1, _a; let chunk = 0; let dest = 0; let haveFile = null; if (options.mode === 'overwrite' && fs_extra_1.existsSync(options.filePath)) { fs_extra_1.unlinkSync(options.filePath); } const progress = new helpers_1.WriteProgress(options.filePath, Date.now()); const log = () => { var _a; (_a = options.progress) === null || _a === void 0 ? void 0 : _a.call(options, progress); }; const inter = setInterval(log, options.progressFrequency || 3000); if (fs_extra_1.existsSync(options.filePath)) { haveFile = true; } try { for (var _b = tslib_1.__asyncValues(types_1.IX.from(data).buffer(options.writeBuffer || 1000)), _c; _c = yield tslib_1.__await(_b.next()), !_c.done;) { const items = _c.value; if (dest === 0) { yield tslib_1.__await(fs_extra_1.ensureFile(options.filePath) // Accessing stream only when receiving first item. // This is convenient because. If stream have 0 items I will not create any file ); // Accessing stream only when receiving first item. // This is convenient because. 
If stream have 0 items I will not create any file dest = yield tslib_1.__await(fs_extra_1.open(options.filePath, 'a')); } const normalized = items.map((q) => { return ts_prime_1.mapRecord(q, ([k, v]) => { if (ts_prime_1.isArray(v) || ts_prime_1.isObject(v)) { return [k, JSON.stringify(v)]; } return [k, v]; }); }); const csv = Papa.unparse(normalized, Object.assign({ header: chunk === 0 && !haveFile }, options)); const buffer = Buffer.from(`${csv}\r\n`); yield tslib_1.__await(fs_extra_1.appendFile(dest, buffer, { encoding: 'utf8' })); progress.add(buffer.byteLength); for (const iv of items) { yield yield tslib_1.__await(iv); } progress.addItem(items.length); chunk++; } } catch (e_1_1) { e_1 = { error: e_1_1 }; } finally { try { if (_c && !_c.done && (_a = _b.return)) yield tslib_1.__await(_a.call(_b)); } finally { if (e_1) throw e_1.error; } } clearInterval(inter); log(); }); } function csvWrite() { return ts_prime_1.purry(_csvWrite, arguments); } exports.csvWrite = csvWrite;
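/**
 * Usage sketch (illustration only, not part of the compiled output): one way the two
 * exports above can be combined, assuming only the option names that appear in the code
 * (`filePath`, `mode`, `progress`, `writeBuffer`). The file paths and the `price` field
 * are hypothetical. `csvRead` returns an `IX` async iterable, so it chains with
 * `map`/`count` as in the doc comment above; `csvWrite` is curried via `purry`, and in the
 * data-first form shown here it returns an async generator that yields each row after
 * appending it, so it must be consumed for the write to actually happen.
 *
 *   const { csvRead, csvWrite } = require('iterparse')
 *
 *   async function convert() {
 *       const rows = csvRead({ filePath: 'in.csv', progress: (p) => console.log(p) })
 *           .map((row) => ({ ...row, price: Number(row.price) || 0 }))
 *       for await (const row of csvWrite(rows, { filePath: 'out.csv', mode: 'overwrite' })) {
 *           // `row` has already been appended to out.csv at this point
 *       }
 *   }
 *
 *   convert().catch(console.error)
 */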