// iterparse — Delightful data parsing
// 141 lines (140 loc) • 5.87 kB — JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.csvWrite = exports.csvRead = void 0;
const tslib_1 = require("tslib");
const Papa = tslib_1.__importStar(require("papaparse"));
const fs_extra_1 = require("fs-extra");
const P = tslib_1.__importStar(require("ts-prime"));
const helpers_1 = require("./helpers");
const ts_prime_1 = require("ts-prime");
const types_1 = require("./types");
/**
 * Read a CSV file in a memory-efficient, streaming way.
 *
 * Returns an async iterable (IX) of parsed row objects. Rows are pulled
 * through a small in-memory buffer with backpressure, so the whole file is
 * never loaded at once.
 * @include ./CSVReadOptions.md
 * @example
 * import { csvRead } from 'iterparse'
 * csvRead({ filePath: 'path/to/file' })
 * .map((q)=> console.log(q))
 * .count()
 *
 * @example
 * import { csvRead } from 'iterparse'
 * for await (const item of csvRead({ filePath: 'path/to/file' })) {
 * console.log(item)
 * }
 * @category CSV
 */
function csvRead(options) {
    // How often (ms) the user-supplied `progress` callback may fire.
    const { progressFrequency = 3000 } = options || {};
    function iter() {
        return tslib_1.__asyncGenerator(this, arguments, function* iter_1() {
            // Total file size is captured up front so Progress can report bytes read.
            const fileStats = fs_extra_1.statSync(options.filePath);
            const progress = new helpers_1.Progress(options.filePath, fileStats.size, Date.now());
            // Invokes the optional `options.progress` callback (compiled optional chaining).
            const log = () => {
                var _a;
                (_a = options === null || options === void 0 ? void 0 : options.progress) === null || _a === void 0 ? void 0 : _a.call(options, progress);
            };
            // Throttled so progress reporting fires at most once per `progressFrequency` ms.
            const logTh = P.throttle(log, progressFrequency);
            // Side-channel buffer filled by Papa's `step` callback, drained by the loop below.
            let obj = [];
            let done = false;
            const source = fs_extra_1.createReadStream(options.filePath);
            // Count raw bytes as they stream in, for byte-level progress reporting.
            source.on('data', (q) => {
                if (q instanceof Buffer) {
                    progress.add(q.byteLength);
                    return;
                }
                progress.add(Buffer.from(q).byteLength);
            });
            // Headers and values are trimmed and stripped of surrounding quotes.
            // User `options` are spread over the defaults, then `step`/`complete`
            // are forced so the buffering protocol below cannot be overridden.
            // `step` is called once per row; pausing at exactly 100 buffered rows
            // provides backpressure against the read stream.
            // NOTE(review): if Papa.parse emits an error, `done` is never set and the
            // drain loop below would spin forever — confirm error handling upstream.
            Papa.parse(source, Object.assign(Object.assign({ header: true, skipEmptyLines: true, dynamicTyping: true, transformHeader: (parsed) => parsed.trim().replace(/^"/, "").replace(/"$/, ""), transform: (value) => {
                return value.trim().replace(/^"/, "").replace(/"$/, "");
            } }, options), { step: function (row) {
                obj.push(row);
                if (obj.length === 100) {
                    source.pause();
                }
            }, complete: function () {
                done = true;
            } }));
            // Drain loop: yield buffered rows; when the buffer is empty, resume the
            // stream and yield to the event loop (`delay(0)`) so the parser can refill it.
            while (!done || obj.length !== 0) {
                logTh();
                const item = obj.shift();
                if (item == null) {
                    source.resume();
                    yield tslib_1.__await(ts_prime_1.delay(0));
                    continue;
                }
                // Compiled form of `yield item` inside an async generator.
                yield yield tslib_1.__await(item);
                progress.addItem();
            }
            // Final (unthrottled) progress report once the file is fully consumed.
            log();
        });
    }
    return types_1.IX.from(iter());
}
exports.csvRead = csvRead;
/**
 * Internal implementation of csvWrite: streams `data` (an async iterable of
 * records) into a CSV file at `options.filePath`, yielding each item back to
 * the caller so the pipeline can continue.
 *
 * Fixes over the previous revision:
 *  - the progress `setInterval` is now cleared in a `finally`, so it no longer
 *    leaks (keeping the process alive) when the source iterable or a write throws;
 *  - the file descriptor opened for appending is now closed on every exit path.
 */
function _csvWrite(data, options) {
    return tslib_1.__asyncGenerator(this, arguments, function* _csvWrite_1() {
        var e_1, _a;
        let chunk = 0;
        // `dest` holds the append-mode fd once the first item arrives; 0 means "not opened yet".
        let dest = 0;
        let haveFile = null;
        // In 'overwrite' mode, drop any pre-existing file before writing.
        if (options.mode === 'overwrite' && fs_extra_1.existsSync(options.filePath)) {
            fs_extra_1.unlinkSync(options.filePath);
        }
        const progress = new helpers_1.WriteProgress(options.filePath, Date.now());
        // Invokes the optional `options.progress` callback (compiled optional chaining).
        const log = () => {
            var _a;
            (_a = options.progress) === null || _a === void 0 ? void 0 : _a.call(options, progress);
        };
        // Periodic progress reporting, independent of write cadence.
        const inter = setInterval(log, options.progressFrequency || 3000);
        // If the file already exists (append mode), do not emit a header row.
        if (fs_extra_1.existsSync(options.filePath)) {
            haveFile = true;
        }
        try {
            try {
                // Buffer the source into batches to amortize serialization/write cost.
                for (var _b = tslib_1.__asyncValues(types_1.IX.from(data).buffer(options.writeBuffer || 1000)), _c; _c = yield tslib_1.__await(_b.next()), !_c.done;) {
                    const items = _c.value;
                    if (dest === 0) {
                        yield tslib_1.__await(fs_extra_1.ensureFile(options.filePath)
                        // Accessing stream only when receiving first item.
                        // This is convenient because. If stream have 0 items I will not create any file
                        );
                        // The file/fd is created lazily on the first batch, so an empty
                        // stream never touches the filesystem.
                        dest = yield tslib_1.__await(fs_extra_1.open(options.filePath, 'a'));
                    }
                    // CSV cells cannot hold nested structures; serialize arrays/objects to JSON.
                    const normalized = items.map((q) => {
                        return ts_prime_1.mapRecord(q, ([k, v]) => {
                            if (ts_prime_1.isArray(v) || ts_prime_1.isObject(v)) {
                                return [k, JSON.stringify(v)];
                            }
                            return [k, v];
                        });
                    });
                    // Header only for the very first chunk of a brand-new file.
                    const csv = Papa.unparse(normalized, Object.assign({ header: chunk === 0 && !haveFile }, options));
                    const buffer = Buffer.from(`${csv}\r\n`);
                    yield tslib_1.__await(fs_extra_1.appendFile(dest, buffer, {
                        encoding: 'utf8'
                    }));
                    progress.add(buffer.byteLength);
                    // Pass every written item through so downstream consumers can keep processing.
                    for (const iv of items) {
                        yield yield tslib_1.__await(iv);
                    }
                    progress.addItem(items.length);
                    chunk++;
                }
            }
            catch (e_1_1) { e_1 = { error: e_1_1 }; }
            finally {
                // Compiled for-await cleanup: close the source iterator, then rethrow.
                try {
                    if (_c && !_c.done && (_a = _b.return)) yield tslib_1.__await(_a.call(_b));
                }
                finally { if (e_1) throw e_1.error; }
            }
        }
        finally {
            // Runs on success, error, and early consumer break alike.
            clearInterval(inter);
            if (dest !== 0) {
                // Synchronous close avoids suspending the generator inside `finally`;
                // closing a single fd is cheap.
                fs_extra_1.closeSync(dest);
            }
        }
        // Final progress report (success path only, matching previous behavior).
        log();
    });
}
/**
 * Write an async stream of records to a CSV file.
 *
 * Supports both data-first (`csvWrite(data, options)`) and data-last /
 * curried (`csvWrite(options)(data)`) invocation; `purry` inspects the
 * argument count and dispatches to `_csvWrite` accordingly.
 * @category CSV
 */
function csvWrite() {
    // `arguments` is forwarded untouched so `purry` can decide which
    // call style the caller used.
    const callArgs = arguments;
    return ts_prime_1.purry(_csvWrite, callArgs);
}
exports.csvWrite = csvWrite;