// iterparse — "Delightful data parsing" (compiled JavaScript output, ~139 lines)
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.jsonWrite = exports.jsonRead = void 0;
const tslib_1 = require("tslib");
const fs_extra_1 = require("fs-extra");
const helpers_1 = require("./helpers");
const P = tslib_1.__importStar(require("ts-prime"));
const ts_prime_1 = require("ts-prime");
const types_1 = require("./types");
const JSONStream = require('JSONStream');
/**
 * Streams objects matched by `options.pattern` out of the JSON file at
 * `options.filePath`, reporting byte/item progress through
 * `options.progress` (throttled to `options.progressFrequency`, default
 * 3000 ms).
 *
 * Backpressure is crude but effective: the read stream is paused once
 * more than 10 parsed objects are buffered, and resumed whenever the
 * consumer drains the buffer.
 *
 * @param options - { filePath, pattern, progress?, progressFrequency? }
 * @returns async generator of parsed objects
 * @throws any error emitted by the underlying read stream
 */
async function* _jsonIterParser(options) {
    const pStream = fs_extra_1.createReadStream(options.filePath);
    const size = fs_extra_1.statSync(options.filePath);
    const progress = new helpers_1.Progress(options.filePath, size.size, Date.now());
    const log = () => {
        var _a;
        (_a = options.progress) === null || _a === void 0 ? void 0 : _a.call(options, progress);
    };
    const logTh = P.throttle(log, options.progressFrequency || 3000);
    pStream.on('data', (q) => {
        // Track raw bytes read for progress reporting.
        progress.add(q instanceof Buffer ? q.byteLength : Buffer.from(q).byteLength);
    });
    const parser = JSONStream.parse(options.pattern);
    pStream.pipe(parser);
    const data = [];
    let done = false;
    let streamError = null;
    parser.on(`data`, (obj) => {
        data.push(obj);
        if (data.length > 10) {
            pStream.pause();
        }
    });
    pStream.on('close', () => {
        done = true;
    });
    pStream.on('end', () => {
        done = true;
    });
    // BUG FIX: throwing from inside an 'error' handler crashes the process
    // (uncaught emitter exception) instead of failing the consumer's
    // iteration. Record the error and rethrow it from the generator body.
    pStream.on('error', (err) => {
        streamError = err;
        done = true;
    });
    while (!done || data.length > 0) {
        if (streamError) {
            throw streamError;
        }
        logTh();
        // BUG FIX: the original did `const d = data.shift(); if (!d) ...`,
        // which silently dropped falsy parsed values (0, "", false, null).
        // Test emptiness explicitly instead of testing the value.
        if (data.length === 0) {
            // Nothing buffered yet: yield to the event loop and keep reading.
            await P.delay(0);
            pStream.resume();
            continue;
        }
        yield data.shift();
        progress.addItem(1);
    }
    if (streamError) {
        throw streamError;
    }
    log();
}
/**
 * Streams `data` (any sync/async iterable) into `args.filePath` as a JSON
 * array, writing in batches of 1000 items and re-yielding every item so the
 * write can be composed into a larger pipeline.
 *
 * @param data - iterable of serializable values
 * @param args - { filePath, mode?: 'overwrite' | 'append', progress?, progressFrequency? }
 * @returns IX async iterable that re-emits every written item
 */
function _jsonWrite(data, args) {
    async function* iter() {
        const buffered = types_1.IX.from(data).buffer(1000);
        let dest = 0;
        let wroteAny = false;
        const { mode = 'overwrite' } = args;
        if (mode === 'overwrite' && fs_extra_1.existsSync(args.filePath)) {
            fs_extra_1.unlinkSync(args.filePath);
        }
        const progress = new helpers_1.WriteProgress(args.filePath, Date.now());
        const log = () => {
            var _a;
            (_a = args.progress) === null || _a === void 0 ? void 0 : _a.call(args, progress);
        };
        const interval = setInterval(log, args.progressFrequency || 3000);
        try {
            for await (const item of buffered) {
                if (dest === 0) {
                    // Lazily open the destination on the first batch.
                    await fs_extra_1.ensureFile(args.filePath);
                    dest = await fs_extra_1.open(args.filePath, 'a');
                    if (mode === 'overwrite') {
                        await fs_extra_1.appendFile(dest, `[\r\n`);
                    }
                }
                // BUG FIX: the original suffixed every element with a comma,
                // producing a trailing comma before the closing `]` (invalid
                // JSON) and concatenated batches with no separator. Emit the
                // separator BETWEEN elements/batches instead.
                const body = item.map((e) => `\t${JSON.stringify(e)}`).join(',\r\n');
                const buffer = Buffer.from(wroteAny ? `,\r\n${body}` : body);
                wroteAny = true;
                progress.add(buffer.byteLength);
                await fs_extra_1.appendFile(dest, buffer);
                for (const iV of item) {
                    yield iV;
                }
                progress.addItem(item.length);
            }
            if (mode === 'overwrite') {
                if (dest === 0) {
                    // BUG FIX: with empty input the original appended `]` to
                    // fd 0 (stdin). Still produce a valid empty JSON array.
                    await fs_extra_1.ensureFile(args.filePath);
                    dest = await fs_extra_1.open(args.filePath, 'a');
                    await fs_extra_1.appendFile(dest, `[`);
                }
                await fs_extra_1.appendFile(dest, `\r\n]`);
            }
            log();
        }
        finally {
            // BUG FIX: the interval previously leaked (keeping the process
            // alive) when iteration threw, and the fd was never closed.
            clearInterval(interval);
            if (dest !== 0) {
                await fs_extra_1.close(dest);
            }
        }
    }
    return types_1.IX.from(iter());
}
/**
 * Function will read big JSON files in memory efficient way.
 * @include ./JSONReadOptions.md
 * @example
 * import { jsonRead } from 'iterparse'
 * jsonRead({ filePath: "path/to/file.json" })
 *      .map((q)=> console.log(q))
 *      .count()
 * @example
 * import { jsonRead } from 'iterparse'
 * for await (const item of jsonRead({ filePath: "path/to/file.json" })) {
 *      console.log(item)
 * }
 * @category JSON
 */
function jsonRead(options) {
    // Wrap the raw parser generator in an IX chain so callers get the
    // fluent iterable API (.map, .count, for-await, ...).
    const source = _jsonIterParser(options);
    return types_1.IX.from(source);
}
exports.jsonRead = jsonRead;
/**
 * Curried entry point for JSON writing. `purry` inspects the raw
 * `arguments` object to dispatch between the data-first form
 * `jsonWrite(data, args)` and the data-last form `jsonWrite(args)(data)`.
 */
function jsonWrite() {
    // NOTE: `arguments` must be forwarded as-is — purry uses its length
    // for arity-based dispatch.
    const callArgs = arguments;
    return ts_prime_1.purry(_jsonWrite, callArgs);
}
exports.jsonWrite = jsonWrite;