iterparse
Version:
Delightful data parsing
180 lines (179 loc) • 8.03 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.xmlRead = exports.xmlWrite = void 0;
const tslib_1 = require("tslib");
const helpers_1 = require("./helpers");
const ix_1 = require("ix");
const buffer_1 = require("./buffer");
const Parser = tslib_1.__importStar(require("fast-xml-parser"));
const ts_prime_1 = require("ts-prime");
const he = tslib_1.__importStar(require("he"));
const fs_extra_1 = require("fs-extra");
const types_1 = require("./types");
/**
 * Writes every item of `data` to an XML file and re-emits the items unchanged.
 *
 * Each item is serialized with fast-xml-parser's `j2xParser` as one
 * `<options.nodeName>` element followed by CRLF and appended to
 * `options.filePath`. In `'overwrite'` mode (the default) the elements are
 * wrapped in `<root>` ... `</root>`; in any other mode only the bare elements
 * are appended. The file is created and opened lazily when the first item
 * arrives, so an empty input touches no file at all.
 *
 * NOTE(review): the file is opened with flag "a" even in 'overwrite' mode, so
 * pre-existing content is not truncated here - confirm whether that is intended.
 *
 * @param data - Source of items; anything `IX.from` accepts.
 * @param options - Write options. Read here: `filePath`, `nodeName`, `mode`,
 *   `progress` (callback receiving the `WriteProgress`), `progressFrequency`
 *   (ms between progress callbacks, default 3000). The whole object is also
 *   merged over the serializer defaults and handed to `j2xParser`.
 * @returns A deferred IX iterable yielding each item after it has been written.
 */
function _xmlWrite(data, options) {
    return types_1.IX.defer(async () => {
        const mode = options.mode || 'overwrite';
        // Destination file descriptor; null until the file has been opened.
        // (A numeric 0 sentinel would be indistinguishable from fd 0 / stdin.)
        let dest = null;
        function iter() {
            return tslib_1.__asyncGenerator(this, arguments, function* iter_1() {
                var e_1, _a;
                const progress = new helpers_1.WriteProgress(options.filePath, Date.now());
                const defaultOptions = {
                    attributeNamePrefix: "",
                    attrNodeName: "_",
                    textNodeName: "#text",
                    ignoreAttributes: false,
                    cdataTagName: "__cdata",
                    cdataPositionChar: "\\c",
                    format: false,
                    indentBy: " ",
                    supressEmptyNode: false,
                    // Entity-encode string values so the emitted XML stays well formed.
                    tagValueProcessor: a => {
                        if (ts_prime_1.isString(a))
                            return he.encode(a, { useNamedReferences: true });
                        return a;
                    },
                    attrValueProcessor: a => {
                        if (!ts_prime_1.isString(a))
                            return a;
                        return he.encode(a, { useNamedReferences: true });
                    }
                };
                const parser = new Parser.j2xParser(Object.assign(Object.assign({}, defaultOptions), options));
                const items = types_1.IX.from(data);
                const log = () => {
                    if (options.progress != null) {
                        options.progress(progress);
                    }
                };
                const inter = setInterval(log, options.progressFrequency || 3000);
                try {
                    try {
                        for (var items_1 = tslib_1.__asyncValues(items), items_1_1; items_1_1 = yield tslib_1.__await(items_1.next()), !items_1_1.done;) {
                            const buff = items_1_1.value;
                            if (dest === null) {
                                yield tslib_1.__await(fs_extra_1.ensureFile(options.filePath));
                                dest = yield tslib_1.__await(fs_extra_1.open(options.filePath, "a"));
                                if (mode === 'overwrite') {
                                    yield tslib_1.__await(fs_extra_1.appendFile(dest, "<root>\r\n"));
                                }
                            }
                            const result = parser.parse({ [options.nodeName]: buff });
                            const buffer = Buffer.from(`${result}\r\n`);
                            progress.add(buffer.byteLength);
                            yield tslib_1.__await(fs_extra_1.appendFile(dest, buffer));
                            progress.addItem();
                            yield yield tslib_1.__await(buff);
                        }
                    }
                    catch (e_1_1) { e_1 = { error: e_1_1 }; }
                    finally {
                        try {
                            if (items_1_1 && !items_1_1.done && (_a = items_1.return)) yield tslib_1.__await(_a.call(items_1));
                        }
                        finally { if (e_1) throw e_1.error; }
                    }
                    // Only close the envelope if it was actually opened; with an empty
                    // input there is no descriptor to write to.
                    if (dest !== null && mode === 'overwrite') {
                        yield tslib_1.__await(fs_extra_1.appendFile(dest, "</root>"));
                    }
                }
                finally {
                    // Runs on success, on error and on early consumer exit, so the
                    // timer cannot keep the process alive and the descriptor is released.
                    clearInterval(inter);
                    if (dest !== null) {
                        const fd = dest;
                        dest = null;
                        yield tslib_1.__await(fs_extra_1.close(fd));
                    }
                }
                // Final progress report after a successful run.
                log();
            });
        }
        return types_1.IX.from(iter());
    });
}
/**
 * Public entry point for writing items to an XML file.
 *
 * Forwards whatever arguments it receives, together with `_xmlWrite`, to
 * ts-prime's `purry`. Presumably this allows both the data-first
 * `xmlWrite(data, options)` and the data-last `xmlWrite(options)(data)` call
 * styles - confirm against the ts-prime documentation.
 */
function xmlWrite() {
    // `arguments` is passed through untouched so purry sees the real call arity.
    const callArguments = arguments;
    const purried = ts_prime_1.purry(_xmlWrite, callArguments);
    return purried;
}
exports.xmlWrite = xmlWrite;
/**
 * Reads XML chunk by chunk and yields every `options.nodeName` element as a parsed object.
 * Only the text of the element currently being assembled is kept in memory,
 * so files of practically unlimited size can be processed.
 *
 * How it works: each chunk from `bufferRead` is decoded, appended to the
 * carried-over remainder and cut in front of every opening `<nodeName ...>`
 * tag. All pieces but the last are complete elements and are parsed with
 * fast-xml-parser; the last piece may be incomplete and is carried over to
 * the next chunk. After the final chunk the remainder is cut behind its first
 * closing tag and parsed as the last element.
 *
 * Known limitations (unchanged behavior):
 * - the opening tag is normalized to a bare `<nodeName>`, so attributes on
 *   the node itself are discarded (attributes of child elements are kept);
 * - an error is thrown as soon as the accumulated text of a chunk contains no
 *   opening tag, which in practice means the node must occur in the first chunk;
 * - elements nested inside an element of the same name are not supported.
 *
 * @includes ./xml-read.md
 * @example
 * import { xmlRead } from 'iterparse'
 * xmlRead({ filePath: "./path/to/file.xml" })
 *   .map((q)=> console.log(q))
 *   .count()
 * @example
 * import { xmlRead } from 'iterparse'
 * for await (const item of xmlRead({ filePath: "./path/to/file.xml" })) {
 *   console.log(item)
 * }
 * @param options - Read options. Read here: `nodeName` (element to extract),
 *   `encoding` (chunk text encoding) and `progress` (callback). The whole
 *   object is forwarded to `bufferRead` and merged over the parser defaults.
 * @returns Async iterable of parsed elements.
 * @throws {Error} `Failed to find node: <nodeName>` when a chunk plus the
 *   carried-over remainder contains no opening tag of the node.
 * @category XML
 */
function xmlRead(options) {
    // Node core module, required locally so no top-level import has to change.
    const { StringDecoder } = require("string_decoder");
    const defaultOptions = {
        attributeNamePrefix: "",
        attrNodeName: "_",
        textNodeName: "#text",
        ignoreAttributes: false,
        ignoreNameSpace: false,
        allowBooleanAttributes: false,
        parseNodeValue: true,
        parseAttributeValue: false,
        trimValues: true,
        cdataTagName: "__cdata",
        cdataPositionChar: "\\c",
        parseTrueNumberOnly: false,
        arrayMode: false,
    };
    // Merged once instead of once per parsed element.
    const parserOptions = Object.assign(Object.assign({}, defaultOptions), options);
    const nodeName = options.nodeName;
    const marker = '!@###@!';
    const bareOpenTag = `<${nodeName}>`;
    const closeTag = `</${nodeName}>`;
    // Escape regex metacharacters so names such as "ns.item" match literally.
    const escapedName = String(nodeName).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Exactly one opening tag: the name must be followed by whitespace, "/" or ">"
    // (so "<items>" does not match "item"), and the match stops at the first ">"
    // instead of greedily swallowing the rest of the line.
    const elMatch = new RegExp(`<${escapedName}(?=[\\s/>])[^>]*>`, 'g');
    let last = '';
    let count = 0;
    // Cuts `text` in front of every opening tag; the part before the first tag is dropped.
    const splitNodes = (text) => text
        .replace(elMatch, () => `${marker}${bareOpenTag}`)
        .split(marker)
        .slice(1);
    // Parses one element's XML and unwraps it from the `{ [nodeName]: value }` result.
    const parseNode = (xml) => Parser.parse(xml, parserOptions)[nodeName];
    function iter() {
        return tslib_1.__asyncGenerator(this, arguments, function* iter_2() {
            var e_2, _b;
            // Stateful decoder: keeps bytes of a multi-byte character that is split
            // across two chunks instead of turning them into replacement characters.
            const decoder = new StringDecoder(options.encoding);
            try {
                for (var _c = tslib_1.__asyncValues(buffer_1.bufferRead(Object.assign(Object.assign({}, options), { progress: (q) => {
                        // Enrich the byte-level progress with the number of items emitted so far.
                        q.set({
                            items: count
                        });
                        if (options.progress != null) {
                            options.progress(q);
                        }
                    } }))), _d; _d = yield tslib_1.__await(_c.next()), !_d.done;) {
                    const buffer = _d.value;
                    const full = `${last}${decoder.write(buffer)}`;
                    if (full.search(elMatch) === -1) {
                        throw new Error(`Failed to find node: ${options.nodeName}`);
                    }
                    const nodes = splitNodes(full);
                    // The last piece may be cut off by the chunk boundary; keep it for later.
                    last = nodes.pop() || '';
                    for (const xml of nodes) {
                        const parsedResult = parseNode(xml);
                        if (parsedResult == null)
                            continue;
                        yield yield tslib_1.__await(parsedResult);
                        count++;
                    }
                }
            }
            catch (e_2_1) { e_2 = { error: e_2_1 }; }
            finally {
                try {
                    if (_d && !_d.done && (_b = _c.return)) yield tslib_1.__await(_b.call(_c));
                }
                finally { if (e_2) throw e_2.error; }
            }
            // Flush the remainder once the input is exhausted.
            const tailNodes = splitNodes(`${last}${decoder.end()}`);
            const finalNode = tailNodes.pop();
            for (const xml of tailNodes) {
                const parsedResult = parseNode(xml);
                if (parsedResult == null)
                    continue;
                yield yield tslib_1.__await(parsedResult);
                count++;
            }
            if (finalNode !== undefined) {
                // Drop everything behind the element's closing tag (e.g. the document's own closing tags).
                const closeAt = finalNode.indexOf(closeTag);
                const lastItem = closeAt === -1 ? finalNode : finalNode.slice(0, closeAt + closeTag.length);
                const parsedResult = parseNode(lastItem);
                if (parsedResult != null) {
                    yield yield tslib_1.__await(parsedResult);
                    count++;
                }
            }
        });
    }
    return ix_1.AsyncIterable.from(iter());
}
exports.xmlRead = xmlRead;