@drorgl/xml-streamer
Version:
XML stream parser for parsing large files efficiently with low memory usage.
311 lines • 10.6 kB
JavaScript
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const lodash_1 = __importDefault(require("lodash"));
const stream_1 = __importDefault(require("stream"));
const ltx_1 = require("./ltx");
const parserState_1 = require("./parserState");
// Fallback values for every parser option; the XmlParser constructor fills in
// any option the caller did not supply from this object.
const defaults = {
// Element path (e.g. "/root/item") compared against the path of the element
// currently being parsed; an empty string switches to listener-name matching.
resourcePath: "",
// When true (and resourcePath is set), each completed object is also emitted
// as an event named after the closing element, in addition to being pushed.
emitOnNodeName: false,
// Property name under which an element's attributes are stored.
attrsKey: "$",
// Property name under which an element's text content is accumulated.
textKey: "_",
// When true, every child node is created as an array entry regardless of
// how many nodes share the same name.
explicitArray: true,
// When false, text consisting only of whitespace is skipped.
verbatimText: false,
// When false, whitespace runs in accumulated text are collapsed to a single
// space and the result is trimmed.
preserveWhitespace: false
};
/**
 * Transform stream that feeds incoming XML text to a SAX parser and pushes the
 * objects assembled by the handlers installed through `registerEvents`.
 *
 * It can be used as a stream (write/pipe chunks in, read objects out) or
 * through the callback-style `parse()` method.
 */
class XmlParser extends stream_1.default.Transform {
    /**
     * @param {object} [opts] Parser options; any missing key falls back to the
     *     module-level `defaults`.
     */
    constructor(opts) {
        super();
        // Merge into a fresh object so the caller's options object is not mutated.
        this.opts = lodash_1.default.defaults({}, opts, defaults);
        this.parserState = new parserState_1.ParserState();
        this.parser = new ltx_1.SaxLtx();
        // Set once the SAX handlers are wired, so they are never installed twice.
        this.eventsRegistered = false;
        this._readableState.objectMode = true;
    }
    /**
     * Stream hook invoked when the writable side ends: hands an empty chunk to
     * the parser and signals completion.
     */
    _flush(callback) {
        this.processChunk("");
        callback();
    }
    /**
     * Stream hook invoked for every written chunk. Only chunks reported with the
     * "buffer" encoding are parsed; anything else is rejected through the
     * callback instead of being parsed and reported as a success.
     */
    _transform(chunk, encoding, callback) {
        if (encoding !== "buffer") {
            callback(new Error("unsupported encoding"));
            return;
        }
        this.processChunk(chunk);
        callback();
    }
    /**
     * Callback-style parsing of a single chunk.
     *
     * An empty chunk ends the SAX parser, emits "end" and removes every listener
     * from this stream.
     *
     * @param {string|Buffer} chunk XML text to parse.
     * @param {Function} cb Called with `(error)` when an "error" event was emitted
     *     while the chunk was written, otherwise with `(null, objects)` where
     *     `objects` are the items queued on the readable side.
     * @returns whatever `cb` returns.
     */
    parse(chunk, cb) {
        const parser = this.parser;
        this.registerEventsOnce();
        let error;
        const captureError = (err) => {
            error = err;
        };
        this.on("error", captureError);
        try {
            if (chunk.length === 0) {
                parser.end();
                this.emit("end");
                this.removeAllListeners();
            }
            parser.write(chunk);
        }
        finally {
            // Detach the per-call listener so repeated parse() calls do not pile
            // up "error" listeners on the stream.
            this.removeListener("error", captureError);
        }
        if (error) {
            return cb(error);
        }
        // Drain through the public read() API rather than undocumented internal
        // buffer methods. Note that read() also emits "data" for each returned
        // item to any attached "data" listener.
        const result = [];
        let item = this.read();
        while (item !== null) {
            result.push(item);
            item = this.read();
        }
        return cb(null, result);
    }
    /**
     * Writes one chunk to the SAX parser, wiring the SAX handlers first if that
     * has not happened yet.
     */
    processChunk(chunk) {
        this.registerEventsOnce();
        this.parser.write(chunk);
    }
    /**
     * Collects interested node names and installs the SAX handlers, at most once
     * per instance. The root-node check alone is not enough as a guard: it stays
     * true until the first start tag is seen, so a chunk without any start tag
     * would otherwise cause every handler to be installed again.
     */
    registerEventsOnce() {
        if (!this.parserState.isRootNode || this.eventsRegistered) {
            return;
        }
        this.eventsRegistered = true;
        this.checkForInterestedNodeListeners();
        registerEvents.call(this);
    }
    /**
     * Records the name of every event that has a listener on this stream, except
     * the stream's own "end", "prefinish", "data" and "error" events, as a node
     * name the caller is interested in.
     */
    checkForInterestedNodeListeners() {
        const ignore = ["end", "prefinish", "data", "error"];
        for (const eventName of this.eventNames()) {
            // Node names are strings; skip symbol-keyed events.
            if (typeof eventName !== "string") {
                continue;
            }
            if (lodash_1.default.includes(ignore, eventName, 0)) {
                continue;
            }
            // Only names that actually have a listener attached count.
            if (this.listenerCount(eventName) === 0) {
                continue;
            }
            this.parserState.interestedNodes.push(eventName);
        }
    }
}
exports.XmlParser = XmlParser;
/**
 * Installs the SAX handlers ("startElement", "endElement", "text", "error") on
 * `this.parser`. The handlers track the current element path in
 * `this.parserState`, build up `state.object`, and push/emit completed objects
 * on the stream.
 *
 * Must be called with `this` bound to the stream instance (it reads
 * `this.parser`, `this.parserState` and `this.opts`).
 */
function registerEvents() {
    const scope = this;
    const parser = this.parser;
    const state = this.parserState;
    const { resourcePath, attrsKey, textKey, explicitArray, verbatimText, preserveWhitespace } = this.opts;
    const interestedNodes = state.interestedNodes;
    parser.on("startElement", (name, attrs) => {
        if (state.isRootNode) {
            state.isRootNode = false;
        }
        state.currentPath = state.currentPath + "/" + name;
        checkForResourcePath(name);
        if (state.isPathfound) {
            processStartElement(name, attrs);
        }
    });
    parser.on("endElement", (name) => {
        state.lastEndedNode = name;
        const closingSegment = "/" + name;
        const lastIndex = state.currentPath.lastIndexOf(closingSegment);
        // The closing tag must be the last segment of the current path. A plain
        // lastIndexOf check would let "</a>" silently close "<ab>".
        if (!state.currentPath.endsWith(closingSegment)) {
            processError.call(scope, `mismatched tag`);
        }
        state.currentPath = state.currentPath.substring(0, lastIndex);
        if (state.isPathfound) {
            processEndElement(name);
        }
        checkForResourcePath(name);
    });
    parser.on("text", (text) => {
        if (state.isPathfound) {
            processText(text);
        }
    });
    parser.on("error", (err) => {
        // Report on the stream, like the "endElement" handler does, rather than
        // on the SAX parser that invoked this handler.
        processError.call(scope, err);
    });
    /** Adds the object for a newly opened element to `state.object`. */
    function processStartElement(name, attrs) {
        if (!name) {
            return;
        }
        const obj = {};
        if (attrs && !lodash_1.default.isEmpty(attrs)) {
            obj[attrsKey] = attrs;
        }
        let tempObj = state.object;
        const path = getRelativePath();
        if (!path) {
            // The element is the resource root itself: attributes go straight
            // onto the object being built.
            if (attrs && !lodash_1.default.isEmpty(attrs)) {
                state.object[attrsKey] = attrs;
            }
            return;
        }
        const tokens = path.split(".");
        for (let i = 0; i < tokens.length; i++) {
            if (tempObj[tokens[i]] && !(explicitArray === false && i === tokens.length - 1)) {
                tempObj = tempObj[tokens[i]];
            }
            else {
                // if explicitArray is true then create each node as array
                // irrespective of how many nodes are there with same name.
                tempObj[tokens[i]] = explicitArray ? [] : obj;
                tempObj = tempObj[tokens[i]];
            }
            // Descend into the most recent entry of an intermediate array.
            if (Array.isArray(tempObj) && i !== tokens.length - 1) {
                tempObj = tempObj[tempObj.length - 1];
            }
        }
        if (Array.isArray(tempObj)) {
            tempObj.push(obj);
        }
    }
    /** Pushes/emits the finished object when a tracked element closes. */
    function processEndElement(name) {
        if (resourcePath) {
            const index = resourcePath.lastIndexOf("/");
            const rpath = resourcePath.substring(0, index);
            // currentPath has already been shortened, so it equals the parent of
            // the resource path exactly when the resource element just closed.
            if (rpath === state.currentPath) {
                scope.push(state.object);
                if (scope.opts.emitOnNodeName) {
                    scope.emit(name, state.object);
                }
                state.object = {};
            }
        }
        else {
            if (lodash_1.default.includes(interestedNodes, name, 0)) {
                emitInterestedNode(name);
                if (state.firstFoundNode === name) {
                    state.object = {};
                    state.firstFoundNode = "";
                    state.isPathfound = false;
                }
            }
        }
    }
    /** Locates the object for the interested node `name` and emits/pushes it. */
    function emitInterestedNode(name) {
        const xpath = state.currentPath.substring(1);
        let pathTokens = xpath.split("/");
        pathTokens.push(name);
        const index = pathTokens.indexOf(state.firstFoundNode);
        pathTokens = lodash_1.default.drop(pathTokens, index + 1);
        let tempObj = state.object;
        for (const token of pathTokens) {
            tempObj = tempObj[token];
        }
        if (Array.isArray(tempObj)) {
            tempObj = tempObj[tempObj.length - 1];
        }
        scope.emit(name, tempObj);
        scope.push(tempObj);
    }
    /**
     * Appends `text` to `target[textKey]`; unless preserveWhitespace is set,
     * whitespace runs in the accumulated value are collapsed and it is trimmed.
     */
    function appendText(target, text) {
        if (!target[textKey]) {
            target[textKey] = "";
        }
        target[textKey] = target[textKey] + text;
        if (!preserveWhitespace) {
            target[textKey] = target[textKey].replace(/\s+/g, " ").trim();
        }
    }
    /** Stores a text node on the object of the element currently open. */
    function processText(text) {
        if ((!text) || ((!verbatimText) && !/\S/.test(text))) {
            return;
        }
        const path = getRelativePath();
        if (!path) {
            appendText(state.object, text);
            return;
        }
        let tempObj = state.object;
        const tokens = path.split(".");
        for (let i = 0; i < tokens.length; i++) {
            if (tempObj[tokens[i]]) {
                tempObj = tempObj[tokens[i]];
            }
            else {
                tempObj[tokens[i]] = explicitArray ? [] : {};
                tempObj = tempObj[tokens[i]];
            }
            if (Array.isArray(tempObj) && i !== tokens.length - 1) {
                tempObj = tempObj[tempObj.length - 1];
            }
        }
        if (Array.isArray(tempObj)) {
            appendText(tempObj[tempObj.length - 1], text);
        }
        else {
            appendText(tempObj, text);
        }
    }
    /**
     * True when `path` is the resource path itself or lies below it. The match
     * must end on a path-segment boundary so that "/root/items" is not taken
     * for "/root/item".
     */
    function isOnResourcePath(path) {
        if (!path.startsWith(resourcePath)) {
            return false;
        }
        return path.length === resourcePath.length
            || resourcePath.endsWith("/")
            || path[resourcePath.length] === "/";
    }
    /** Updates `state.isPathfound` (and `firstFoundNode`) for the current path. */
    function checkForResourcePath(name) {
        if (resourcePath) {
            state.isPathfound = isOnResourcePath(state.currentPath);
        }
        else {
            if (lodash_1.default.includes(interestedNodes, name, 0)) {
                state.isPathfound = true;
                if (!state.firstFoundNode) {
                    state.firstFoundNode = name;
                }
            }
        }
    }
    /**
     * Returns the dot-separated path of the current element relative to the
     * resource root (or to the first found interested node). Returns undefined
     * when a resourcePath is set and the current element is the resource root.
     */
    function getRelativePath() {
        let tokens;
        if (resourcePath) {
            let xpath = state.currentPath.substring(resourcePath.length);
            if (!xpath) {
                return;
            }
            if (xpath[0] === "/") {
                xpath = xpath.substring(1);
            }
            tokens = xpath.split("/");
        }
        else {
            const xpath = state.currentPath.substring(1);
            tokens = xpath.split("/");
            const index = tokens.indexOf(state.firstFoundNode);
            tokens = lodash_1.default.drop(tokens, index + 1);
        }
        return tokens.join(".");
    }
}
/**
 * Wraps a parse failure in an Error annotated with the parser's current line
 * number, emits it as an "error" event on `this`, and returns it.
 *
 * Must be called with `this` bound to an object exposing `parser` and `emit`.
 *
 * @param {*} [err] Cause of the failure; when falsy, the parser's own
 *     `getError()` value is used instead.
 * @returns {Error} The wrapped error that was emitted.
 */
function processError(err) {
    const saxParser = this.parser;
    const cause = err ? err : saxParser.getError();
    const wrapped = new Error(`${cause} at line no: ${saxParser.getCurrentLineNumber()}`);
    this.emit("error", wrapped);
    return wrapped;
}
//# sourceMappingURL=parser.js.map