UNPKG

@drorgl/xml-streamer

Version:

XML stream parser for efficiently parsing large files with low memory usage.

311 lines 10.6 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const lodash_1 = __importDefault(require("lodash"));
const stream_1 = __importDefault(require("stream"));
const ltx_1 = require("./ltx");
const parserState_1 = require("./parserState");

/**
 * Option defaults applied to whatever the caller passes to `XmlParser`.
 *
 * - resourcePath:       when non-empty, only elements whose path starts with
 *                       this "/a/b/c" style path are collected.
 * - emitOnNodeName:     with a resourcePath, also emit an event named after
 *                       the element each time an object is pushed.
 * - attrsKey / textKey: property names under which attributes and text are
 *                       stored on the generated objects.
 * - explicitArray:      when true every child node is stored in an array.
 * - verbatimText:       when false, whitespace-only text nodes are skipped.
 * - preserveWhitespace: when false, whitespace runs in accumulated text are
 *                       collapsed to a single space and the text is trimmed.
 */
const defaults = {
    resourcePath: "",
    emitOnNodeName: false,
    attrsKey: "$",
    textKey: "_",
    explicitArray: true,
    verbatimText: false,
    preserveWhitespace: false
};

/**
 * Transform stream that feeds incoming chunks to a `SaxLtx` parser and pushes
 * plain objects built from the matching XML elements.
 */
class XmlParser extends stream_1.default.Transform {
    /**
     * @param {object} [opts] Parser options; see `defaults` for the keys.
     */
    constructor(opts) {
        super();
        // Merge onto a fresh object so the caller's options are not mutated.
        this.opts = lodash_1.default.defaults({}, opts, defaults);
        this.parserState = new parserState_1.ParserState();
        this.parser = new ltx_1.SaxLtx();
        // Guards against attaching the SAX handlers more than once.
        this.parserEventsRegistered = false;
        this._readableState.objectMode = true;
    }

    /**
     * Stream hook: writes a final empty chunk to the parser.
     * @param {Function} callback Completion callback supplied by the stream.
     */
    _flush(callback) {
        this.processChunk("");
        callback();
    }

    /**
     * Stream hook: forwards a chunk to the parser.
     * @param {Buffer} chunk Data to parse.
     * @param {string} encoding Must be "buffer".
     * @param {Function} callback Completion callback supplied by the stream.
     */
    _transform(chunk, encoding, callback) {
        if (encoding !== "buffer") {
            // Report through the callback and stop; the chunk must not be
            // parsed after it has been rejected.
            callback(new Error("unsupported encoding"));
            return;
        }
        this.processChunk(chunk);
        callback();
    }

    /**
     * Parses a chunk synchronously and hands the outcome to `cb`.
     * An empty chunk ends the parser, emits "end" and removes all listeners
     * from this stream before the (empty) chunk is written.
     *
     * @param {string|Buffer} chunk Data to parse; only `length` is inspected here.
     * @param {Function} cb Called as `cb(error)` or `cb(null, results)` where
     *     `results` holds everything drained from the readable buffer.
     * @returns {*} Whatever `cb` returns.
     */
    parse(chunk, cb) {
        const parser = this.parser;
        let error;
        this.ensureParserReady();
        const captureError = (err) => {
            error = err;
        };
        this.on("error", captureError);
        try {
            if (chunk.length === 0) {
                parser.end();
                this.emit("end");
                this.removeAllListeners();
            }
            parser.write(chunk);
            if (error) {
                return cb(error);
            }
            const result = [];
            // NOTE(review): relies on the internal `_readableState.buffer`
            // API of the stream implementation - confirm against the
            // supported Node.js versions.
            while (this._readableState.buffer.length > 0) {
                result.push(this._readableState.buffer.consume());
            }
            return cb(null, result);
        }
        finally {
            // Without this every call would leave one more "error" listener
            // attached to the stream.
            this.removeListener("error", captureError);
        }
    }

    /**
     * Writes a chunk to the underlying parser, wiring it up first if needed.
     * @param {string|Buffer} chunk Data handed to `parser.write`.
     */
    processChunk(chunk) {
        this.ensureParserReady();
        this.parser.write(chunk);
    }

    /**
     * While no start tag has been seen yet, records the node names the caller
     * listens for; attaches the SAX handlers exactly once.
     *
     * `isRootNode` only flips on the first "startElement", so a chunk that
     * contains no start tag would otherwise cause the handlers to be attached
     * again on the next chunk and every SAX event to be processed twice.
     */
    ensureParserReady() {
        if (!this.parserState.isRootNode) {
            return;
        }
        this.checkForInterestedNodeListeners();
        if (!this.parserEventsRegistered) {
            this.parserEventsRegistered = true;
            registerEvents.call(this);
        }
    }

    /**
     * Adds every event name registered on this stream, apart from a fixed
     * ignore list, to `parserState.interestedNodes`.
     */
    checkForInterestedNodeListeners() {
        const ignore = ["end", "prefinish", "data", "error"];
        for (const eventName of Object.keys(this._events)) {
            if (lodash_1.default.includes(ignore, eventName, 0)) {
                continue;
            }
            this.parserState.interestedNodes.push(eventName);
        }
    }
}
exports.XmlParser = XmlParser;

/**
 * Attaches the SAX handlers to `this.parser`. Must be invoked with `this`
 * bound to an `XmlParser`; options are read once, at attachment time.
 */
function registerEvents() {
    const scope = this;
    const parser = this.parser;
    const state = this.parserState;
    const resourcePath = this.opts.resourcePath;
    const attrsKey = this.opts.attrsKey;
    const textKey = this.opts.textKey;
    const interestedNodes = state.interestedNodes;
    const explicitArray = this.opts.explicitArray;
    const verbatimText = this.opts.verbatimText;
    const preserveWhitespace = this.opts.preserveWhitespace;

    parser.on("startElement", (name, attrs) => {
        if (state.isRootNode) {
            state.isRootNode = false;
        }
        state.currentPath = state.currentPath + "/" + name;
        checkForResourcePath(name);
        if (state.isPathfound) {
            processStartElement(name, attrs);
        }
    });

    parser.on("endElement", (name) => {
        state.lastEndedNode = name;
        const lastIndex = state.currentPath.lastIndexOf("/" + name);
        // The closing tag must be the last segment of the current path.
        if (state.currentPath.substring(lastIndex + 1).indexOf("/") !== -1) {
            processError.call(scope, `mismatched tag`);
        }
        state.currentPath = state.currentPath.substring(0, lastIndex);
        if (state.isPathfound) {
            processEndElement(name);
        }
        checkForResourcePath(name);
    });

    parser.on("text", (text) => {
        if (state.isPathfound) {
            processText(text);
        }
    });

    // Arrow function + `scope`: processError needs the XmlParser as `this`
    // (it reads `this.parser` and emits on the stream), not the SAX parser
    // that raised the event.
    parser.on("error", (err) => {
        processError.call(scope, err);
    });

    /** Creates the object for a newly opened element and links it into state.object. */
    function processStartElement(name, attrs) {
        if (!name) {
            return;
        }
        const hasAttrs = attrs && !lodash_1.default.isEmpty(attrs);
        const obj = {};
        if (hasAttrs) {
            obj[attrsKey] = attrs;
        }
        let tempObj = state.object;
        const path = getRelativePath();
        if (!path) {
            // The element is the collected root itself: attributes go
            // straight onto state.object.
            if (hasAttrs) {
                state.object[attrsKey] = attrs;
            }
            return;
        }
        const tokens = path.split(".");
        for (let i = 0; i < tokens.length; i++) {
            const isLast = i === tokens.length - 1;
            if (tempObj[tokens[i]] && !(explicitArray === false && isLast)) {
                tempObj = tempObj[tokens[i]];
            }
            else {
                // if explicitArray is true then create each node as array
                // irrespective of how many nodes are there with same name.
                tempObj[tokens[i]] = explicitArray ? [] : obj;
                tempObj = tempObj[tokens[i]];
            }
            if (Array.isArray(tempObj) && !isLast) {
                tempObj = tempObj[tempObj.length - 1];
            }
        }
        if (Array.isArray(tempObj)) {
            tempObj.push(obj);
        }
    }

    /** Pushes/emits the collected object once the element that completes it closes. */
    function processEndElement(name) {
        if (resourcePath) {
            const index = resourcePath.lastIndexOf("/");
            const rpath = resourcePath.substring(0, index);
            // currentPath has already been shortened by the caller, so it
            // equals the parent of resourcePath when a resource node closed.
            if (rpath === state.currentPath) {
                scope.push(state.object);
                if (scope.opts.emitOnNodeName) {
                    scope.emit(name, state.object);
                }
                state.object = {};
            }
            return;
        }
        if (!lodash_1.default.includes(interestedNodes, name, 0)) {
            return;
        }
        emitInterestedNode(name);
        if (state.firstFoundNode === name) {
            state.object = {};
            state.firstFoundNode = "";
            state.isPathfound = false;
        }
    }

    /** Locates the object of the just-closed interested node, then emits and pushes it. */
    function emitInterestedNode(name) {
        const fullTokens = state.currentPath.substring(1).split("/");
        fullTokens.push(name);
        const index = fullTokens.indexOf(state.firstFoundNode);
        const pathTokens = lodash_1.default.drop(fullTokens, index + 1);
        let tempObj = state.object;
        for (const token of pathTokens) {
            tempObj = tempObj[token];
        }
        if (Array.isArray(tempObj)) {
            tempObj = tempObj[tempObj.length - 1];
        }
        scope.emit(name, tempObj);
        scope.push(tempObj);
    }

    /** Appends text to target[textKey], normalising whitespace unless preserveWhitespace is set. */
    function appendText(target, text) {
        if (!target[textKey]) {
            target[textKey] = "";
        }
        target[textKey] = target[textKey] + text;
        if (!preserveWhitespace) {
            target[textKey] = target[textKey].replace(/\s+/g, " ").trim();
        }
    }

    /** Stores a text node on the object that corresponds to the current path. */
    function processText(text) {
        if ((!text) || ((!verbatimText) && !/\S/.test(text))) {
            return;
        }
        const path = getRelativePath();
        if (!path) {
            appendText(state.object, text);
            return;
        }
        let tempObj = state.object;
        const tokens = path.split(".");
        for (let i = 0; i < tokens.length; i++) {
            if (tempObj[tokens[i]]) {
                tempObj = tempObj[tokens[i]];
            }
            else {
                tempObj[tokens[i]] = explicitArray ? [] : {};
                tempObj = tempObj[tokens[i]];
            }
            if (Array.isArray(tempObj) && i !== tokens.length - 1) {
                tempObj = tempObj[tempObj.length - 1];
            }
        }
        // For an array the text belongs to its most recently added entry.
        appendText(Array.isArray(tempObj) ? tempObj[tempObj.length - 1] : tempObj, text);
    }

    /** Updates state.isPathfound (and firstFoundNode) for the current path / node name. */
    function checkForResourcePath(name) {
        if (resourcePath) {
            state.isPathfound = state.currentPath.indexOf(resourcePath) === 0;
            return;
        }
        if (lodash_1.default.includes(interestedNodes, name, 0)) {
            state.isPathfound = true;
            if (!state.firstFoundNode) {
                state.firstFoundNode = name;
            }
        }
    }

    /**
     * Returns the dot-separated path of the current element relative to the
     * collected root; undefined or "" when the element is the root itself.
     */
    function getRelativePath() {
        let tokens;
        if (resourcePath) {
            let xpath = state.currentPath.substring(resourcePath.length);
            if (!xpath) {
                return;
            }
            if (xpath[0] === "/") {
                xpath = xpath.substring(1);
            }
            tokens = xpath.split("/");
        }
        else {
            const allTokens = state.currentPath.substring(1).split("/");
            const index = allTokens.indexOf(state.firstFoundNode);
            tokens = lodash_1.default.drop(allTokens, index + 1);
        }
        return tokens.join(".");
    }
}

/**
 * Builds an Error carrying the parser's current line number, emits it as
 * "error" on `this`, and returns it. Must be invoked with `this` bound to an
 * `XmlParser`.
 *
 * @param {*} [err] Error or message; when falsy `parser.getError()` is used.
 * @returns {Error} The emitted error.
 */
function processError(err) {
    const parser = this.parser;
    const cause = err ? err : parser.getError();
    const error = new Error(`${cause} at line no: ${parser.getCurrentLineNumber()}`);
    this.emit("error", error);
    return error;
}
//# sourceMappingURL=parser.js.map