node-xml-stream-parser
Version:
A fast XML parser using streams.
317 lines (284 loc) • 9.89 kB
JavaScript
var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; };
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.EVENTS = undefined;
var _createClass = function () {
function defineProperties(target, props) {
for (var i = 0; i < props.length; i++) {
var descriptor = props[i];
descriptor.enumerable = descriptor.enumerable || false;
descriptor.configurable = true;
if ("value" in descriptor) descriptor.writable = true;
Object.defineProperty(target, descriptor.key, descriptor);
}
}
return function (Constructor, protoProps, staticProps) {
if (protoProps) defineProperties(Constructor.prototype, protoProps);
if (staticProps) defineProperties(Constructor, staticProps);
return Constructor;
};
}();
var _stream = require("stream");
function _classCallCheck(instance, Constructor) {
if (!(instance instanceof Constructor)) {
throw new TypeError("Cannot call a class as a function");
}
}
function _possibleConstructorReturn(self, call) {
if (!self) {
throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
}
return call && ((typeof call === "undefined" ? "undefined" : _typeof(call)) === "object" || typeof call === "function") ? call : self;
}
function _inherits(subClass, superClass) {
if (typeof superClass !== "function" && superClass !== null) {
throw new TypeError("Super expression must either be null or a function, not " + (typeof superClass === "undefined" ? "undefined" : _typeof(superClass)));
}
subClass.prototype = Object.create(superClass && superClass.prototype, {
constructor: {
value: subClass,
enumerable: false,
writable: true,
configurable: true
}
});
if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass;
}
/**
* A fast XML parser for NodeJS using Writable streams.
*
* What this is:
* Simple and fast XML parser purley written for NodeJS. No extra production dependencies.
* A handy way parse ATOM/RSS/RDF feeds and such. No validation is made on the document that is parsed.
*
* Motivation
* There is already quite a few parsers out there. I just wanted a parser that was as tiny and fast as possible to handle easy parsing of
* RSS/ATOM/RDF feeds using streams, no fancy stuff needed. If you want more functionality you should check out other recommended parsers (see below)
*
* Usage
* Just #pipe() a <stream.Readable> and you are ready to listen for events.
* You can also use the #write() method to write directly to the parser.
*
* The source is written using ES2015, babel is used to translate to the dist.
*
* Other recommended parsers for node that are great:
* https://github.com/isaacs/sax-js
* https://github.com/xmppjs/ltx
*
* Events:
* - text
* - instruction
* - opentag
* - closetag
* - cdata
*
* Comments are ignored, so there is no events for them.
*
*/
var Parser = function (_Writable) {
_inherits(Parser, _Writable);
function Parser() {
_classCallCheck(this, Parser);
var _this = _possibleConstructorReturn(this, (Parser.__proto__ || Object.getPrototypeOf(Parser)).call(this));
_this.state = STATE.TEXT;
_this.buffer = "";
_this.pos = 0;
_this.tagType = TAG_TYPE.NONE;
return _this;
}
_createClass(Parser, [{
key: "_write",
value: function _write(chunk, encoding, done) {
chunk = typeof chunk !== "string" ? chunk.toString() : chunk;
for (var i = 0; i < chunk.length; i++) {
var c = chunk[i];
var prev = this.buffer[this.pos - 1];
this.buffer += c;
this.pos++;
switch (this.state) {
case STATE.TEXT:
if (c === "<") this._onStartNewTag();
break;
case STATE.TAG_NAME:
if (prev === "<" && c === "?") {
this._onStartInstruction();
}
if (prev === "<" && c === "/") {
this._onCloseTagStart();
}
if (this.buffer[this.pos - 3] === "<" && prev === "!" && c === "[") {
this._onCDATAStart();
}
if (this.buffer[this.pos - 3] === "<" && prev === "!" && c === "-") {
this._onCommentStart();
}
if (c === ">") {
if (prev === "/") {
this.tagType = TAG_TYPE.SELF_CLOSING;
}
this._onTagCompleted();
}
break;
case STATE.INSTRUCTION:
if (prev === "?" && c === ">") this._onEndInstruction();
break;
case STATE.CDATA:
if (this.buffer[this.pos - 3] === "]" && prev === "]" && c === ">") this._onCDATAEnd();
break;
case STATE.IGNORE_COMMENT:
if (this.buffer[this.pos - 3] === "-" && prev === "-" && c === ">") this._onCommentEnd();
break;
}
}
done();
}
}, {
key: "_endRecording",
value: function _endRecording() {
var rec = this.buffer.slice(1, this.pos - 1);
this.buffer = this.buffer.slice(-1); // Keep last item in buffer for prev comparison in main loop.
this.pos = 1; // Reset the position (since the buffer was reset)
return rec;
}
}, {
key: "_onStartNewTag",
value: function _onStartNewTag() {
var text = this._endRecording().trim();
if (text) {
this.emit(EVENTS.TEXT, text);
}
this.state = STATE.TAG_NAME;
this.tagType = TAG_TYPE.OPENING;
}
}, {
key: "_onTagCompleted",
value: function _onTagCompleted() {
var tag = this._endRecording();
var _parseTagString2 = this._parseTagString(tag),
name = _parseTagString2.name,
attributes = _parseTagString2.attributes;
if (name === null) {
this.emit(EVENTS.ERROR, new Error("Failed to parse name for tag" + tag));
}
if (this.tagType && this.tagType == TAG_TYPE.OPENING) {
this.emit(EVENTS.OPEN_TAG, name, attributes);
}
if (this.tagType && this.tagType === TAG_TYPE.CLOSING) {
this.emit(EVENTS.CLOSE_TAG, name, attributes);
}
if (this.tagType && this.tagType === TAG_TYPE.SELF_CLOSING) {
if (Object.keys(attributes).length === 0 && attributes.constructor === Object) {
attributes = { ___selfClosing___: true };
}
this.emit(EVENTS.OPEN_TAG, name, attributes);
this.emit(EVENTS.CLOSE_TAG, name, attributes);
}
this.state = STATE.TEXT;
this.tagType = TAG_TYPE.NONE;
}
}, {
key: "_onCloseTagStart",
value: function _onCloseTagStart() {
this._endRecording();
this.tagType = TAG_TYPE.CLOSING;
}
}, {
key: "_onStartInstruction",
value: function _onStartInstruction() {
this._endRecording();
this.state = STATE.INSTRUCTION;
}
}, {
key: "_onEndInstruction",
value: function _onEndInstruction() {
this.pos -= 1; // Move position back 1 step since instruction ends with '?>'
var inst = this._endRecording();
var _parseTagString3 = this._parseTagString(inst),
name = _parseTagString3.name,
attributes = _parseTagString3.attributes;
if (name === null) {
this.emit(EVENTS.ERROR, new Error("Failed to parse name for inst" + inst));
}
this.emit(EVENTS.INSTRUCTION, name, attributes);
this.state = STATE.TEXT;
}
}, {
key: "_onCDATAStart",
value: function _onCDATAStart() {
this._endRecording();
this.state = STATE.CDATA;
}
}, {
key: "_onCDATAEnd",
value: function _onCDATAEnd() {
var text = this._endRecording(); // Will return CDATA[XXX] we regexp out the actual text in the CDATA.
text = text.slice(text.indexOf("[") + 1, text.lastIndexOf("]>") - 1);
this.state = STATE.TEXT;
this.emit(EVENTS.CDATA, text);
}
}, {
key: "_onCommentStart",
value: function _onCommentStart() {
this.state = STATE.IGNORE_COMMENT;
}
}, {
key: "_onCommentEnd",
value: function _onCommentEnd() {
this._endRecording();
this.state = STATE.TEXT;
}
/**
* Helper to parse a tag string 'xml version="2.0" encoding="utf-8"' with regexp.
* @param {string} str the tag string.
* @return {object} {name, attributes}
*/
}, {
key: "_parseTagString",
value: function _parseTagString(str) {
// parse name
var name = void 0;
var parsedString = /^([a-zäöüßÄÖÜA-Z0-9:_\-.\/]+?)(\s|$)/.exec(str);
if (parsedString && parsedString.length > 0) {
name = parsedString[1];
var attributesString = str.substr(name.length);
var attributeRegexp = /([a-zäöüßÄÖÜA-Z0-9:_\-.]+?)="([^"]+?)"/g;
var match = attributeRegexp.exec(attributesString);
var attributes = {};
while (match != null) {
attributes[match[1]] = match[2];
match = attributeRegexp.exec(attributesString);
}
if (name[name.length - 1] === "/") {
name = name.substr(0, name.length - 1);
}
return { name: name, attributes: attributes };
}
return { name: null, attributes: {} };
}
}]);
return Parser;
}(_stream.Writable);
exports.default = Parser;
var STATE = {
TEXT: 0,
TAG_NAME: 1,
INSTRUCTION: 2,
IGNORE_COMMENT: 4,
CDATA: 8
};
var TAG_TYPE = {
NONE: 0,
OPENING: 1,
CLOSING: 2,
SELF_CLOSING: 3
};
var EVENTS = exports.EVENTS = {
ERROR: "error",
TEXT: "text",
INSTRUCTION: "instruction",
OPEN_TAG: "opentag",
CLOSE_TAG: "closetag",
CDATA: "cdata"
};
;