/*
 * node-xml-stream
 * Version:
 * A fast XML parser using streams.
 * 242 lines (207 loc) • 8.38 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.EVENTS = undefined;
/**
 * Transpiler helper backing array destructuring (`var [a, b] = value`).
 *
 * - Real arrays are returned untouched.
 * - Any other iterable is drained into a fresh array, stopping after `i`
 *   items when `i` is truthy.
 * - Everything else raises a TypeError.
 */
var _slicedToArray = function () {
  // Pulls values out of `iterable`, closing its iterator (via `return()`)
  // whenever iteration stops before the iterator reported `done`.
  function collectFromIterator(iterable, limit) {
    var collected = [];
    var finished = true; // falsy only while a fetched step has not been fully handled
    var failed = false;
    var failure;
    var iterator;

    try {
      iterator = iterable[Symbol.iterator]();
      while (true) {
        var step = iterator.next();
        finished = step.done;
        if (finished) {
          break;
        }
        collected.push(step.value);
        if (limit && collected.length === limit) {
          break; // leaves `finished` falsy so the iterator gets closed below
        }
        finished = true;
      }
    } catch (err) {
      failed = true;
      failure = err;
    } finally {
      try {
        if (!finished && iterator["return"]) {
          iterator["return"]();
        }
      } finally {
        // The original iteration error wins over anything `return()` threw.
        if (failed) {
          throw failure;
        }
      }
    }

    return collected;
  }

  return function (arr, i) {
    if (Array.isArray(arr)) {
      return arr;
    }
    if (Symbol.iterator in Object(arr)) {
      return collectFromIterator(arr, i);
    }
    throw new TypeError("Invalid attempt to destructure non-iterable instance");
  };
}();
/**
 * Transpiler helper that installs class members from descriptor lists.
 *
 * @param {Function} Constructor the class constructor.
 * @param {Array} [protoProps] descriptors (each with a `key`) for the prototype.
 * @param {Array} [staticProps] descriptors (each with a `key`) for the constructor itself.
 * @return {Function} the same `Constructor`.
 */
var _createClass = function () {
  // Normalises each descriptor in place (non-enumerable unless it says
  // otherwise, always configurable, writable when it carries a value) and
  // defines it on `target` under `descriptor.key`.
  function installMembers(target, members) {
    var index = 0;
    while (index < members.length) {
      var member = members[index];
      member.enumerable = member.enumerable || false;
      member.configurable = true;
      if ("value" in member) {
        member.writable = true;
      }
      Object.defineProperty(target, member.key, member);
      index += 1;
    }
  }

  return function (Constructor, protoProps, staticProps) {
    if (protoProps) {
      installMembers(Constructor.prototype, protoProps);
    }
    if (staticProps) {
      installMembers(Constructor, staticProps);
    }
    return Constructor;
  };
}();
var _stream = require('stream');
var _string_decoder = require('string_decoder');
/**
 * Transpiler helper for rest destructuring: hands back real arrays as-is and
 * copies any other iterable or array-like value into a new array.
 */
function _toArray(arr) {
  if (Array.isArray(arr)) {
    return arr;
  }
  return Array.from(arr);
}
/**
 * Transpiler guard ensuring a class constructor was invoked with `new`:
 * throws unless `instance` is an instance of `Constructor`.
 */
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }
  throw new TypeError("Cannot call a class as a function");
}
/**
 * Transpiler helper that picks the value a derived constructor should use as
 * `this`: whatever the super constructor returned if that is an object or a
 * function, otherwise the instance that was passed in.
 *
 * @throws {ReferenceError} when `self` is falsy.
 */
function _possibleConstructorReturn(self, call) {
  if (!self) {
    throw new ReferenceError("this hasn't been initialised - super() hasn't been called");
  }
  var kind = typeof call;
  var superReturnedObject = kind === "object" || kind === "function";
  if (call && superReturnedObject) {
    return call;
  }
  return self;
}
/**
 * Transpiler helper implementing `class subClass extends superClass`.
 *
 * Gives `subClass` a fresh prototype that inherits from the parent's
 * prototype (with a non-enumerable `constructor` back-reference) and, when a
 * parent exists, links the constructors so static members are inherited.
 *
 * @throws {TypeError} when `superClass` is neither a function nor null.
 */
function _inherits(subClass, superClass) {
  var superIsFunction = typeof superClass === "function";
  if (!superIsFunction && superClass !== null) {
    throw new TypeError("Super expression must either be null or a function, not " + typeof superClass);
  }

  var parentPrototype = superClass && superClass.prototype;
  subClass.prototype = Object.create(parentPrototype, {
    constructor: {
      value: subClass,
      enumerable: false,
      writable: true,
      configurable: true
    }
  });

  if (!superClass) {
    return;
  }
  if (Object.setPrototypeOf) {
    Object.setPrototypeOf(subClass, superClass);
  } else {
    subClass.__proto__ = superClass;
  }
}
/**
* A fast XML parser for NodeJS using Writable streams.
*
* What this is:
* Simple and fast XML parser purely written for NodeJS. No extra production dependencies.
* A handy way to parse ATOM/RSS/RDF feeds and such. No validation is made on the document that is parsed.
*
* Motivation
* There are already quite a few parsers out there. I just wanted a parser that was as tiny and fast as possible to handle easy parsing of
* RSS/ATOM/RDF feeds using streams, no fancy stuff needed. If you want more functionality you should check out other recommended parsers (see below)
*
* Usage
* Just #pipe() a <stream.Readable> and you are ready to listen for events.
* You can also use the #write() method to write directly to the parser.
*
* The source is written using ES2015, babel is used to translate to the dist.
*
* Other recommended parsers for node that are great:
* https://github.com/isaacs/sax-js
* https://github.com/xmppjs/ltx
*
* Events:
* - text
* - instruction
* - opentag
* - closetag
* - cdata
*
* Comments are ignored, so there are no events for them.
*
*/
var Parser = function (_Writable) {
  _inherits(Parser, _Writable);

  /**
   * Creates a parser that starts in the TEXT state with an empty recording
   * buffer. Instances are Writable streams: pipe a Readable into them or call
   * #write() directly, then listen for the events named in EVENTS.
   */
  function Parser() {
    _classCallCheck(this, Parser);
    var _this = _possibleConstructorReturn(this, (Parser.__proto__ || Object.getPrototypeOf(Parser)).call(this));
    _this.state = STATE.TEXT;
    _this.buffer = '';
    _this.pos = 0;
    _this.isCloseTag = false;
    // Stateful decoder: keeps the leading bytes of a multi-byte UTF-8
    // character that was cut by a chunk boundary until the rest arrives.
    _this.decoder = new _string_decoder.StringDecoder('utf8');
    return _this;
  }

  _createClass(Parser, [{
    /**
     * Writable#_write implementation. Feeds the chunk through the state
     * machine one character at a time; events are emitted synchronously
     * while the chunk is processed.
     * @param {Buffer|string} chunk the data to parse.
     * @param {string} encoding unused; Buffers are decoded as UTF-8.
     * @param {function} done callback invoked once the chunk is consumed.
     */
    key: '_write',
    value: function _write(chunk, encoding, done) {
      var text;
      if (typeof chunk === 'string') {
        text = chunk;
      } else if (Buffer.isBuffer(chunk)) {
        // A plain chunk.toString() would turn a character split across two
        // chunks into replacement characters.
        text = this.decoder.write(chunk);
      } else {
        text = chunk.toString();
      }

      for (var i = 0; i < text.length; i++) {
        var c = text[i];
        var prev = this.buffer[this.pos - 1];
        this.buffer += c;
        this.pos++;
        // From here on: buffer[pos - 1] === c and buffer[pos - 2] === prev.
        switch (this.state) {
          case STATE.TEXT:
            if (c === '<') this._onStartNewTag();
            break;
          case STATE.TAG_NAME:
            if (prev === '<' && c === '?') this._onStartInstruction();
            if (prev === '<' && c === '/') this._onCloseTagStart();
            if (this.buffer[this.pos - 3] === '<' && prev === '!' && c === '[') this._onCDATAStart();
            if (this.buffer[this.pos - 3] === '<' && prev === '!' && c === '-') this._onCommentStart();
            if (c === '>') this._onTagCompleted();
            break;
          case STATE.INSTRUCTION:
            if (prev === '?' && c === '>') this._onEndInstruction();
            break;
          case STATE.CDATA:
            // A CDATA section is terminated by ']]>' only; a bare ']]' is
            // ordinary content (e.g. 'a[b[0]]').
            if (this.buffer[this.pos - 3] === ']' && prev === ']' && c === '>') this._onCDATAEnd();
            break;
          case STATE.IGNORE_COMMENT:
            if (this.buffer[this.pos - 3] === '-' && prev === '-' && c === '>') this._onCommentEnd();
            break;
        }
      }
      done();
    }
  }, {
    /**
     * Returns what was recorded since the last reset and restarts recording.
     * The first buffered character (the delimiter kept from the previous
     * reset) and the last `trailing` characters (the delimiter that ended
     * this recording) are excluded from the result.
     * @param {number} [trailing=1] length of the terminating delimiter.
     * @return {string} the recorded text.
     */
    key: '_endRecording',
    value: function _endRecording(trailing) {
      var skip = trailing === undefined ? 1 : trailing;
      var rec = this.buffer.slice(1, this.pos - skip);
      this.buffer = this.buffer.slice(-1); // Keep last item in buffer for prev comparison in main loop.
      this.pos = 1; // Reset the position (since the buffer was reset)
      return rec;
    }
  }, {
    /** '<' seen in TEXT: emits the pending (trimmed, non-empty) text and starts a tag. */
    key: '_onStartNewTag',
    value: function _onStartNewTag() {
      var text = this._endRecording().trim();
      if (text) {
        this.emit(EVENTS.TEXT, text);
      }
      this.state = STATE.TAG_NAME;
    }
  }, {
    /**
     * '>' seen in a tag: emits 'opentag' or 'closetag' with (name, attributes).
     * A self-closing tag ('<br/>', '<a href="x" />') emits 'opentag' followed
     * by 'closetag', and the trailing '/' is not part of the name or of the
     * last attribute value.
     */
    key: '_onTagCompleted',
    value: function _onTagCompleted() {
      var tag = this._endRecording();
      var isSelfClosing = !this.isCloseTag && tag.slice(-1) === '/';
      if (isSelfClosing) {
        tag = tag.slice(0, -1);
      }

      var _parseTagString2 = this._parseTagString(tag),
          name = _parseTagString2.name,
          attributes = _parseTagString2.attributes;

      if (!this.isCloseTag) {
        this.emit(EVENTS.OPEN_TAG, name, attributes);
        if (isSelfClosing) {
          this.emit(EVENTS.CLOSE_TAG, name, attributes);
        }
      } else {
        this.emit(EVENTS.CLOSE_TAG, name, attributes);
      }
      this.isCloseTag = false;
      this.state = STATE.TEXT;
    }
  }, {
    /** '</' seen: drops the recorded '<' and flags the current tag as a closing tag. */
    key: '_onCloseTagStart',
    value: function _onCloseTagStart() {
      this._endRecording();
      this.isCloseTag = true;
    }
  }, {
    /** '<?' seen: starts recording a processing instruction. */
    key: '_onStartInstruction',
    value: function _onStartInstruction() {
      this._endRecording();
      this.state = STATE.INSTRUCTION;
    }
  }, {
    /** '?>' seen: emits 'instruction' with (name, attributes). */
    key: '_onEndInstruction',
    value: function _onEndInstruction() {
      // Exclude exactly the two characters of '?>' so the last character of
      // the instruction itself is kept.
      var inst = this._endRecording(2);

      var _parseTagString3 = this._parseTagString(inst),
          name = _parseTagString3.name,
          attributes = _parseTagString3.attributes;

      this.emit(EVENTS.INSTRUCTION, name, attributes);
      this.state = STATE.TEXT;
    }
  }, {
    /** '<![' seen: starts recording a CDATA section. */
    key: '_onCDATAStart',
    value: function _onCDATAStart() {
      this._endRecording();
      this.state = STATE.CDATA;
    }
  }, {
    /** ']]>' seen in CDATA: emits 'cdata' with the raw section content. */
    key: '_onCDATAEnd',
    value: function _onCDATAEnd() {
      // With the trailing ']]>' excluded the recording is 'CDATA[<content>';
      // the content is everything after the first '['.
      var rec = this._endRecording(3);
      var text = rec.slice(rec.indexOf('[') + 1);
      this.state = STATE.TEXT;
      this.emit(EVENTS.CDATA, text);
    }
  }, {
    /** '<!-' seen: switches to skipping everything up to the next '-->'. */
    key: '_onCommentStart',
    value: function _onCommentStart() {
      this.state = STATE.IGNORE_COMMENT;
    }
  }, {
    /** '-->' seen: discards the comment and returns to TEXT. */
    key: '_onCommentEnd',
    value: function _onCommentEnd() {
      this._endRecording();
      this.state = STATE.TEXT;
    }

    /**
     * Helper to parse a tag string 'xml version="2.0" encoding="utf-8"' with regexp.
     *
     * The name is everything (trimmed) before the first `attr=`; when the
     * string has no attributes the whole trimmed string is the name.
     * Attribute values may be double-quoted, single-quoted or unquoted. Only
     * the surrounding quotes are removed, so values may contain '=', spaces
     * and the other kind of quote. No entity decoding is performed.
     *
     * @param {string} str the tag string.
     * @return {object} {name, attributes}
     */
  }, {
    key: '_parseTagString',
    value: function _parseTagString(str) {
      var attributes = {};
      // Start of the first whitespace run that is directly followed by `attr=`.
      var firstAttribute = str.search(/\s+[^\s=<>"'\/]+\s*=/);
      if (firstAttribute === -1) {
        return { name: str.trim(), attributes: attributes };
      }

      var name = str.slice(0, firstAttribute).trim();
      var rest = str.slice(firstAttribute);
      // Fresh /g literal per call, so no lastIndex state leaks between calls.
      var pattern = /([^\s=<>"'\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']*))/g;
      var match;
      while ((match = pattern.exec(rest)) !== null) {
        var value = match[2];
        if (value === undefined) value = match[3];
        if (value === undefined) value = match[4];
        attributes[match[1]] = value;
      }
      return { name: name, attributes: attributes };
    }
  }]);

  return Parser;
}(_stream.Writable);
// Publish the Parser class as the module's default export.
exports['default'] = Parser;
// Parser states; `Parser#state` always holds exactly one of these values.
var STATE = {
  // Between tags, recording character data.
  TEXT: 0,
  // Inside '<...>', entered when '<' is seen.
  TAG_NAME: 1,
  // Inside a processing instruction, entered on '<?'.
  INSTRUCTION: 2,
  // Inside a comment, entered on '<!-'; nothing is emitted for it.
  IGNORE_COMMENT: 4,
  // Inside a CDATA section, entered on '<!['.
  CDATA: 8
};
var EVENTS = exports.EVENTS = {
TEXT: 'text',
INSTRUCTION: 'instruction',
OPEN_TAG: 'opentag',
CLOSE_TAG: 'closetag',
CDATA: 'cdata'
};