UNPKG

ltx

Version:

<xml for="JavaScript">

253 lines (236 loc) 7.54 kB
import { EventEmitter } from "events";
import { unescapeXML } from "../escape.js";

/*
 * Parser states. `state` always holds exactly one of these values and
 * survives across write() calls, so a document may be fed in arbitrary chunks.
 */
const STATE_TEXT = 0; // character data between tags
const STATE_IGNORE_COMMENT = 1; // inside "<!...", skipped until "-->" or "]]>"
const STATE_IGNORE_INSTRUCTION = 2; // inside "<?...", skipped until "?>"
const STATE_TAG_NAME = 3; // directly after "<", reading the element name
const STATE_TAG = 4; // inside a tag, between attributes
const STATE_ATTR_NAME = 5; // reading an attribute name
const STATE_ATTR_EQ = 6; // after an attribute name, waiting for "="
const STATE_ATTR_QUOT = 7; // after "=", waiting for the opening quote
const STATE_ATTR_VALUE = 8; // inside a quoted attribute value
const STATE_CDATA = 9; // inside "<![CDATA[", reading until "]]>"

/**
 * Streaming, event-based (SAX-style) XML tokenizer.
 *
 * Feed it string chunks (or anything with a `toString()`) via `write()`.
 * It emits:
 *   - "startElement" (name, attrs)     for every opening or self-closing tag;
 *                                      attribute values are passed through
 *                                      unescapeXML()
 *   - "endElement"   (name, isSelfClosing) for every closing tag, and right
 *                                      after "startElement" for "<x/>"
 *   - "text"         (string)          for non-empty character data; regular
 *                                      text is passed through unescapeXML(),
 *                                      CDATA content is emitted verbatim
 *
 * Comments, processing instructions and other "<!...>" declarations are
 * skipped. Note that a "<!..." construct is only considered finished at
 * "-->" or "]]>".
 *
 * The parser is lenient: it performs no well-formedness checks and emits no
 * error events.
 */
class SaxLtx extends EventEmitter {
  constructor() {
    super();

    let state = STATE_TEXT;
    // Unconsumed tail of the previous chunk, prepended to the next one.
    let remainder;
    // When true the remainder has to be re-scanned from its first character
    // (an ambiguous "<![CDA" or "]" at the end of a chunk); otherwise it was
    // already scanned and only kept so the current token can be sliced out.
    let parseRemainder;
    let tagName;
    let attrs;
    let endTag;
    let selfClosing;
    let attrQuote; // char code of the quote that opened the current value
    let attrQuoteChar; // the same quote as a string, for indexOf()
    // Index in `data` where the token being collected starts; undefined
    // while nothing is being collected.
    let recordStart = 0;
    let attrName;

    /**
     * Emits the element events for a tag that has just been closed by ">".
     *
     * @param {boolean|undefined} endTag - truthy for a closing tag "</x>"
     * @param {string} tagName - element name
     * @param {Object} attrs - attribute name → unescaped value
     */
    this._handleTagOpening = function _handleTagOpening(
      endTag,
      tagName,
      attrs
    ) {
      if (!endTag) {
        this.emit("startElement", tagName, attrs);
        // `selfClosing` is read from the parser state, not from a parameter.
        if (selfClosing) {
          this.emit("endElement", tagName, true);
        }
      } else {
        this.emit("endElement", tagName, false);
      }
    };

    /**
     * Parses the next chunk of the document and emits the resulting events
     * synchronously.
     *
     * @param {string|{toString(): string}} data - next chunk of XML
     */
    this.write = function write(data) {
      if (typeof data !== "string") {
        data = data.toString();
      }

      let pos = 0;

      /* Anything from previous write()? */
      if (remainder) {
        data = remainder + data;
        pos += !parseRemainder ? remainder.length : 0;
        parseRemainder = false;
        remainder = null;
      }

      // Returns the text collected since `recordStart` (exclusive of `pos`)
      // and stops collecting; returns undefined when nothing was collected.
      const endRecording = () => {
        if (typeof recordStart === "number") {
          const recorded = data.slice(recordStart, pos);
          recordStart = undefined;
          return recorded;
        }
        return undefined;
      };

      for (; pos < data.length; pos++) {
        // Fast-forward: in states where only one delimiter matters, jump
        // straight to it instead of inspecting every character.
        switch (state) {
          case STATE_TEXT: {
            const lt = data.indexOf("<", pos);
            // No "<" left in this chunk: nothing else can happen in this
            // state, so skip to the end instead of rescanning per character.
            pos = lt === -1 ? data.length : lt;
            break;
          }
          case STATE_ATTR_VALUE: {
            const quot = data.indexOf(attrQuoteChar, pos);
            // Same reasoning as above when the closing quote is missing.
            pos = quot === -1 ? data.length : quot;
            break;
          }
          case STATE_IGNORE_COMMENT: {
            // Start two characters back so that a "-->" whose dashes were
            // carried over from the previous chunk is not jumped over.
            const endComment = data.indexOf("-->", Math.max(pos - 2, 0));
            if (endComment !== -1) {
              pos = endComment + 2; // target the '>' character
            }
            break;
          }
          // No default
        }

        // NaN when a fast-forward moved `pos` to data.length; NaN matches
        // none of the comparisons below.
        const c = data.charCodeAt(pos);

        switch (state) {
          case STATE_TEXT:
            if (c === 60 /* < */) {
              const text = endRecording();
              if (text) {
                this.emit("text", unescapeXML(text));
              }
              state = STATE_TAG_NAME;
              recordStart = pos + 1;
              attrs = {};
            }
            break;

          case STATE_CDATA:
            if (c === 93 /* ] */) {
              if (data.startsWith("]>", pos + 1)) {
                const cData = endRecording();
                if (cData) {
                  this.emit("text", cData);
                }
                state = STATE_TEXT;
                // Step onto the closing '>' and resume collecting right
                // after it, so text following the CDATA section is kept.
                pos += 2;
                recordStart = pos + 1;
              } else if (data.length < pos + 2) {
                // The chunk ends on "]": cannot tell yet whether this is
                // "]]>". Keep everything and re-scan on the next write().
                parseRemainder = true;
                pos = data.length;
              }
            }
            break;

          case STATE_TAG_NAME:
            if (c === 47 /* / */ && recordStart === pos) {
              // "</": closing tag, the name starts after the slash.
              recordStart = pos + 1;
              endTag = true;
            } else if (c === 33 /* ! */) {
              if (data.startsWith("[CDATA[", pos + 1)) {
                recordStart = pos + 8;
                state = STATE_CDATA;
              } else if (
                data.length < pos + 8 &&
                "[CDATA[".startsWith(data.slice(pos + 1))
              ) {
                // We potentially have CDATA, but the chunk is ending; stop
                // here and let the next write() decide
                parseRemainder = true;
                pos = data.length;
              } else {
                recordStart = undefined;
                state = STATE_IGNORE_COMMENT;
              }
            } else if (c === 63 /* ? */) {
              recordStart = undefined;
              state = STATE_IGNORE_INSTRUCTION;
            } else if (c <= 32 || c === 47 /* / */ || c === 62 /* > */) {
              tagName = endRecording();
              // Re-process this character in STATE_TAG.
              pos--;
              state = STATE_TAG;
            }
            break;

          case STATE_IGNORE_COMMENT:
            if (c === 62 /* > */) {
              const prevFirst = data.charCodeAt(pos - 1);
              const prevSecond = data.charCodeAt(pos - 2);
              if (
                (prevFirst === 45 /* - */ && prevSecond === 45) /* - */ ||
                (prevFirst === 93 /* ] */ && prevSecond === 93) /* ] */
              ) {
                state = STATE_TEXT;
                // Resume collecting so text after the comment is kept.
                recordStart = pos + 1;
              }
            }
            break;

          case STATE_IGNORE_INSTRUCTION:
            if (c === 62 /* > */) {
              const prev = data.charCodeAt(pos - 1);
              if (prev === 63 /* ? */) {
                state = STATE_TEXT;
                // Resume collecting so text after the instruction is kept.
                recordStart = pos + 1;
              }
            }
            break;

          case STATE_TAG:
            if (c === 62 /* > */) {
              this._handleTagOpening(endTag, tagName, attrs);
              tagName = undefined;
              attrs = undefined;
              endTag = undefined;
              selfClosing = undefined;
              state = STATE_TEXT;
              recordStart = pos + 1;
            } else if (c === 47 /* / */) {
              selfClosing = true;
            } else if (c > 32) {
              recordStart = pos;
              state = STATE_ATTR_NAME;
            }
            break;

          case STATE_ATTR_NAME:
            if (c <= 32 || c === 61 /* = */) {
              attrName = endRecording();
              // Re-process this character in STATE_ATTR_EQ.
              pos--;
              state = STATE_ATTR_EQ;
            }
            break;

          case STATE_ATTR_EQ:
            if (c === 61 /* = */) {
              state = STATE_ATTR_QUOT;
            }
            break;

          case STATE_ATTR_QUOT:
            if (c === 34 /* " */ || c === 39 /* ' */) {
              attrQuote = c;
              attrQuoteChar = c === 34 ? '"' : "'";
              state = STATE_ATTR_VALUE;
              recordStart = pos + 1;
            }
            break;

          case STATE_ATTR_VALUE:
            if (c === attrQuote) {
              const value = unescapeXML(endRecording());
              attrs[attrName] = value;
              attrName = undefined;
              state = STATE_TAG;
            }
            break;
        }
      }

      if (typeof recordStart === "number" && recordStart <= data.length) {
        // A token is still open: keep it for the next write().
        remainder = data.slice(recordStart);
        recordStart = 0;
      } else if (
        state === STATE_IGNORE_COMMENT ||
        state === STATE_IGNORE_INSTRUCTION
      ) {
        // Nothing is being collected, but the terminator checks look at the
        // two characters before '>'. Carry them over so "--" | ">" or
        // "?" | ">" split across two chunks is still recognised.
        remainder = data.slice(-2);
      }
    };
  }

  /**
   * Parses an optional final chunk and then disables the parser: any later
   * write() call is a no-op.
   *
   * @param {string|{toString(): string}} [data] - last chunk of XML
   */
  end(data) {
    if (data) {
      this.write(data);
    }

    /* Uh, yeah */
    this.write = function write() {};
  }
}

export default SaxLtx;