UNPKG

react-saasify-chrisvxd

Version:

React components for Saasify web clients.

1,821 lines (1,674 loc) 56 kB
"use strict"; const { isS, isChar, isNameStartChar, isNameChar, S_LIST, NAME_RE } = require("xmlchars/xml/1.0/ed5"); const { isNCNameStartChar, isNCNameChar, NC_NAME_RE } = require("xmlchars/xmlns/1.0/ed3"); const XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"; const XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"; const rootNS = { __proto__: null, xml: XML_NAMESPACE, xmlns: XMLNS_NAMESPACE, }; const XML_ENTITIES = { __proto__: null, amp: "&", gt: ">", lt: "<", quot: "\"", apos: "'", }; const S_INITIAL = "sInitial"; // initial state const S_BEGIN_WHITESPACE = "sBeginWhitespace"; // leading whitespace const S_DOCTYPE = "sDoctype"; // <!DOCTYPE const S_DOCTYPE_QUOTE = "sDoctypeQuote"; // <!DOCTYPE "//blah const S_DTD = "sDTD"; // <!DOCTYPE "//blah" [ ... const S_DTD_QUOTED = "sDTDQuoted"; // <!DOCTYPE "//blah" [ "foo const S_DTD_OPEN_WAKA = "sDTDOpenWaka"; const S_DTD_OPEN_WAKA_BANG = "sDTDOpenWakaBang"; const S_DTD_COMMENT = "sDTDComment"; // <!-- const S_DTD_COMMENT_ENDING = "sDTDCommentEnding"; // <!-- blah - const S_DTD_COMMENT_ENDED = "sDTDCommentEnded"; // <!-- blah -- const S_DTD_PI = "sDTDPI"; // <? const S_DTD_PI_ENDING = "sDTDPIEnding"; // <?hi "there" ? const S_TEXT = "sText"; // general stuff const S_ENTITY = "sEntity"; // &amp and such const S_OPEN_WAKA = "sOpenWaka"; // < const S_OPEN_WAKA_BANG = "sOpenWakaBang"; // <!... const S_COMMENT = "sComment"; // <!-- const S_COMMENT_ENDING = "sCommentEnding"; // <!-- blah - const S_COMMENT_ENDED = "sCommentEnded"; // <!-- blah -- const S_CDATA = "sCData"; // <![CDATA[ something const S_CDATA_ENDING = "sCDataEnding"; // ] const S_CDATA_ENDING_2 = "sCDataEnding2"; // ]] const S_PI_FIRST_CHAR = "sPIFirstChar"; // <?hi, first char const S_PI_REST = "sPIRest"; // <?hi, rest of the name const S_PI_BODY = "sPIBody"; // <?hi there const S_PI_ENDING = "sPIEnding"; // <?hi "there" ? const S_OPEN_TAG = "sOpenTag"; // <strong const S_OPEN_TAG_SLASH = "sOpenTagSlash"; // <strong / const S_ATTRIB = "sAttrib"; // <a const S_ATTRIB_NAME = "sAttribName"; // <a foo const S_ATTRIB_NAME_SAW_WHITE = "sAttribNameSawWhite"; // <a foo _ const S_ATTRIB_VALUE = "sAttribValue"; // <a foo= const S_ATTRIB_VALUE_QUOTED = "sAttribValueQuoted"; // <a foo="bar const S_ATTRIB_VALUE_CLOSED = "sAttribValueClosed"; // <a foo="bar" const S_ATTRIB_VALUE_UNQUOTED = "sAttribValueUnquoted"; // <a foo=bar const S_CLOSE_TAG = "sCloseTag"; // </a const S_CLOSE_TAG_SAW_WHITE = "sCloseTagSawWhite"; // </a > // These states are internal to sPIBody const S_XML_DECL_NAME_START = 1; // <?xml const S_XML_DECL_NAME = 2; // <?xml foo const S_XML_DECL_EQ = 3; // <?xml foo= const S_XML_DECL_VALUE_START = 4; // <?xml foo= const S_XML_DECL_VALUE = 5; // <?xml foo="bar" /** * The list of supported events. */ exports.EVENTS = [ "text", "processinginstruction", "doctype", "comment", "opentagstart", "opentag", "closetag", "cdata", "error", "end", "ready", ]; const NL = 0xA; const CR = 0xD; const SPACE = 0x20; const BANG = 0x21; const DQUOTE = 0x22; const AMP = 0x26; const SQUOTE = 0x27; const MINUS = 0x2D; const FORWARD_SLASH = 0x2F; const SEMICOLON = 0x3B; const LESS = 0x3C; const EQUAL = 0x3D; const GREATER = 0x3E; const QUESTION = 0x3F; const OPEN_BRACKET = 0x5B; const CLOSE_BRACKET = 0x5D; function isQuote(c) { return c === DQUOTE || c === SQUOTE; } const QUOTES = [DQUOTE, SQUOTE]; const DOCTYPE_TERMINATOR = [...QUOTES, OPEN_BRACKET, GREATER]; const DTD_TERMINATOR = [...QUOTES, LESS, CLOSE_BRACKET]; const XML_DECL_NAME_TERMINATOR = [EQUAL, QUESTION, ...S_LIST]; const ATTRIB_VALUE_UNQUOTED_TERMINATOR = [...S_LIST, GREATER, AMP, LESS]; function nsPairCheck(parser, prefix, uri) { switch (prefix) { case "xml": if (uri !== XML_NAMESPACE) { parser.fail(`xml prefix must be bound to ${XML_NAMESPACE}.`); } break; case "xmlns": if (uri !== XMLNS_NAMESPACE) { parser.fail(`xmlns prefix must be bound to ${XMLNS_NAMESPACE}.`); } break; default: } switch (uri) { case XMLNS_NAMESPACE: parser.fail(prefix === "" ? `the default namespace may not be set to ${uri}.` : `may not assign a prefix (even "xmlns") to the URI \ ${XMLNS_NAMESPACE}.`); break; case XML_NAMESPACE: switch (prefix) { case "xml": // Assinging the XML namespace to "xml" is fine. break; case "": parser.fail(`the default namespace may not be set to ${uri}.`); break; default: parser.fail("may not assign the xml namespace to another prefix."); } break; default: } } function nsMappingCheck(parser, mapping) { for (const local of Object.keys(mapping)) { nsPairCheck(parser, local, mapping[local]); } } function isNCName(name) { return NC_NAME_RE.test(name); } function isName(name) { return NAME_RE.test(name); } const FORBIDDEN_START = 0; const FORBIDDEN_BRACKET = 1; const FORBIDDEN_BRACKET_BRACKET = 2; /** * Data structure for an XML tag. * * @typedef {object} SaxesTag * * @property {string} name The tag's name. This is the combination of prefix and * global name. For instance ``<a:b>`` would have ``"a:b"`` for ``name``. * * @property {string} prefix The tag's prefix. For instance ``<a:b>`` would have * ``"a"`` for ``prefix``. Undefined if we do not track namespaces. * * @property {string} local The tag's local name. For instance ``<a:b>`` would * have ``"b"`` for ``local``. Undefined if we do not track namespaces. * * @property {string} uri The namespace URI of this tag. Undefined if we do not * track namespaces. * * @property {Object.<string, SaxesAttribute> | Object.<string, string>} * attributes A map of attribute name to attributes. If namespaces are tracked, * the values in the map are {@link SaxesAttribute SaxesAttribute} * objects. Otherwise, they are strings. * * @property {Object.<string, string>} ns The namespace bindings in effect. * * @property {boolean} isSelfClosing Whether the tag is * self-closing (e.g. ``<foo/>``). * */ /** * Data structure for an XML attribute * * @typedef {object} SaxesAttribute * * @property {string} name The attribute's name. This is the combination of * prefix and local name. For instance ``a:b="c"`` would have ``a:b`` for name. * * @property {string} prefix The attribute's prefix. For instance ``a:b="c"`` * would have ``"a"`` for ``prefix``. * * @property {string} local The attribute's local name. For instance ``a:b="c"`` * would have ``"b"`` for ``local``. * * @property {string} uri The namespace URI of this attribute. * * @property {string} value The attribute's value. */ /** * @typedef XMLDecl * * @property {string} [version] The version specified by the XML declaration. * * @property {string} [encoding] The encoding specified by the XML declaration. * * @property {string} [standalone] The value of the standalone parameter * specified by the XML declaration. */ /** * @callback ResolvePrefix * * @param {string} prefix The prefix to check. * * @returns {string|undefined} The URI corresponding to the prefix, if any. */ /** * @typedef SaxesOptions * * @property {boolean} [xmlns] Whether to track namespaces. Unset means * ``false``. * * @property {boolean} [fragment] Whether to accept XML fragments. Unset means * ``false``. * * @property {boolean} [additionalNamespaces] A plain object whose key, value * pairs define namespaces known before parsing the XML file. It is not legal * to pass bindings for the namespaces ``"xml"`` or ``"xmlns"``. * * @property {ResolvePrefix} [resolvePrefix] A function that will be used if the * parser cannot resolve a namespace prefix on its own. * * @property {boolean} [position] Whether to track positions. Unset means * ``true``. * * @property {string} [fileName] A file name to use for error reporting. Leaving * this unset will report a file name of "undefined". "File name" is a loose * concept. You could use a URL to some resource, or any descriptive name you * like. */ class SaxesParser { /** * @param {SaxesOptions} opt The parser options. */ constructor(opt) { this._init(opt); } /** * Reset the parser state. * * @private */ _init(opt) { this.comment = ""; this.openWakaBang = ""; this.text = ""; this.name = ""; this.doctype = ""; this.piTarget = ""; this.piBody = ""; this.entity = ""; this.cdata = ""; this.xmlDeclName = ""; this.xmlDeclValue = ""; /** * The options passed to the constructor of this parser. * * @type {SaxesOptions} */ this.opt = opt || {}; /** * Indicates whether or not the parser is closed. If ``true``, wait for * the ``ready`` event to write again. * * @type {boolean} */ this.closed = false; /** * The XML declaration for this document. * * @type {XMLDecl} */ this.xmlDecl = { version: undefined, encoding: undefined, standalone: undefined, }; this.q = null; this.tags = []; this.tag = null; this.chunk = ""; this.chunkPosition = 0; this.i = 0; this.trailingCR = false; this.forbiddenState = FORBIDDEN_START; /** * A map of entity name to expansion. * * @type {Object.<string, string>} */ this.ENTITIES = Object.create(XML_ENTITIES); this.attribList = []; // The logic is organized so as to minimize the need to check // this.opt.fragment while parsing. const fragmentOpt = this.fragmentOpt = !!this.opt.fragment; this.state = fragmentOpt ? S_TEXT : S_INITIAL; // We want these to be all true if we are dealing with a fragment. this.reportedTextBeforeRoot = this.reportedTextAfterRoot = this.closedRoot = this.sawRoot = fragmentOpt; // An XML declaration is intially possible only when parsing whole // documents. this.xmlDeclPossible = !fragmentOpt; this.piIsXMLDecl = false; this.xmlDeclState = S_XML_DECL_NAME_START; this.xmlDeclExpects = ["version"]; this.requiredSeparator = false; this.entityReturnState = undefined; const xmlnsOpt = this.xmlnsOpt = !!this.opt.xmlns; if (xmlnsOpt) { // This is the function we use to perform name checks on PIs and entities. // When namespaces are used, colons are not allowed in PI target names or // entity names. So the check depends on whether namespaces are used. See: // // https://www.w3.org/XML/xml-names-19990114-errata.html // NE08 // this.nameStartCheck = isNCNameStartChar; this.nameCheck = isNCNameChar; this.isName = isNCName; this.processAttribs = this.processAttribsNS; this.pushAttrib = this.pushAttribNS; this.ns = Object.assign({ __proto__: null }, rootNS); const additional = this.opt.additionalNamespaces; if (additional) { nsMappingCheck(this, additional); Object.assign(this.ns, additional); } } else { this.nameStartCheck = isNameStartChar; this.nameCheck = isNameChar; this.isName = isName; this.processAttribs = this.processAttribsPlain; this.pushAttrib = this.pushAttribPlain; } this.trackPosition = this.opt.position !== false; /** The line number the parser is currently looking at. */ this.line = 1; /** The column the parser is currently looking at. */ this.column = 0; this.fileName = this.opt.fileName; this.onready(); } /** The stream position the parser is currently looking at. */ get position() { return this.chunkPosition + this.i; } /* eslint-disable class-methods-use-this */ /** * Event handler for text data. The default implementation is a no-op. * * @param {string} text The text data encountered by the parser. * */ ontext() {} /** * Event handler for processing instructions. The default implementation is a * no-op. * * @param {{target: string, body: string}} data The target and body of * the processing instruction. */ onprocessinginstruction() {} /** * Event handler for doctype. The default implementation is a no-op. * * @param {string} doctype The doctype contents. */ ondoctype() {} /** * Event handler for comments. The default implementation is a no-op. * * @param {string} comment The comment contents. */ oncomment() {} /** * Event handler for the start of an open tag. This is called as soon as we * have a tag name. The default implementation is a no-op. * * @param {SaxesTag} tag The tag. */ onopentagstart() {} /** * Event handler for an open tag. This is called when the open tag is * complete. (We've encountered the ">" that ends the open tag.) The default * implementation is a no-op. * * @param {SaxesTag} tag The tag. */ onopentag() {} /** * Event handler for a close tag. Note that for self-closing tags, this is * called right after ``onopentag``. The default implementation is a no-op. * * @param {SaxesTag} tag The tag. */ onclosetag() {} /** * Event handler for a CDATA section. This is called when ending the * CDATA section. The default implementation is a no-op. * * @param {string} cdata The contents of the CDATA section. */ oncdata() {} /** * Event handler for the stream end. This is called when the stream has been * closed with ``close`` or by passing ``null`` to ``write``. The default * implementation is a no-op. */ onend() {} /** * Event handler indicating parser readiness . This is called when the parser * is ready to parse a new document. The default implementation is a no-op. */ onready() {} /** * Event handler indicating an error. The default implementation throws the * error. Override with a no-op handler if you don't want this. * * @param {Error} err The error that occurred. */ onerror(err) { throw new Error(err); } /* eslint-enable class-methods-use-this */ /** * Report a parsing error. This method is made public so that client code may * check for issues that are outside the scope of this project and can report * errors. * * @param {Error} er The error to report. * * @returns this */ fail(er) { const message = (this.trackPosition) ? `${this.fileName}:${this.line}:${this.column}: ${er}` : er; this.onerror(new Error(message)); return this; } /** * Write a XML data to the parser. * * @param {string} chunk The XML data to write. * * @returns this */ write(chunk) { if (this.closed) { return this.fail("cannot write after close; assign an onready handler."); } let end = false; if (chunk === null) { end = true; chunk = ""; } if (typeof chunk === "object") { chunk = chunk.toString(); } // We checked if performing a pre-decomposition of the string into an array // of single complete characters (``Array.from(chunk)``) would be faster // than the current repeated calls to ``codePointAt``. As of August 2018, it // isn't. (There may be Node-specific code that would perform faster than // ``Array.from`` but don't want to be dependent on Node.) let limit = chunk.length; if (this.trailingCR) { // The previous chunk had a trailing cr. We need to handle it now. chunk = `\r${chunk}`; } if (!end && chunk[limit - 1] === CR) { // The chunk ends with a trailing CR. We cannot know how to handle it // until we get the next chunk or the end of the stream. So save it for // later. limit--; this.trailingCR = true; } this.limit = limit; this.chunk = chunk; this.i = 0; while (this.i < limit) { this[this.state](); } this.chunkPosition += limit; return end ? this.end() : this; } /** * Close the current stream. Perform final well-formedness checks and reset * the parser tstate. * * @returns this */ close() { return this.write(null); } /** * Get a single code point out of the current chunk. This updates the current * position if we do position tracking. * * @private * * @returns {number} The character read. */ getCode() { const { chunk, i } = this; // Using charCodeAt and handling the surrogates ourselves is faster // than using codePointAt. let code = chunk.charCodeAt(i); let skip = 1; switch (code) { case CR: // We may get NaN if we read past the end of the chunk, which is // fine. if (chunk.charCodeAt(i + 1) === NL) { // A \r\n sequence is converted to \n so we have to skip over the next // character. We already know it has a size of 1 so ++ is fine here. skip++; } // Otherwise, a \r is just converted to \n, so we don't have to skip // ahead. // In either case, \r becomes \n. code = NL; /* yes, fall through */ case NL: this.line++; this.column = 0; break; default: this.column++; if (code >= 0xD800 && code <= 0xDBFF) { code = 0x10000 + ((code - 0xD800) * 0x400) + (chunk.charCodeAt(i + 1) - 0xDC00); this.column++; skip++; } if (!isChar(code)) { this.fail("disallowed character."); } } this.i += skip; return code; } /** * @callback CharacterTest * * @private * * @param {string} c The character to test. * * @returns {boolean} ``true`` if the method should continue capturing text, * ``false`` otherwise. */ /** * Capture characters into a buffer until encountering one of a set of * characters. * * @private * * @param {number[]} chars An array of codepoints. Encountering a character in * the array ends the capture. * * @param {string} buffer The name of the buffer to save into. * * @return {number|undefined} The character code that made the capture end, or * ``undefined`` if we hit the end of the chunk. */ captureTo(chars, buffer) { const { chunk, limit, i: start } = this; while (this.i < limit) { const c = this.getCode(); if (chars.includes(c)) { // This is faster than adding codepoints one by one. this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return c; } } // This is faster than adding codepoints one by one. this[buffer] += chunk.substring(start); return undefined; } /** * Capture characters into a buffer until encountering a character. * * @private * * @param {number} char The codepoint that ends the capture. * * @param {string} buffer The name of the buffer to save into. * * @return {boolean} ``true`` if we ran into the character. Otherwise, we ran * into the end of the current chunk. */ captureToChar(char, buffer) { const { chunk, limit, i: start } = this; while (this.i < limit) { const c = this.getCode(); if (c === char) { // This is faster than adding codepoints one by one. this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return true; } } // This is faster than adding codepoints one by one. this[buffer] += chunk.substring(start); return false; } /** * Capture characters that satisfy ``isNameChar`` into the ``name`` field of * this parser. * * @private * * @return {number|undefined} The character code that made the test fail, or * ``undefined`` if we hit the end of the chunk. */ captureNameChars() { const { chunk, limit, i: start } = this; while (this.i < limit) { const c = this.getCode(); if (!isNameChar(c)) { // This is faster than adding codepoints one by one. this.name += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return c; } } // This is faster than adding codepoints one by one. this.name += chunk.substring(start); return undefined; } /** * Capture characters into a buffer while ``this.nameCheck`` run on the * character read returns true. * * @private * * @param {string} buffer The name of the buffer to save into. * * @return {number|undefined} The character code that made the test fail, or * ``undefined`` if we hit the end of the chunk. */ captureWhileNameCheck(buffer) { const { chunk, limit, i: start } = this; while (this.i < limit) { const c = this.getCode(); if (!this.nameCheck(c)) { // This is faster than adding codepoints one by one. this[buffer] += chunk.substring(start, this.i - (c <= 0xFFFF ? 1 : 2)); return c; } } // This is faster than adding codepoints one by one. this[buffer] += chunk.substring(start); return undefined; } /** * Skip white spaces. * * @private * * @return {string|undefined} The character that ended the skip, or * ``undefined`` if we hit the end of the chunk. */ skipSpaces() { const { limit } = this; while (this.i < limit) { const c = this.getCode(); if (!isS(c)) { return c; } } return undefined; } // STATE HANDLERS /** @private */ sInitial() { // We are essentially peeking at the first character of the chunk. Since // S_INITIAL can be in effect only when we start working on the first chunk, // the index at which we must look is necessarily 0. Note also that the // following tests do not depend on decoding surrogates. const c = this.chunk.charCodeAt(0); // If the initial character is 0xFEFF, ignore it. if (c === 0xFEFF) { this.i++; this.column++; } else if (isS(c)) { this.i++; this.column++; // An XML declaration cannot appear after initial spaces. this.xmlDeclPossible = false; } this.state = S_BEGIN_WHITESPACE; } /** @private */ sBeginWhitespace() { const c = this.skipSpaces(); if (c === LESS) { this.state = S_OPEN_WAKA; } else if (c) { // have to process this as a text node. // weird, but happens. if (!this.reportedTextBeforeRoot) { this.fail("text data outside of root node."); this.reportedTextBeforeRoot = true; } this.text = String.fromCodePoint(c); this.state = S_TEXT; this.xmlDeclPossible = false; } } /** @private */ sText() { // // We did try a version of saxes where the S_TEXT state was split in two // states: one for text inside the root element, and one for text // outside. This was avoiding having to test this.tags.length to decide what // implementation to actually use. // // Peformance testing on gigabyte-size files did not show any advantage to // using the two states solution instead of the current one. Conversely, it // made the code a bit more complicated elsewhere. For instance, a comment // can appear before the root element so when a comment ended it was // necessary to determine whether to return to the S_TEXT state or to the // new text-outside-root state. // if (this.tags.length !== 0) { this.handleTextInRoot(); } else { this.handleTextOutsideRoot(); } } /** @private */ handleTextInRoot() { // This is essentially a specialized version of captureTo which is optimized // for performing the ]]> check. A previous version of this code, checked // ``this.text`` for the presence of ]]>. It simplified the code but was // very costly when character data contained a lot of entities to be parsed. // // Since we are using a specialized loop, we also keep track of the presence // of ]]> in text data. The sequence ]]> is forbidden to appear as-is. // const { chunk, limit, i: start } = this; let { forbiddenState } = this; let c; // eslint-disable-next-line no-labels, no-restricted-syntax scanLoop: while (this.i < limit) { const code = this.getCode(); switch (code) { case LESS: this.state = S_OPEN_WAKA; c = code; forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels break scanLoop; case AMP: this.state = S_ENTITY; this.entityReturnState = S_TEXT; c = code; forbiddenState = FORBIDDEN_START; // eslint-disable-next-line no-labels break scanLoop; case CLOSE_BRACKET: switch (forbiddenState) { case FORBIDDEN_START: forbiddenState = FORBIDDEN_BRACKET; break; case FORBIDDEN_BRACKET: forbiddenState = FORBIDDEN_BRACKET_BRACKET; break; case FORBIDDEN_BRACKET_BRACKET: break; default: throw new Error("impossible state"); } break; case GREATER: if (forbiddenState === FORBIDDEN_BRACKET_BRACKET) { this.fail("the string \"]]>\" is disallowed in char data."); } forbiddenState = FORBIDDEN_START; break; default: forbiddenState = FORBIDDEN_START; } } this.forbiddenState = forbiddenState; // This is faster than adding codepoints one by one. this.text += chunk.substring(start, c === undefined ? undefined : (this.i - (c <= 0xFFFF ? 1 : 2))); } /** @private */ handleTextOutsideRoot() { // This is essentially a specialized version of captureTo which is optimized // for performing the ]]> check. A previous version of this code, checked // ``this.text`` for the presence of ]]>. It simplified the code but was // very costly when character data contained a lot of entities to be parsed. // // Since we are using a specialized loop, we also keep track of the presence // of non-space characters in the text since these are errors when appearing // outside the document root element. // const { chunk, limit, i: start } = this; let nonSpace = false; let c; // eslint-disable-next-line no-labels, no-restricted-syntax outRootLoop: while (this.i < limit) { const code = this.getCode(); switch (code) { case LESS: this.state = S_OPEN_WAKA; c = code; // eslint-disable-next-line no-labels break outRootLoop; case AMP: this.state = S_ENTITY; this.entityReturnState = S_TEXT; c = code; nonSpace = true; // eslint-disable-next-line no-labels break outRootLoop; default: if (!isS(code)) { nonSpace = true; } } } // This is faster than adding codepoints one by one. this.text += chunk.substring(start, c === undefined ? undefined : (this.i - (c <= 0xFFFF ? 1 : 2))); if (!nonSpace) { return; } // We use the reportedTextBeforeRoot and reportedTextAfterRoot flags // to avoid reporting errors for every single character that is out of // place. if (!this.sawRoot && !this.reportedTextBeforeRoot) { this.fail("text data outside of root node."); this.reportedTextBeforeRoot = true; } if (this.closedRoot && !this.reportedTextAfterRoot) { this.fail("text data outside of root node."); this.reportedTextAfterRoot = true; } } /** @private */ sOpenWaka() { const c = this.getCode(); // either a /, ?, !, or text is coming next. if (isNameStartChar(c)) { this.state = S_OPEN_TAG; this.name = String.fromCodePoint(c); this.xmlDeclPossible = false; } else { switch (c) { case FORWARD_SLASH: this.state = S_CLOSE_TAG; this.xmlDeclPossible = false; break; case BANG: this.state = S_OPEN_WAKA_BANG; this.openWakaBang = ""; this.xmlDeclPossible = false; break; case QUESTION: this.state = S_PI_FIRST_CHAR; break; default: this.fail("disallowed character in tag name."); this.state = S_TEXT; this.xmlDeclPossible = false; } } } /** @private */ sOpenWakaBang() { this.openWakaBang += String.fromCodePoint(this.getCode()); switch (this.openWakaBang) { case "[CDATA[": if (!this.sawRoot && !this.reportedTextBeforeRoot) { this.fail("text data outside of root node."); this.reportedTextBeforeRoot = true; } if (this.closedRoot && !this.reportedTextAfterRoot) { this.fail("text data outside of root node."); this.reportedTextAfterRoot = true; } this.state = S_CDATA; this.openWakaBang = ""; break; case "--": this.state = S_COMMENT; this.openWakaBang = ""; break; case "DOCTYPE": this.state = S_DOCTYPE; if (this.doctype || this.sawRoot) { this.fail("inappropriately located doctype declaration."); } this.openWakaBang = ""; break; default: // 7 happens to be the maximum length of the string that can possibly // match one of the cases above. if (this.openWakaBang.length >= 7) { this.fail("incorrect syntax."); } } } /** @private */ sDoctype() { const c = this.captureTo(DOCTYPE_TERMINATOR, "doctype"); if (c === GREATER) { this.state = S_TEXT; if (this.text.length !== 0) { this.closeText(); } this.ondoctype(this.doctype); this.doctype = true; // just remember that we saw it. } else if (c) { this.doctype += String.fromCodePoint(c); if (c === OPEN_BRACKET) { this.state = S_DTD; } else if (isQuote(c)) { this.state = S_DOCTYPE_QUOTE; this.q = c; } } } /** @private */ sDoctypeQuote() { const { q } = this; if (this.captureToChar(q, "doctype")) { this.doctype += String.fromCodePoint(q); this.q = null; this.state = S_DOCTYPE; } } /** @private */ sDTD() { const c = this.captureTo(DTD_TERMINATOR, "doctype"); if (!c) { return; } this.doctype += String.fromCodePoint(c); if (c === CLOSE_BRACKET) { this.state = S_DOCTYPE; } else if (c === LESS) { this.state = S_DTD_OPEN_WAKA; } else if (isQuote(c)) { this.state = S_DTD_QUOTED; this.q = c; } } /** @private */ sDTDQuoted() { const { q } = this; if (this.captureToChar(q, "doctype")) { this.doctype += String.fromCodePoint(q); this.state = S_DTD; this.q = null; } } /** @private */ sDTDOpenWaka() { const c = this.getCode(); this.doctype += String.fromCodePoint(c); switch (c) { case BANG: this.state = S_DTD_OPEN_WAKA_BANG; this.openWakaBang = ""; break; case QUESTION: this.state = S_DTD_PI; break; default: this.state = S_DTD; } } /** @private */ sDTDOpenWakaBang() { const char = String.fromCodePoint(this.getCode()); const owb = this.openWakaBang += char; this.doctype += char; if (owb !== "-") { this.state = owb === "--" ? S_DTD_COMMENT : S_DTD; this.openWakaBang = ""; } } /** @private */ sDTDComment() { if (this.captureToChar(MINUS, "doctype")) { this.doctype += "-"; this.state = S_DTD_COMMENT_ENDING; } } /** @private */ sDTDCommentEnding() { const c = this.getCode(); this.doctype += String.fromCodePoint(c); this.state = c === MINUS ? S_DTD_COMMENT_ENDED : S_DTD_COMMENT; } /** @private */ sDTDCommentEnded() { const c = this.getCode(); this.doctype += String.fromCodePoint(c); if (c === GREATER) { this.state = S_DTD; } else { this.fail("malformed comment."); // <!-- blah -- bloo --> will be recorded as // a comment of " blah -- bloo " this.state = S_DTD_COMMENT; } } /** @private */ sDTDPI() { if (this.captureToChar(QUESTION, "doctype")) { this.doctype += "?"; this.state = S_DTD_PI_ENDING; } } /** @private */ sDTDPIEnding() { const c = this.getCode(); this.doctype += String.fromCodePoint(c); if (c === GREATER) { this.state = S_DTD; } } /** @private */ sComment() { if (this.captureToChar(MINUS, "comment")) { this.state = S_COMMENT_ENDING; } } /** @private */ sCommentEnding() { const c = this.getCode(); if (c === MINUS) { this.state = S_COMMENT_ENDED; if (this.text.length !== 0) { this.closeText(); } this.oncomment(this.comment); this.comment = ""; } else { this.comment += `-${String.fromCodePoint(c)}`; this.state = S_COMMENT; } } /** @private */ sCommentEnded() { const c = this.getCode(); if (c !== GREATER) { this.fail("malformed comment."); // <!-- blah -- bloo --> will be recorded as // a comment of " blah -- bloo " this.comment += `--${String.fromCodePoint(c)}`; this.state = S_COMMENT; } else { this.state = S_TEXT; } } /** @private */ sCData() { if (this.captureToChar(CLOSE_BRACKET, "cdata")) { this.state = S_CDATA_ENDING; } } /** @private */ sCDataEnding() { const c = this.getCode(); if (c === CLOSE_BRACKET) { this.state = S_CDATA_ENDING_2; } else { this.cdata += `]${String.fromCodePoint(c)}`; this.state = S_CDATA; } } /** @private */ sCDataEnding2() { const c = this.getCode(); switch (c) { case GREATER: if (this.text.length !== 0) { this.closeText(); } this.oncdata(this.cdata); this.cdata = ""; this.state = S_TEXT; break; case CLOSE_BRACKET: this.cdata += "]"; break; default: this.cdata += `]]${String.fromCodePoint(c)}`; this.state = S_CDATA; } } /** @private */ sPIFirstChar() { const c = this.getCode(); if (this.nameStartCheck(c)) { this.piTarget += String.fromCodePoint(c); this.state = S_PI_REST; } else if (c === QUESTION || isS(c)) { this.fail("processing instruction without a target."); this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY; } else { this.fail("disallowed character in processing instruction name."); this.piTarget += String.fromCodePoint(c); this.state = S_PI_REST; } } /** @private */ sPIRest() { const c = this.captureWhileNameCheck("piTarget"); if ((c === QUESTION || isS(c))) { this.piIsXMLDecl = this.piTarget === "xml"; if (this.piIsXMLDecl && !this.xmlDeclPossible) { this.fail("an XML declaration must be at the start of the document."); } this.state = c === QUESTION ? S_PI_ENDING : S_PI_BODY; } else if (c) { this.fail("disallowed character in processing instruction name."); this.piTarget += String.fromCodePoint(c); } } /** @private */ sPIBody() { let c; if (this.piIsXMLDecl) { switch (this.xmlDeclState) { case S_XML_DECL_NAME_START: { c = this.getCode(); if (isS(c)) { c = this.skipSpaces(); } else if (this.requiredSeparator && c !== QUESTION) { this.fail("whitespace required."); } this.requiredSeparator = false; // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; return; } if (c) { this.xmlDeclState = S_XML_DECL_NAME; this.xmlDeclName = String.fromCodePoint(c); } break; } case S_XML_DECL_NAME: c = this.captureTo(XML_DECL_NAME_TERMINATOR, "xmlDeclName"); // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; return; } if (isS(c) || c === EQUAL) { if (!this.xmlDeclExpects.includes(this.xmlDeclName)) { switch (this.xmlDeclName.length) { case 0: this.fail("did not expect any more name/value pairs."); break; case 1: this.fail(`expected the name ${this.xmlDeclExpects[0]}.`); break; default: this.fail(`expected one of ${this.xmlDeclExpects.join(", ")}`); } } this.xmlDeclState = (c === EQUAL) ? S_XML_DECL_VALUE_START : S_XML_DECL_EQ; } break; case S_XML_DECL_EQ: c = this.getCode(); // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; return; } if (!isS(c)) { if (c !== EQUAL) { this.fail("value required."); } this.xmlDeclState = S_XML_DECL_VALUE_START; } break; case S_XML_DECL_VALUE_START: c = this.getCode(); // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; return; } if (!isS(c)) { if (!isQuote(c)) { this.fail("value must be quoted."); this.q = SPACE; } else { this.q = c; } this.xmlDeclState = S_XML_DECL_VALUE; } break; case S_XML_DECL_VALUE: c = this.captureTo([this.q, QUESTION], "xmlDeclValue"); // The question mark character is not valid inside any of the XML // declaration name/value pairs. if (c === QUESTION) { this.state = S_PI_ENDING; return; } if (c) { switch (this.xmlDeclName) { case "version": if (!/^1\.[0-9]+$/.test(this.xmlDeclValue)) { this.fail("version number must match /^1\\.[0-9]+$/."); } this.xmlDeclExpects = ["encoding", "standalone"]; this.xmlDecl.version = this.xmlDeclValue; break; case "encoding": if (!/^[A-Za-z][A-Za-z0-9._-]*$/.test(this.xmlDeclValue)) { this.fail("encoding value must match \ /^[A-Za-z0-9][A-Za-z0-9._-]*$/."); } this.xmlDeclExpects = ["standalone"]; this.xmlDecl.encoding = this.xmlDeclValue; break; case "standalone": if (this.xmlDeclValue !== "yes" && this.xmlDeclValue !== "no") { this.fail("standalone value must match \"yes\" or \"no\"."); } this.xmlDeclExpects = []; this.xmlDecl.standalone = this.xmlDeclValue; break; default: // We don't need to raise an error here since we've already // raised one when checking what name was expected. } this.xmlDeclName = this.xmlDeclValue = ""; this.xmlDeclState = S_XML_DECL_NAME_START; this.requiredSeparator = true; } break; default: throw new Error(this, `Unknown XML declaration state: ${this.xmlDeclState}`); } } else if (this.piBody.length === 0) { c = this.getCode(); if (c === QUESTION) { this.state = S_PI_ENDING; } else if (!isS(c)) { this.piBody = String.fromCodePoint(c); } } // The question mark character is not valid inside any of the XML // declaration name/value pairs. else if (this.captureToChar(QUESTION, "piBody")) { this.state = S_PI_ENDING; } } /** @private */ sPIEnding() { const c = this.getCode(); if (this.piIsXMLDecl) { if (c === GREATER) { if (this.piTarget !== "xml") { this.fail("processing instructions are not allowed before root."); } else if (this.xmlDeclState !== S_XML_DECL_NAME_START) { this.fail("XML declaration is incomplete."); } else if (this.xmlDeclExpects.includes("version")) { this.fail("XML declaration must contain a version."); } this.xmlDeclName = this.xmlDeclValue = ""; this.requiredSeparator = false; this.piTarget = this.piBody = ""; this.state = S_TEXT; } else { // We got here because the previous character was a ?, but the // question mark character is not valid inside any of the XML // declaration name/value pairs. this.fail( "The character ? is disallowed anywhere in XML declarations."); } } else if (c === GREATER) { if (this.piTarget.trim().toLowerCase() === "xml") { this.fail("the XML declaration must appear at the start of the document."); } if (this.text.length !== 0) { this.closeText(); } this.onprocessinginstruction({ target: this.piTarget, body: this.piBody, }); this.piTarget = this.piBody = ""; this.state = S_TEXT; } else if (c === QUESTION) { // We ran into ?? as part of a processing instruction. We initially // took the first ? as a sign that the PI was ending, but it is // not. So we have to add it to the body but we take the new ? as a // sign that the PI is ending. this.piBody += "?"; } else { this.piBody += `?${String.fromCodePoint(c)}`; this.state = S_PI_BODY; } this.xmlDeclPossible = false; } /** @private */ sOpenTag() { const c = this.captureNameChars(); if (!c) { return; } const tag = this.tag = { name: this.name, attributes: Object.create(null), }; if (this.xmlnsOpt) { tag.ns = Object.create(null); } if (this.text.length !== 0) { this.closeText(); } this.onopentagstart(tag); this.sawRoot = true; if (!this.fragmentOpt && this.closedRoot) { this.fail("documents may contain only one root."); } switch (c) { case GREATER: this.openTag(); break; case FORWARD_SLASH: this.state = S_OPEN_TAG_SLASH; break; default: if (!isS(c)) { this.fail("disallowed character in tag name."); } this.state = S_ATTRIB; } } /** @private */ sOpenTagSlash() { const c = this.getCode(); if (c === GREATER) { this.openSelfClosingTag(); } else { this.fail("forward-slash in opening tag not followed by >."); this.state = S_ATTRIB; } } /** @private */ sAttrib() { const c = this.skipSpaces(); if (!c) { return; } if (isNameStartChar(c)) { this.name = String.fromCodePoint(c); this.state = S_ATTRIB_NAME; } else if (c === GREATER) { this.openTag(); } else if (c === FORWARD_SLASH) { this.state = S_OPEN_TAG_SLASH; } else { this.fail("disallowed character in attribute name."); } } /** @private */ pushAttribNS(name, value) { const { prefix, local } = this.qname(name); this.attribList.push({ name, prefix, local, value, uri: undefined }); if (prefix === "xmlns") { const trimmed = value.trim(); this.tag.ns[local] = trimmed; nsPairCheck(this, local, trimmed); } else if (name === "xmlns") { const trimmed = value.trim(); this.tag.ns[""] = trimmed; nsPairCheck(this, "", trimmed); } } /** @private */ pushAttribPlain(name, value) { this.attribList.push({ name, value }); } /** @private */ sAttribName() { const c = this.captureNameChars(); if (c === EQUAL) { this.state = S_ATTRIB_VALUE; } else if (isS(c)) { this.state = S_ATTRIB_NAME_SAW_WHITE; } else if (c === GREATER) { this.fail("attribute without value."); this.pushAttrib(this.name, this.name); this.name = this.text = ""; this.openTag(); } else if (c) { this.fail("disallowed character in attribute name."); } } /** @private */ sAttribNameSawWhite() { const c = this.skipSpaces(); if (!c) { return; } if (c === EQUAL) { this.state = S_ATTRIB_VALUE; } else { this.fail("attribute without value."); this.tag.attributes[this.name] = ""; this.text = ""; this.name = ""; if (c === GREATER) { this.openTag(); } else if (isNameStartChar(c)) { this.name = String.fromCodePoint(c); this.state = S_ATTRIB_NAME; } else { this.fail("disallowed character in attribute name."); this.state = S_ATTRIB; } } } /** @private */ sAttribValue() { const c = this.getCode(); if (isQuote(c)) { this.q = c; this.state = S_ATTRIB_VALUE_QUOTED; } else if (!isS(c)) { this.fail("unquoted attribute value."); this.state = S_ATTRIB_VALUE_UNQUOTED; this.text = String.fromCodePoint(c); } } /** @private */ sAttribValueQuoted() { // We deliberately do not use captureTo here. The specialized code we use // here is faster than using captureTo. const { q } = this; const { chunk, limit, i: start } = this; // eslint-disable-next-line no-constant-condition while (true) { if (this.i >= limit) { // This is faster than adding codepoints one by one. this.text += chunk.substring(start); return; } const code = this.getCode(); if (code === q || code === AMP || code === LESS) { // This is faster than adding codepoints one by one. const slice = chunk.substring(start, this.i - (code <= 0xFFFF ? 1 : 2)); switch (code) { case q: this.pushAttrib(this.name, this.text + slice); this.name = this.text = ""; this.q = null; this.state = S_ATTRIB_VALUE_CLOSED; return; case AMP: this.text += slice; this.state = S_ENTITY; this.entityReturnState = S_ATTRIB_VALUE_QUOTED; return; default: this.text += slice; this.fail("disallowed character."); return; } } } } /** @private */ sAttribValueClosed() { const c = this.getCode(); if (isS(c)) { this.state = S_ATTRIB; } else if (c === GREATER) { this.openTag(); } else if (c === FORWARD_SLASH) { this.state = S_OPEN_TAG_SLASH; } else if (isNameStartChar(c)) { this.fail("no whitespace between attributes."); this.name = String.fromCodePoint(c); this.state = S_ATTRIB_NAME; } else { this.fail("disallowed character in attribute name."); } } /** @private */ sAttribValueUnquoted() { const c = this.captureTo(ATTRIB_VALUE_UNQUOTED_TERMINATOR, "text"); if (c === AMP) { this.state = S_ENTITY; this.entityReturnState = S_ATTRIB_VALUE_UNQUOTED; } else if (c === LESS) { this.fail("disallowed character."); } else if (c) { if (this.text.includes("]]>")) { this.fail("the string \"]]>\" is disallowed in char data."); } this.pushAttrib(this.name, this.text); this.name = this.text = ""; if (c === GREATER) { this.openTag(); } else { this.state = S_ATTRIB; } } } /** @private */ sCloseTag() { const c = this.captureNameChars(); if (c === GREATER) { this.closeTag(); } else if (isS(c)) { this.state = S_CLOSE_TAG_SAW_WHITE; } else if (c) { this.fail("disallowed character in closing tag."); } } /** @private */ sCloseTagSawWhite() { const c = this.skipSpaces(); if (c === GREATER) { this.closeTag(); } else if (c) { this.fail("disallowed character in closing tag."); } } /** @private */ sEntity() { if (this.captureToChar(SEMICOLON, "entity")) { this.state = this.entityReturnState; if (this.entity === "") { this.fail("empty entity name."); this.text += "&;"; return; } this.text += this.parseEntity(this.entity); this.entity = ""; } } // END OF STATE HANDLERS /** * End parsing. This performs final well-formedness checks and resets the * parser to a clean state. * * @private * * @returns this */ end() { if (!this.sawRoot) { this.fail("document must contain a root element."); } const { tags } = this; while (tags.length > 0) { const tag = tags.pop(); this.fail(`unclosed tag: ${tag.name}`); } if ((this.state !== S_INITIAL) && (this.state !== S_TEXT)) { this.fail("unexpected end."); } if (this.text.length !== 0) { this.closeText(); } this.closed = true; this.onend(); this._init(this.opt); return this; } /** * If there's text to emit ``ontext``, emit it. * * @private */ closeText() { this.ontext(this.text); this.text = ""; } /** * Resolve a namespace prefix. * * @param {string} prefix The prefix to resolve. * * @returns {string|undefined} The namespace URI or ``undefined`` if the * prefix is not defined. */ resolve(prefix) { let uri = this.tag.ns[prefix]; if (uri !== undefined) { return uri; }