// UNPKG
// parse5
// Version: (value not captured in this extract)
// 1,259 lines, 114 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Parser = void 0; const index_js_1 = require("../tokenizer/index.js"); const open_element_stack_js_1 = require("./open-element-stack.js"); const formatting_element_list_js_1 = require("./formatting-element-list.js"); const default_js_1 = require("../tree-adapters/default.js"); const doctype = require("../common/doctype.js"); const foreignContent = require("../common/foreign-content.js"); const error_codes_js_1 = require("../common/error-codes.js"); const unicode = require("../common/unicode.js"); const html_js_1 = require("../common/html.js"); const token_js_1 = require("../common/token.js"); //Misc constants const HIDDEN_INPUT_TYPE = 'hidden'; //Adoption agency loops iteration count const AA_OUTER_LOOP_ITER = 8; const AA_INNER_LOOP_ITER = 3; //Insertion modes var InsertionMode; (function (InsertionMode) { InsertionMode[InsertionMode["INITIAL"] = 0] = "INITIAL"; InsertionMode[InsertionMode["BEFORE_HTML"] = 1] = "BEFORE_HTML"; InsertionMode[InsertionMode["BEFORE_HEAD"] = 2] = "BEFORE_HEAD"; InsertionMode[InsertionMode["IN_HEAD"] = 3] = "IN_HEAD"; InsertionMode[InsertionMode["IN_HEAD_NO_SCRIPT"] = 4] = "IN_HEAD_NO_SCRIPT"; InsertionMode[InsertionMode["AFTER_HEAD"] = 5] = "AFTER_HEAD"; InsertionMode[InsertionMode["IN_BODY"] = 6] = "IN_BODY"; InsertionMode[InsertionMode["TEXT"] = 7] = "TEXT"; InsertionMode[InsertionMode["IN_TABLE"] = 8] = "IN_TABLE"; InsertionMode[InsertionMode["IN_TABLE_TEXT"] = 9] = "IN_TABLE_TEXT"; InsertionMode[InsertionMode["IN_CAPTION"] = 10] = "IN_CAPTION"; InsertionMode[InsertionMode["IN_COLUMN_GROUP"] = 11] = "IN_COLUMN_GROUP"; InsertionMode[InsertionMode["IN_TABLE_BODY"] = 12] = "IN_TABLE_BODY"; InsertionMode[InsertionMode["IN_ROW"] = 13] = "IN_ROW"; InsertionMode[InsertionMode["IN_CELL"] = 14] = "IN_CELL"; InsertionMode[InsertionMode["IN_SELECT"] = 15] = "IN_SELECT"; InsertionMode[InsertionMode["IN_SELECT_IN_TABLE"] = 16] = "IN_SELECT_IN_TABLE"; 
InsertionMode[InsertionMode["IN_TEMPLATE"] = 17] = "IN_TEMPLATE"; InsertionMode[InsertionMode["AFTER_BODY"] = 18] = "AFTER_BODY"; InsertionMode[InsertionMode["IN_FRAMESET"] = 19] = "IN_FRAMESET"; InsertionMode[InsertionMode["AFTER_FRAMESET"] = 20] = "AFTER_FRAMESET"; InsertionMode[InsertionMode["AFTER_AFTER_BODY"] = 21] = "AFTER_AFTER_BODY"; InsertionMode[InsertionMode["AFTER_AFTER_FRAMESET"] = 22] = "AFTER_AFTER_FRAMESET"; })(InsertionMode || (InsertionMode = {})); const BASE_LOC = { startLine: -1, startCol: -1, startOffset: -1, endLine: -1, endCol: -1, endOffset: -1, }; const TABLE_STRUCTURE_TAGS = new Set([html_js_1.TAG_ID.TABLE, html_js_1.TAG_ID.TBODY, html_js_1.TAG_ID.TFOOT, html_js_1.TAG_ID.THEAD, html_js_1.TAG_ID.TR]); const defaultParserOptions = { scriptingEnabled: true, sourceCodeLocationInfo: false, treeAdapter: default_js_1.defaultTreeAdapter, onParseError: null, }; //Parser class Parser { constructor(options, document, fragmentContext = null, scriptHandler = null) { this.fragmentContext = fragmentContext; this.scriptHandler = scriptHandler; this.currentToken = null; this.stopped = false; this.insertionMode = InsertionMode.INITIAL; this.originalInsertionMode = InsertionMode.INITIAL; this.headElement = null; this.formElement = null; /** Indicates that the current node is not an element in the HTML namespace */ this.currentNotInHTML = false; /** * The template insertion mode stack is maintained from the left. * Ie. the topmost element will always have index 0. */ this.tmplInsertionModeStack = []; this.pendingCharacterTokens = []; this.hasNonWhitespacePendingCharacterToken = false; this.framesetOk = true; this.skipNextNewLine = false; this.fosterParentingEnabled = false; this.options = Object.assign(Object.assign({}, defaultParserOptions), options); this.treeAdapter = this.options.treeAdapter; this.onParseError = this.options.onParseError; // Always enable location info if we report parse errors. 
if (this.onParseError) { this.options.sourceCodeLocationInfo = true; } this.document = document !== null && document !== void 0 ? document : this.treeAdapter.createDocument(); this.tokenizer = new index_js_1.Tokenizer(this.options, this); this.activeFormattingElements = new formatting_element_list_js_1.FormattingElementList(this.treeAdapter); this.fragmentContextID = fragmentContext ? (0, html_js_1.getTagID)(this.treeAdapter.getTagName(fragmentContext)) : html_js_1.TAG_ID.UNKNOWN; this._setContextModes(fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : this.document, this.fragmentContextID); this.openElements = new open_element_stack_js_1.OpenElementStack(this.document, this.treeAdapter, this); } // API static parse(html, options) { const parser = new this(options); parser.tokenizer.write(html, true); return parser.document; } static getFragmentParser(fragmentContext, options) { const opts = Object.assign(Object.assign({}, defaultParserOptions), options); //NOTE: use a <template> element as the fragment context if no context element was provided, //so we will parse in a "forgiving" manner fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : (fragmentContext = opts.treeAdapter.createElement(html_js_1.TAG_NAMES.TEMPLATE, html_js_1.NS.HTML, [])); //NOTE: create a fake element which will be used as the `document` for fragment parsing. //This is important for jsdom, where a new `document` cannot be created. This led to //fragment parsing messing with the main `document`. 
const documentMock = opts.treeAdapter.createElement('documentmock', html_js_1.NS.HTML, []); const parser = new this(opts, documentMock, fragmentContext); if (parser.fragmentContextID === html_js_1.TAG_ID.TEMPLATE) { parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE); } parser._initTokenizerForFragmentParsing(); parser._insertFakeRootElement(); parser._resetInsertionMode(); parser._findFormInFragmentContext(); return parser; } getFragment() { const rootElement = this.treeAdapter.getFirstChild(this.document); const fragment = this.treeAdapter.createDocumentFragment(); this._adoptNodes(rootElement, fragment); return fragment; } //Errors _err(token, code, beforeToken) { var _a; if (!this.onParseError) return; const loc = (_a = token.location) !== null && _a !== void 0 ? _a : BASE_LOC; const err = { code, startLine: loc.startLine, startCol: loc.startCol, startOffset: loc.startOffset, endLine: beforeToken ? loc.startLine : loc.endLine, endCol: beforeToken ? loc.startCol : loc.endCol, endOffset: beforeToken ? loc.startOffset : loc.endOffset, }; this.onParseError(err); } //Stack events onItemPush(node, tid, isTop) { var _a, _b; (_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node); if (isTop && this.openElements.stackTop > 0) this._setContextModes(node, tid); } onItemPop(node, isTop) { var _a, _b; if (this.options.sourceCodeLocationInfo) { this._setEndLocation(node, this.currentToken); } (_b = (_a = this.treeAdapter).onItemPop) === null || _b === void 0 ? 
void 0 : _b.call(_a, node, this.openElements.current); if (isTop) { let current; let currentTagId; if (this.openElements.stackTop === 0 && this.fragmentContext) { current = this.fragmentContext; currentTagId = this.fragmentContextID; } else { ({ current, currentTagId } = this.openElements); } this._setContextModes(current, currentTagId); } } _setContextModes(current, tid) { const isHTML = current === this.document || this.treeAdapter.getNamespaceURI(current) === html_js_1.NS.HTML; this.currentNotInHTML = !isHTML; this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current); } _switchToTextParsing(currentToken, nextTokenizerState) { this._insertElement(currentToken, html_js_1.NS.HTML); this.tokenizer.state = nextTokenizerState; this.originalInsertionMode = this.insertionMode; this.insertionMode = InsertionMode.TEXT; } switchToPlaintextParsing() { this.insertionMode = InsertionMode.TEXT; this.originalInsertionMode = InsertionMode.IN_BODY; this.tokenizer.state = index_js_1.TokenizerMode.PLAINTEXT; } //Fragment parsing _getAdjustedCurrentElement() { return this.openElements.stackTop === 0 && this.fragmentContext ? 
this.fragmentContext : this.openElements.current; } _findFormInFragmentContext() { let node = this.fragmentContext; while (node) { if (this.treeAdapter.getTagName(node) === html_js_1.TAG_NAMES.FORM) { this.formElement = node; break; } node = this.treeAdapter.getParentNode(node); } } _initTokenizerForFragmentParsing() { if (!this.fragmentContext || this.treeAdapter.getNamespaceURI(this.fragmentContext) !== html_js_1.NS.HTML) { return; } switch (this.fragmentContextID) { case html_js_1.TAG_ID.TITLE: case html_js_1.TAG_ID.TEXTAREA: { this.tokenizer.state = index_js_1.TokenizerMode.RCDATA; break; } case html_js_1.TAG_ID.STYLE: case html_js_1.TAG_ID.XMP: case html_js_1.TAG_ID.IFRAME: case html_js_1.TAG_ID.NOEMBED: case html_js_1.TAG_ID.NOFRAMES: case html_js_1.TAG_ID.NOSCRIPT: { this.tokenizer.state = index_js_1.TokenizerMode.RAWTEXT; break; } case html_js_1.TAG_ID.SCRIPT: { this.tokenizer.state = index_js_1.TokenizerMode.SCRIPT_DATA; break; } case html_js_1.TAG_ID.PLAINTEXT: { this.tokenizer.state = index_js_1.TokenizerMode.PLAINTEXT; break; } default: // Do nothing } } //Tree mutation _setDocumentType(token) { const name = token.name || ''; const publicId = token.publicId || ''; const systemId = token.systemId || ''; this.treeAdapter.setDocumentType(this.document, name, publicId, systemId); if (token.location) { const documentChildren = this.treeAdapter.getChildNodes(this.document); const docTypeNode = documentChildren.find((node) => this.treeAdapter.isDocumentTypeNode(node)); if (docTypeNode) { this.treeAdapter.setNodeSourceCodeLocation(docTypeNode, token.location); } } } _attachElementToTree(element, location) { if (this.options.sourceCodeLocationInfo) { const loc = location && Object.assign(Object.assign({}, location), { startTag: location }); this.treeAdapter.setNodeSourceCodeLocation(element, loc); } if (this._shouldFosterParentOnInsertion()) { this._fosterParentElement(element); } else { const parent = this.openElements.currentTmplContentOrNode; 
this.treeAdapter.appendChild(parent, element); } } _appendElement(token, namespaceURI) { const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs); this._attachElementToTree(element, token.location); } _insertElement(token, namespaceURI) { const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs); this._attachElementToTree(element, token.location); this.openElements.push(element, token.tagID); } _insertFakeElement(tagName, tagID) { const element = this.treeAdapter.createElement(tagName, html_js_1.NS.HTML, []); this._attachElementToTree(element, null); this.openElements.push(element, tagID); } _insertTemplate(token) { const tmpl = this.treeAdapter.createElement(token.tagName, html_js_1.NS.HTML, token.attrs); const content = this.treeAdapter.createDocumentFragment(); this.treeAdapter.setTemplateContent(tmpl, content); this._attachElementToTree(tmpl, token.location); this.openElements.push(tmpl, token.tagID); if (this.options.sourceCodeLocationInfo) this.treeAdapter.setNodeSourceCodeLocation(content, null); } _insertFakeRootElement() { const element = this.treeAdapter.createElement(html_js_1.TAG_NAMES.HTML, html_js_1.NS.HTML, []); if (this.options.sourceCodeLocationInfo) this.treeAdapter.setNodeSourceCodeLocation(element, null); this.treeAdapter.appendChild(this.openElements.current, element); this.openElements.push(element, html_js_1.TAG_ID.HTML); } _appendCommentNode(token, parent) { const commentNode = this.treeAdapter.createCommentNode(token.data); this.treeAdapter.appendChild(parent, commentNode); if (this.options.sourceCodeLocationInfo) { this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location); } } _insertCharacters(token) { let parent; let beforeElement; if (this._shouldFosterParentOnInsertion()) { ({ parent, beforeElement } = this._findFosterParentingLocation()); if (beforeElement) { this.treeAdapter.insertTextBefore(parent, token.chars, beforeElement); } else { 
this.treeAdapter.insertText(parent, token.chars); } } else { parent = this.openElements.currentTmplContentOrNode; this.treeAdapter.insertText(parent, token.chars); } if (!token.location) return; const siblings = this.treeAdapter.getChildNodes(parent); const textNodeIdx = beforeElement ? siblings.lastIndexOf(beforeElement) : siblings.length; const textNode = siblings[textNodeIdx - 1]; //NOTE: if we have a location assigned by another token, then just update the end position const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode); if (tnLoc) { const { endLine, endCol, endOffset } = token.location; this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset }); } else if (this.options.sourceCodeLocationInfo) { this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location); } } _adoptNodes(donor, recipient) { for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) { this.treeAdapter.detachNode(child); this.treeAdapter.appendChild(recipient, child); } } _setEndLocation(element, closingToken) { if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) { const ctLoc = closingToken.location; const tn = this.treeAdapter.getTagName(element); const endLoc = // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing // tag and for cases like <td> <p> </td> - 'p' closes without a closing tag. closingToken.type === token_js_1.TokenType.END_TAG && tn === closingToken.tagName ? 
{ endTag: Object.assign({}, ctLoc), endLine: ctLoc.endLine, endCol: ctLoc.endCol, endOffset: ctLoc.endOffset, } : { endLine: ctLoc.startLine, endCol: ctLoc.startCol, endOffset: ctLoc.startOffset, }; this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc); } } //Token processing shouldProcessStartTagTokenInForeignContent(token) { // Check that neither current === document, or ns === NS.HTML if (!this.currentNotInHTML) return false; let current; let currentTagId; if (this.openElements.stackTop === 0 && this.fragmentContext) { current = this.fragmentContext; currentTagId = this.fragmentContextID; } else { ({ current, currentTagId } = this.openElements); } if (token.tagID === html_js_1.TAG_ID.SVG && this.treeAdapter.getTagName(current) === html_js_1.TAG_NAMES.ANNOTATION_XML && this.treeAdapter.getNamespaceURI(current) === html_js_1.NS.MATHML) { return false; } return ( // Check that `current` is not an integration point for HTML or MathML elements. this.tokenizer.inForeignNode || // If it _is_ an integration point, then we might have to check that it is not an HTML // integration point. 
((token.tagID === html_js_1.TAG_ID.MGLYPH || token.tagID === html_js_1.TAG_ID.MALIGNMARK) && !this._isIntegrationPoint(currentTagId, current, html_js_1.NS.HTML))); } _processToken(token) { switch (token.type) { case token_js_1.TokenType.CHARACTER: { this.onCharacter(token); break; } case token_js_1.TokenType.NULL_CHARACTER: { this.onNullCharacter(token); break; } case token_js_1.TokenType.COMMENT: { this.onComment(token); break; } case token_js_1.TokenType.DOCTYPE: { this.onDoctype(token); break; } case token_js_1.TokenType.START_TAG: { this._processStartTag(token); break; } case token_js_1.TokenType.END_TAG: { this.onEndTag(token); break; } case token_js_1.TokenType.EOF: { this.onEof(token); break; } case token_js_1.TokenType.WHITESPACE_CHARACTER: { this.onWhitespaceCharacter(token); break; } } } //Integration points _isIntegrationPoint(tid, element, foreignNS) { const ns = this.treeAdapter.getNamespaceURI(element); const attrs = this.treeAdapter.getAttrList(element); return foreignContent.isIntegrationPoint(tid, ns, attrs, foreignNS); } //Active formatting elements reconstruction _reconstructActiveFormattingElements() { const listLength = this.activeFormattingElements.entries.length; if (listLength) { const endIndex = this.activeFormattingElements.entries.findIndex((entry) => entry.type === formatting_element_list_js_1.EntryType.Marker || this.openElements.contains(entry.element)); const unopenIdx = endIndex < 0 ? 
listLength - 1 : endIndex - 1; for (let i = unopenIdx; i >= 0; i--) { const entry = this.activeFormattingElements.entries[i]; this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element)); entry.element = this.openElements.current; } } } //Close elements _closeTableCell() { this.openElements.generateImpliedEndTags(); this.openElements.popUntilTableCellPopped(); this.activeFormattingElements.clearToLastMarker(); this.insertionMode = InsertionMode.IN_ROW; } _closePElement() { this.openElements.generateImpliedEndTagsWithExclusion(html_js_1.TAG_ID.P); this.openElements.popUntilTagNamePopped(html_js_1.TAG_ID.P); } //Insertion modes _resetInsertionMode() { for (let i = this.openElements.stackTop; i >= 0; i--) { //Insertion mode reset map switch (i === 0 && this.fragmentContext ? this.fragmentContextID : this.openElements.tagIDs[i]) { case html_js_1.TAG_ID.TR: { this.insertionMode = InsertionMode.IN_ROW; return; } case html_js_1.TAG_ID.TBODY: case html_js_1.TAG_ID.THEAD: case html_js_1.TAG_ID.TFOOT: { this.insertionMode = InsertionMode.IN_TABLE_BODY; return; } case html_js_1.TAG_ID.CAPTION: { this.insertionMode = InsertionMode.IN_CAPTION; return; } case html_js_1.TAG_ID.COLGROUP: { this.insertionMode = InsertionMode.IN_COLUMN_GROUP; return; } case html_js_1.TAG_ID.TABLE: { this.insertionMode = InsertionMode.IN_TABLE; return; } case html_js_1.TAG_ID.BODY: { this.insertionMode = InsertionMode.IN_BODY; return; } case html_js_1.TAG_ID.FRAMESET: { this.insertionMode = InsertionMode.IN_FRAMESET; return; } case html_js_1.TAG_ID.SELECT: { this._resetInsertionModeForSelect(i); return; } case html_js_1.TAG_ID.TEMPLATE: { this.insertionMode = this.tmplInsertionModeStack[0]; return; } case html_js_1.TAG_ID.HTML: { this.insertionMode = this.headElement ? 
InsertionMode.AFTER_HEAD : InsertionMode.BEFORE_HEAD; return; } case html_js_1.TAG_ID.TD: case html_js_1.TAG_ID.TH: { if (i > 0) { this.insertionMode = InsertionMode.IN_CELL; return; } break; } case html_js_1.TAG_ID.HEAD: { if (i > 0) { this.insertionMode = InsertionMode.IN_HEAD; return; } break; } } } this.insertionMode = InsertionMode.IN_BODY; } _resetInsertionModeForSelect(selectIdx) { if (selectIdx > 0) { for (let i = selectIdx - 1; i > 0; i--) { const tn = this.openElements.tagIDs[i]; if (tn === html_js_1.TAG_ID.TEMPLATE) { break; } else if (tn === html_js_1.TAG_ID.TABLE) { this.insertionMode = InsertionMode.IN_SELECT_IN_TABLE; return; } } } this.insertionMode = InsertionMode.IN_SELECT; } //Foster parenting _isElementCausesFosterParenting(tn) { return TABLE_STRUCTURE_TAGS.has(tn); } _shouldFosterParentOnInsertion() { return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId); } _findFosterParentingLocation() { for (let i = this.openElements.stackTop; i >= 0; i--) { const openElement = this.openElements.items[i]; switch (this.openElements.tagIDs[i]) { case html_js_1.TAG_ID.TEMPLATE: { if (this.treeAdapter.getNamespaceURI(openElement) === html_js_1.NS.HTML) { return { parent: this.treeAdapter.getTemplateContent(openElement), beforeElement: null }; } break; } case html_js_1.TAG_ID.TABLE: { const parent = this.treeAdapter.getParentNode(openElement); if (parent) { return { parent, beforeElement: openElement }; } return { parent: this.openElements.items[i - 1], beforeElement: null }; } default: // Do nothing } } return { parent: this.openElements.items[0], beforeElement: null }; } _fosterParentElement(element) { const location = this._findFosterParentingLocation(); if (location.beforeElement) { this.treeAdapter.insertBefore(location.parent, element, location.beforeElement); } else { this.treeAdapter.appendChild(location.parent, element); } } //Special elements _isSpecialElement(element, id) { const ns = 
this.treeAdapter.getNamespaceURI(element); return html_js_1.SPECIAL_ELEMENTS[ns].has(id); } onCharacter(token) { this.skipNextNewLine = false; if (this.tokenizer.inForeignNode) { characterInForeignContent(this, token); return; } switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { tokenBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { tokenBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { tokenInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { tokenInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { tokenAfterHead(this, token); break; } case InsertionMode.IN_BODY: case InsertionMode.IN_CAPTION: case InsertionMode.IN_CELL: case InsertionMode.IN_TEMPLATE: { characterInBody(this, token); break; } case InsertionMode.TEXT: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: { this._insertCharacters(token); break; } case InsertionMode.IN_TABLE: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: { characterInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { characterInTableText(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { tokenInColumnGroup(this, token); break; } case InsertionMode.AFTER_BODY: { tokenAfterBody(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { tokenAfterAfterBody(this, token); break; } default: // Do nothing } } onNullCharacter(token) { this.skipNextNewLine = false; if (this.tokenizer.inForeignNode) { nullCharacterInForeignContent(this, token); return; } switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { tokenBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { tokenBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { tokenInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { tokenInHeadNoScript(this, token); 
break; } case InsertionMode.AFTER_HEAD: { tokenAfterHead(this, token); break; } case InsertionMode.TEXT: { this._insertCharacters(token); break; } case InsertionMode.IN_TABLE: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: { characterInTable(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { tokenInColumnGroup(this, token); break; } case InsertionMode.AFTER_BODY: { tokenAfterBody(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { tokenAfterAfterBody(this, token); break; } default: // Do nothing } } onComment(token) { this.skipNextNewLine = false; if (this.currentNotInHTML) { appendComment(this, token); return; } switch (this.insertionMode) { case InsertionMode.INITIAL: case InsertionMode.BEFORE_HTML: case InsertionMode.BEFORE_HEAD: case InsertionMode.IN_HEAD: case InsertionMode.IN_HEAD_NO_SCRIPT: case InsertionMode.AFTER_HEAD: case InsertionMode.IN_BODY: case InsertionMode.IN_TABLE: case InsertionMode.IN_CAPTION: case InsertionMode.IN_COLUMN_GROUP: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: case InsertionMode.IN_CELL: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: case InsertionMode.IN_TEMPLATE: case InsertionMode.IN_FRAMESET: case InsertionMode.AFTER_FRAMESET: { appendComment(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.AFTER_BODY: { appendCommentToRootHtmlElement(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: case InsertionMode.AFTER_AFTER_FRAMESET: { appendCommentToDocument(this, token); break; } default: // Do nothing } } onDoctype(token) { this.skipNextNewLine = false; switch (this.insertionMode) { case InsertionMode.INITIAL: { doctypeInInitialMode(this, token); break; } case InsertionMode.BEFORE_HEAD: case InsertionMode.IN_HEAD: case InsertionMode.IN_HEAD_NO_SCRIPT: case InsertionMode.AFTER_HEAD: { this._err(token, error_codes_js_1.ERR.misplacedDoctype); break; } case InsertionMode.IN_TABLE_TEXT: { 
tokenInTableText(this, token); break; } default: // Do nothing } } onStartTag(token) { this.skipNextNewLine = false; this.currentToken = token; this._processStartTag(token); if (token.selfClosing && !token.ackSelfClosing) { this._err(token, error_codes_js_1.ERR.nonVoidHtmlElementStartTagWithTrailingSolidus); } } /** * Processes a given start tag. * * `onStartTag` checks if a self-closing tag was recognized. When a token * is moved inbetween multiple insertion modes, this check for self-closing * could lead to false positives. To avoid this, `_processStartTag` is used * for nested calls. * * @param token The token to process. */ _processStartTag(token) { if (this.shouldProcessStartTagTokenInForeignContent(token)) { startTagInForeignContent(this, token); } else { this._startTagOutsideForeignContent(token); } } _startTagOutsideForeignContent(token) { switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { startTagBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { startTagBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { startTagInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { startTagInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { startTagAfterHead(this, token); break; } case InsertionMode.IN_BODY: { startTagInBody(this, token); break; } case InsertionMode.IN_TABLE: { startTagInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.IN_CAPTION: { startTagInCaption(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { startTagInColumnGroup(this, token); break; } case InsertionMode.IN_TABLE_BODY: { startTagInTableBody(this, token); break; } case InsertionMode.IN_ROW: { startTagInRow(this, token); break; } case InsertionMode.IN_CELL: { startTagInCell(this, token); break; } case InsertionMode.IN_SELECT: { startTagInSelect(this, token); break; } 
case InsertionMode.IN_SELECT_IN_TABLE: { startTagInSelectInTable(this, token); break; } case InsertionMode.IN_TEMPLATE: { startTagInTemplate(this, token); break; } case InsertionMode.AFTER_BODY: { startTagAfterBody(this, token); break; } case InsertionMode.IN_FRAMESET: { startTagInFrameset(this, token); break; } case InsertionMode.AFTER_FRAMESET: { startTagAfterFrameset(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { startTagAfterAfterBody(this, token); break; } case InsertionMode.AFTER_AFTER_FRAMESET: { startTagAfterAfterFrameset(this, token); break; } default: // Do nothing } } onEndTag(token) { this.skipNextNewLine = false; this.currentToken = token; if (this.currentNotInHTML) { endTagInForeignContent(this, token); } else { this._endTagOutsideForeignContent(token); } } _endTagOutsideForeignContent(token) { switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { endTagBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { endTagBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { endTagInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { endTagInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { endTagAfterHead(this, token); break; } case InsertionMode.IN_BODY: { endTagInBody(this, token); break; } case InsertionMode.TEXT: { endTagInText(this, token); break; } case InsertionMode.IN_TABLE: { endTagInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.IN_CAPTION: { endTagInCaption(this, token); break; } case InsertionMode.IN_COLUMN_GROUP: { endTagInColumnGroup(this, token); break; } case InsertionMode.IN_TABLE_BODY: { endTagInTableBody(this, token); break; } case InsertionMode.IN_ROW: { endTagInRow(this, token); break; } case InsertionMode.IN_CELL: { endTagInCell(this, token); break; } case InsertionMode.IN_SELECT: { endTagInSelect(this, 
token); break; } case InsertionMode.IN_SELECT_IN_TABLE: { endTagInSelectInTable(this, token); break; } case InsertionMode.IN_TEMPLATE: { endTagInTemplate(this, token); break; } case InsertionMode.AFTER_BODY: { endTagAfterBody(this, token); break; } case InsertionMode.IN_FRAMESET: { endTagInFrameset(this, token); break; } case InsertionMode.AFTER_FRAMESET: { endTagAfterFrameset(this, token); break; } case InsertionMode.AFTER_AFTER_BODY: { tokenAfterAfterBody(this, token); break; } default: // Do nothing } } onEof(token) { switch (this.insertionMode) { case InsertionMode.INITIAL: { tokenInInitialMode(this, token); break; } case InsertionMode.BEFORE_HTML: { tokenBeforeHtml(this, token); break; } case InsertionMode.BEFORE_HEAD: { tokenBeforeHead(this, token); break; } case InsertionMode.IN_HEAD: { tokenInHead(this, token); break; } case InsertionMode.IN_HEAD_NO_SCRIPT: { tokenInHeadNoScript(this, token); break; } case InsertionMode.AFTER_HEAD: { tokenAfterHead(this, token); break; } case InsertionMode.IN_BODY: case InsertionMode.IN_TABLE: case InsertionMode.IN_CAPTION: case InsertionMode.IN_COLUMN_GROUP: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: case InsertionMode.IN_CELL: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: { eofInBody(this, token); break; } case InsertionMode.TEXT: { eofInText(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { tokenInTableText(this, token); break; } case InsertionMode.IN_TEMPLATE: { eofInTemplate(this, token); break; } case InsertionMode.AFTER_BODY: case InsertionMode.IN_FRAMESET: case InsertionMode.AFTER_FRAMESET: case InsertionMode.AFTER_AFTER_BODY: case InsertionMode.AFTER_AFTER_FRAMESET: { stopParsing(this, token); break; } default: // Do nothing } } onWhitespaceCharacter(token) { if (this.skipNextNewLine) { this.skipNextNewLine = false; if (token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) { if (token.chars.length === 1) { return; } token.chars = token.chars.substr(1); } } if 
(this.tokenizer.inForeignNode) { this._insertCharacters(token); return; } switch (this.insertionMode) { case InsertionMode.IN_HEAD: case InsertionMode.IN_HEAD_NO_SCRIPT: case InsertionMode.AFTER_HEAD: case InsertionMode.TEXT: case InsertionMode.IN_COLUMN_GROUP: case InsertionMode.IN_SELECT: case InsertionMode.IN_SELECT_IN_TABLE: case InsertionMode.IN_FRAMESET: case InsertionMode.AFTER_FRAMESET: { this._insertCharacters(token); break; } case InsertionMode.IN_BODY: case InsertionMode.IN_CAPTION: case InsertionMode.IN_CELL: case InsertionMode.IN_TEMPLATE: case InsertionMode.AFTER_BODY: case InsertionMode.AFTER_AFTER_BODY: case InsertionMode.AFTER_AFTER_FRAMESET: { whitespaceCharacterInBody(this, token); break; } case InsertionMode.IN_TABLE: case InsertionMode.IN_TABLE_BODY: case InsertionMode.IN_ROW: { characterInTable(this, token); break; } case InsertionMode.IN_TABLE_TEXT: { whitespaceCharacterInTableText(this, token); break; } default: // Do nothing } } } exports.Parser = Parser; //Adoption agency algorithm //(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency) //------------------------------------------------------------------ //Steps 5-8 of the algorithm function aaObtainFormattingElementEntry(p, token) { let formattingElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName); if (formattingElementEntry) { if (!p.openElements.contains(formattingElementEntry.element)) { p.activeFormattingElements.removeEntry(formattingElementEntry); formattingElementEntry = null; } else if (!p.openElements.hasInScope(token.tagID)) { formattingElementEntry = null; } } else { genericEndTagInBody(p, token); } return formattingElementEntry; } //Steps 9 and 10 of the algorithm function aaObtainFurthestBlock(p, formattingElementEntry) { let furthestBlock = null; let idx = p.openElements.stackTop; for (; idx >= 0; idx--) { const element = p.openElements.items[idx]; if (element === 
formattingElementEntry.element) { break; } if (p._isSpecialElement(element, p.openElements.tagIDs[idx])) { furthestBlock = element; } } if (!furthestBlock) { p.openElements.shortenToLength(idx < 0 ? 0 : idx); p.activeFormattingElements.removeEntry(formattingElementEntry); } return furthestBlock; } //Step 13 of the algorithm function aaInnerLoop(p, furthestBlock, formattingElement) { let lastElement = furthestBlock; let nextElement = p.openElements.getCommonAncestor(furthestBlock); for (let i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) { //NOTE: store the next element for the next loop iteration (it may be deleted from the stack by step 9.5) nextElement = p.openElements.getCommonAncestor(element); const elementEntry = p.activeFormattingElements.getElementEntry(element); const counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER; const shouldRemoveFromOpenElements = !elementEntry || counterOverflow; if (shouldRemoveFromOpenElements) { if (counterOverflow) { p.activeFormattingElements.removeEntry(elementEntry); } p.openElements.remove(element); } else { element = aaRecreateElementFromEntry(p, elementEntry); if (lastElement === furthestBlock) { p.activeFormattingElements.bookmark = elementEntry; } p.treeAdapter.detachNode(lastElement); p.treeAdapter.appendChild(element, lastElement); lastElement = element; } } return lastElement; } //Step 13.7 of the algorithm function aaRecreateElementFromEntry(p, elementEntry) { const ns = p.treeAdapter.getNamespaceURI(elementEntry.element); const newElement = p.treeAdapter.createElement(elementEntry.token.tagName, ns, elementEntry.token.attrs); p.openElements.replace(elementEntry.element, newElement); elementEntry.element = newElement; return newElement; } //Step 14 of the algorithm function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) { const tn = p.treeAdapter.getTagName(commonAncestor); const tid = (0, html_js_1.getTagID)(tn); if 
/**
 * Steps 15-19 of the algorithm: replace the formatting element with a fresh copy that
 * lives inside the furthest block.
 *
 * A new element is created from the entry's token (namespace taken from the old element),
 * the furthest block's children are adopted by it, and it is appended to the furthest
 * block. The active-formatting list and the open-element stack are then updated: the new
 * element is inserted after the bookmark / after the furthest block, and the old
 * entry / element are removed.
 *
 * @param p - Parser instance.
 * @param furthestBlock - Element that receives the new formatting element.
 * @param formattingElementEntry - Entry being replaced.
 */
function aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry) {
    const { treeAdapter: adapter, activeFormattingElements: formattingList, openElements: stack } = p;
    const { token } = formattingElementEntry;
    const namespace = adapter.getNamespaceURI(formattingElementEntry.element);
    const replacement = adapter.createElement(token.tagName, namespace, token.attrs);
    // Tree changes first ...
    p._adoptNodes(furthestBlock, replacement);
    adapter.appendChild(furthestBlock, replacement);
    // ... then the list of active formatting elements ...
    formattingList.insertElementAfterBookmark(replacement, token);
    formattingList.removeEntry(formattingElementEntry);
    // ... and finally the stack of open elements.
    stack.remove(formattingElementEntry.element);
    stack.insertAfter(furthestBlock, replacement, token.tagID);
}

/**
 * Algorithm entry point.
 *
 * Runs the outer loop at most AA_OUTER_LOOP_ITER times and stops early as soon as no
 * formatting element entry or no furthest block can be obtained.
 *
 * @param p - Parser instance.
 * @param token - End tag token that triggered the algorithm.
 */
function callAdoptionAgency(p, token) {
    let round = 0;
    while (round < AA_OUTER_LOOP_ITER) {
        const entry = aaObtainFormattingElementEntry(p, token);
        if (!entry) {
            return;
        }
        const furthestBlock = aaObtainFurthestBlock(p, entry);
        if (!furthestBlock) {
            return;
        }
        p.activeFormattingElements.bookmark = entry;
        const lastElement = aaInnerLoop(p, furthestBlock, entry.element);
        const commonAncestor = p.openElements.getCommonAncestor(entry.element);
        p.treeAdapter.detachNode(lastElement);
        if (commonAncestor) {
            aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
        }
        aaReplaceFormattingElement(p, furthestBlock, entry);
        round += 1;
    }
}

//Generic token handlers
//------------------------------------------------------------------
/**
 * Appends a comment node for `token` to `p.openElements.currentTmplContentOrNode`.
 *
 * @param p - Parser instance.
 * @param token - Comment token.
 */
function appendComment(p, token) {
    const parent = p.openElements.currentTmplContentOrNode;
    p._appendCommentNode(token, parent);
}
/**
 * Appends a comment node for `token` to the bottom-most entry of the open-element stack
 * (`p.openElements.items[0]`).
 *
 * @param p - Parser instance.
 * @param token - Comment token.
 */
function appendCommentToRootHtmlElement(p, token) {
    const [rootElement] = p.openElements.items;
    p._appendCommentNode(token, rootElement);
}

/**
 * Appends a comment node for `token` directly to `p.document`.
 *
 * @param p - Parser instance.
 * @param token - Comment token.
 */
function appendCommentToDocument(p, token) {
    const { document: documentNode } = p;
    p._appendCommentNode(token, documentNode);
}