parse5
Version:
HTML parser and serializer.
1,294 lines • 105 kB
JavaScript
import { Tokenizer, TokenizerMode } from '../tokenizer/index.js';
import { OpenElementStack } from './open-element-stack.js';
import { FormattingElementList, EntryType } from './formatting-element-list.js';
import { defaultTreeAdapter } from '../tree-adapters/default.js';
import * as doctype from '../common/doctype.js';
import * as foreignContent from '../common/foreign-content.js';
import { ERR } from '../common/error-codes.js';
import * as unicode from '../common/unicode.js';
import { TAG_ID as $, TAG_NAMES as TN, NS, ATTRS, SPECIAL_ELEMENTS, DOCUMENT_MODE, isNumberedHeader, getTagID, } from '../common/html.js';
import { TokenType, getTokenAttr, } from '../common/token.js';
//Misc constants
const HIDDEN_INPUT_TYPE = 'hidden';
//Adoption agency loops iteration count
const AA_OUTER_LOOP_ITER = 8;
const AA_INNER_LOOP_ITER = 3;
//Insertion modes
var InsertionMode;
(function (InsertionMode) {
InsertionMode[InsertionMode["INITIAL"] = 0] = "INITIAL";
InsertionMode[InsertionMode["BEFORE_HTML"] = 1] = "BEFORE_HTML";
InsertionMode[InsertionMode["BEFORE_HEAD"] = 2] = "BEFORE_HEAD";
InsertionMode[InsertionMode["IN_HEAD"] = 3] = "IN_HEAD";
InsertionMode[InsertionMode["IN_HEAD_NO_SCRIPT"] = 4] = "IN_HEAD_NO_SCRIPT";
InsertionMode[InsertionMode["AFTER_HEAD"] = 5] = "AFTER_HEAD";
InsertionMode[InsertionMode["IN_BODY"] = 6] = "IN_BODY";
InsertionMode[InsertionMode["TEXT"] = 7] = "TEXT";
InsertionMode[InsertionMode["IN_TABLE"] = 8] = "IN_TABLE";
InsertionMode[InsertionMode["IN_TABLE_TEXT"] = 9] = "IN_TABLE_TEXT";
InsertionMode[InsertionMode["IN_CAPTION"] = 10] = "IN_CAPTION";
InsertionMode[InsertionMode["IN_COLUMN_GROUP"] = 11] = "IN_COLUMN_GROUP";
InsertionMode[InsertionMode["IN_TABLE_BODY"] = 12] = "IN_TABLE_BODY";
InsertionMode[InsertionMode["IN_ROW"] = 13] = "IN_ROW";
InsertionMode[InsertionMode["IN_CELL"] = 14] = "IN_CELL";
InsertionMode[InsertionMode["IN_SELECT"] = 15] = "IN_SELECT";
InsertionMode[InsertionMode["IN_SELECT_IN_TABLE"] = 16] = "IN_SELECT_IN_TABLE";
InsertionMode[InsertionMode["IN_TEMPLATE"] = 17] = "IN_TEMPLATE";
InsertionMode[InsertionMode["AFTER_BODY"] = 18] = "AFTER_BODY";
InsertionMode[InsertionMode["IN_FRAMESET"] = 19] = "IN_FRAMESET";
InsertionMode[InsertionMode["AFTER_FRAMESET"] = 20] = "AFTER_FRAMESET";
InsertionMode[InsertionMode["AFTER_AFTER_BODY"] = 21] = "AFTER_AFTER_BODY";
InsertionMode[InsertionMode["AFTER_AFTER_FRAMESET"] = 22] = "AFTER_AFTER_FRAMESET";
})(InsertionMode || (InsertionMode = {}));
const BASE_LOC = {
startLine: -1,
startCol: -1,
startOffset: -1,
endLine: -1,
endCol: -1,
endOffset: -1,
};
const TABLE_STRUCTURE_TAGS = new Set([$.TABLE, $.TBODY, $.TFOOT, $.THEAD, $.TR]);
const defaultParserOptions = {
scriptingEnabled: true,
sourceCodeLocationInfo: false,
treeAdapter: defaultTreeAdapter,
onParseError: null,
};
//Parser
export class Parser {
constructor(options, document, fragmentContext = null, scriptHandler = null) {
this.fragmentContext = fragmentContext;
this.scriptHandler = scriptHandler;
this.currentToken = null;
this.stopped = false;
this.insertionMode = InsertionMode.INITIAL;
this.originalInsertionMode = InsertionMode.INITIAL;
this.headElement = null;
this.formElement = null;
/** Indicates that the current node is not an element in the HTML namespace */
this.currentNotInHTML = false;
/**
* The template insertion mode stack is maintained from the left.
* Ie. the topmost element will always have index 0.
*/
this.tmplInsertionModeStack = [];
this.pendingCharacterTokens = [];
this.hasNonWhitespacePendingCharacterToken = false;
this.framesetOk = true;
this.skipNextNewLine = false;
this.fosterParentingEnabled = false;
this.options = {
...defaultParserOptions,
...options,
};
this.treeAdapter = this.options.treeAdapter;
this.onParseError = this.options.onParseError;
// Always enable location info if we report parse errors.
if (this.onParseError) {
this.options.sourceCodeLocationInfo = true;
}
this.document = document !== null && document !== void 0 ? document : this.treeAdapter.createDocument();
this.tokenizer = new Tokenizer(this.options, this);
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : $.UNKNOWN;
this._setContextModes(fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : this.document, this.fragmentContextID);
this.openElements = new OpenElementStack(this.document, this.treeAdapter, this);
}
// API
static parse(html, options) {
const parser = new this(options);
parser.tokenizer.write(html, true);
return parser.document;
}
static getFragmentParser(fragmentContext, options) {
const opts = {
...defaultParserOptions,
...options,
};
//NOTE: use a <template> element as the fragment context if no context element was provided,
//so we will parse in a "forgiving" manner
fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : (fragmentContext = opts.treeAdapter.createElement(TN.TEMPLATE, NS.HTML, []));
//NOTE: create a fake element which will be used as the `document` for fragment parsing.
//This is important for jsdom, where a new `document` cannot be created. This led to
//fragment parsing messing with the main `document`.
const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);
const parser = new this(opts, documentMock, fragmentContext);
if (parser.fragmentContextID === $.TEMPLATE) {
parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
}
parser._initTokenizerForFragmentParsing();
parser._insertFakeRootElement();
parser._resetInsertionMode();
parser._findFormInFragmentContext();
return parser;
}
getFragment() {
const rootElement = this.treeAdapter.getFirstChild(this.document);
const fragment = this.treeAdapter.createDocumentFragment();
this._adoptNodes(rootElement, fragment);
return fragment;
}
//Errors
_err(token, code, beforeToken) {
var _a;
if (!this.onParseError)
return;
const loc = (_a = token.location) !== null && _a !== void 0 ? _a : BASE_LOC;
const err = {
code,
startLine: loc.startLine,
startCol: loc.startCol,
startOffset: loc.startOffset,
endLine: beforeToken ? loc.startLine : loc.endLine,
endCol: beforeToken ? loc.startCol : loc.endCol,
endOffset: beforeToken ? loc.startOffset : loc.endOffset,
};
this.onParseError(err);
}
//Stack events
onItemPush(node, tid, isTop) {
var _a, _b;
(_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node);
if (isTop && this.openElements.stackTop > 0)
this._setContextModes(node, tid);
}
onItemPop(node, isTop) {
var _a, _b;
if (this.options.sourceCodeLocationInfo) {
this._setEndLocation(node, this.currentToken);
}
(_b = (_a = this.treeAdapter).onItemPop) === null || _b === void 0 ? void 0 : _b.call(_a, node, this.openElements.current);
if (isTop) {
let current;
let currentTagId;
if (this.openElements.stackTop === 0 && this.fragmentContext) {
current = this.fragmentContext;
currentTagId = this.fragmentContextID;
}
else {
({ current, currentTagId } = this.openElements);
}
this._setContextModes(current, currentTagId);
}
}
_setContextModes(current, tid) {
const isHTML = current === this.document || this.treeAdapter.getNamespaceURI(current) === NS.HTML;
this.currentNotInHTML = !isHTML;
this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current);
}
_switchToTextParsing(currentToken, nextTokenizerState) {
this._insertElement(currentToken, NS.HTML);
this.tokenizer.state = nextTokenizerState;
this.originalInsertionMode = this.insertionMode;
this.insertionMode = InsertionMode.TEXT;
}
switchToPlaintextParsing() {
this.insertionMode = InsertionMode.TEXT;
this.originalInsertionMode = InsertionMode.IN_BODY;
this.tokenizer.state = TokenizerMode.PLAINTEXT;
}
//Fragment parsing
_getAdjustedCurrentElement() {
return this.openElements.stackTop === 0 && this.fragmentContext
? this.fragmentContext
: this.openElements.current;
}
_findFormInFragmentContext() {
let node = this.fragmentContext;
while (node) {
if (this.treeAdapter.getTagName(node) === TN.FORM) {
this.formElement = node;
break;
}
node = this.treeAdapter.getParentNode(node);
}
}
_initTokenizerForFragmentParsing() {
if (!this.fragmentContext || this.treeAdapter.getNamespaceURI(this.fragmentContext) !== NS.HTML) {
return;
}
switch (this.fragmentContextID) {
case $.TITLE:
case $.TEXTAREA: {
this.tokenizer.state = TokenizerMode.RCDATA;
break;
}
case $.STYLE:
case $.XMP:
case $.IFRAME:
case $.NOEMBED:
case $.NOFRAMES:
case $.NOSCRIPT: {
this.tokenizer.state = TokenizerMode.RAWTEXT;
break;
}
case $.SCRIPT: {
this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
break;
}
case $.PLAINTEXT: {
this.tokenizer.state = TokenizerMode.PLAINTEXT;
break;
}
default:
// Do nothing
}
}
//Tree mutation
_setDocumentType(token) {
const name = token.name || '';
const publicId = token.publicId || '';
const systemId = token.systemId || '';
this.treeAdapter.setDocumentType(this.document, name, publicId, systemId);
if (token.location) {
const documentChildren = this.treeAdapter.getChildNodes(this.document);
const docTypeNode = documentChildren.find((node) => this.treeAdapter.isDocumentTypeNode(node));
if (docTypeNode) {
this.treeAdapter.setNodeSourceCodeLocation(docTypeNode, token.location);
}
}
}
_attachElementToTree(element, location) {
if (this.options.sourceCodeLocationInfo) {
const loc = location && {
...location,
startTag: location,
};
this.treeAdapter.setNodeSourceCodeLocation(element, loc);
}
if (this._shouldFosterParentOnInsertion()) {
this._fosterParentElement(element);
}
else {
const parent = this.openElements.currentTmplContentOrNode;
this.treeAdapter.appendChild(parent, element);
}
}
_appendElement(token, namespaceURI) {
const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element, token.location);
}
_insertElement(token, namespaceURI) {
const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
this._attachElementToTree(element, token.location);
this.openElements.push(element, token.tagID);
}
_insertFakeElement(tagName, tagID) {
const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
this._attachElementToTree(element, null);
this.openElements.push(element, tagID);
}
_insertTemplate(token) {
const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
const content = this.treeAdapter.createDocumentFragment();
this.treeAdapter.setTemplateContent(tmpl, content);
this._attachElementToTree(tmpl, token.location);
this.openElements.push(tmpl, token.tagID);
if (this.options.sourceCodeLocationInfo)
this.treeAdapter.setNodeSourceCodeLocation(content, null);
}
_insertFakeRootElement() {
const element = this.treeAdapter.createElement(TN.HTML, NS.HTML, []);
if (this.options.sourceCodeLocationInfo)
this.treeAdapter.setNodeSourceCodeLocation(element, null);
this.treeAdapter.appendChild(this.openElements.current, element);
this.openElements.push(element, $.HTML);
}
_appendCommentNode(token, parent) {
const commentNode = this.treeAdapter.createCommentNode(token.data);
this.treeAdapter.appendChild(parent, commentNode);
if (this.options.sourceCodeLocationInfo) {
this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
}
}
_insertCharacters(token) {
let parent;
let beforeElement;
if (this._shouldFosterParentOnInsertion()) {
({ parent, beforeElement } = this._findFosterParentingLocation());
if (beforeElement) {
this.treeAdapter.insertTextBefore(parent, token.chars, beforeElement);
}
else {
this.treeAdapter.insertText(parent, token.chars);
}
}
else {
parent = this.openElements.currentTmplContentOrNode;
this.treeAdapter.insertText(parent, token.chars);
}
if (!token.location)
return;
const siblings = this.treeAdapter.getChildNodes(parent);
const textNodeIdx = beforeElement ? siblings.lastIndexOf(beforeElement) : siblings.length;
const textNode = siblings[textNodeIdx - 1];
//NOTE: if we have a location assigned by another token, then just update the end position
const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
if (tnLoc) {
const { endLine, endCol, endOffset } = token.location;
this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
}
else if (this.options.sourceCodeLocationInfo) {
this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
}
}
_adoptNodes(donor, recipient) {
for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
this.treeAdapter.detachNode(child);
this.treeAdapter.appendChild(recipient, child);
}
}
_setEndLocation(element, closingToken) {
if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
const ctLoc = closingToken.location;
const tn = this.treeAdapter.getTagName(element);
const endLoc =
// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
// tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
closingToken.type === TokenType.END_TAG && tn === closingToken.tagName
? {
endTag: { ...ctLoc },
endLine: ctLoc.endLine,
endCol: ctLoc.endCol,
endOffset: ctLoc.endOffset,
}
: {
endLine: ctLoc.startLine,
endCol: ctLoc.startCol,
endOffset: ctLoc.startOffset,
};
this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
}
}
//Token processing
shouldProcessStartTagTokenInForeignContent(token) {
// Check that neither current === document, or ns === NS.HTML
if (!this.currentNotInHTML)
return false;
let current;
let currentTagId;
if (this.openElements.stackTop === 0 && this.fragmentContext) {
current = this.fragmentContext;
currentTagId = this.fragmentContextID;
}
else {
({ current, currentTagId } = this.openElements);
}
if (token.tagID === $.SVG &&
this.treeAdapter.getTagName(current) === TN.ANNOTATION_XML &&
this.treeAdapter.getNamespaceURI(current) === NS.MATHML) {
return false;
}
return (
// Check that `current` is not an integration point for HTML or MathML elements.
this.tokenizer.inForeignNode ||
// If it _is_ an integration point, then we might have to check that it is not an HTML
// integration point.
((token.tagID === $.MGLYPH || token.tagID === $.MALIGNMARK) &&
!this._isIntegrationPoint(currentTagId, current, NS.HTML)));
}
_processToken(token) {
switch (token.type) {
case TokenType.CHARACTER: {
this.onCharacter(token);
break;
}
case TokenType.NULL_CHARACTER: {
this.onNullCharacter(token);
break;
}
case TokenType.COMMENT: {
this.onComment(token);
break;
}
case TokenType.DOCTYPE: {
this.onDoctype(token);
break;
}
case TokenType.START_TAG: {
this._processStartTag(token);
break;
}
case TokenType.END_TAG: {
this.onEndTag(token);
break;
}
case TokenType.EOF: {
this.onEof(token);
break;
}
case TokenType.WHITESPACE_CHARACTER: {
this.onWhitespaceCharacter(token);
break;
}
}
}
//Integration points
_isIntegrationPoint(tid, element, foreignNS) {
const ns = this.treeAdapter.getNamespaceURI(element);
const attrs = this.treeAdapter.getAttrList(element);
return foreignContent.isIntegrationPoint(tid, ns, attrs, foreignNS);
}
//Active formatting elements reconstruction
_reconstructActiveFormattingElements() {
const listLength = this.activeFormattingElements.entries.length;
if (listLength) {
const endIndex = this.activeFormattingElements.entries.findIndex((entry) => entry.type === EntryType.Marker || this.openElements.contains(entry.element));
const unopenIdx = endIndex < 0 ? listLength - 1 : endIndex - 1;
for (let i = unopenIdx; i >= 0; i--) {
const entry = this.activeFormattingElements.entries[i];
this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element));
entry.element = this.openElements.current;
}
}
}
//Close elements
_closeTableCell() {
this.openElements.generateImpliedEndTags();
this.openElements.popUntilTableCellPopped();
this.activeFormattingElements.clearToLastMarker();
this.insertionMode = InsertionMode.IN_ROW;
}
_closePElement() {
this.openElements.generateImpliedEndTagsWithExclusion($.P);
this.openElements.popUntilTagNamePopped($.P);
}
//Insertion modes
_resetInsertionMode() {
for (let i = this.openElements.stackTop; i >= 0; i--) {
//Insertion mode reset map
switch (i === 0 && this.fragmentContext ? this.fragmentContextID : this.openElements.tagIDs[i]) {
case $.TR: {
this.insertionMode = InsertionMode.IN_ROW;
return;
}
case $.TBODY:
case $.THEAD:
case $.TFOOT: {
this.insertionMode = InsertionMode.IN_TABLE_BODY;
return;
}
case $.CAPTION: {
this.insertionMode = InsertionMode.IN_CAPTION;
return;
}
case $.COLGROUP: {
this.insertionMode = InsertionMode.IN_COLUMN_GROUP;
return;
}
case $.TABLE: {
this.insertionMode = InsertionMode.IN_TABLE;
return;
}
case $.BODY: {
this.insertionMode = InsertionMode.IN_BODY;
return;
}
case $.FRAMESET: {
this.insertionMode = InsertionMode.IN_FRAMESET;
return;
}
case $.SELECT: {
this._resetInsertionModeForSelect(i);
return;
}
case $.TEMPLATE: {
this.insertionMode = this.tmplInsertionModeStack[0];
return;
}
case $.HTML: {
this.insertionMode = this.headElement ? InsertionMode.AFTER_HEAD : InsertionMode.BEFORE_HEAD;
return;
}
case $.TD:
case $.TH: {
if (i > 0) {
this.insertionMode = InsertionMode.IN_CELL;
return;
}
break;
}
case $.HEAD: {
if (i > 0) {
this.insertionMode = InsertionMode.IN_HEAD;
return;
}
break;
}
}
}
this.insertionMode = InsertionMode.IN_BODY;
}
_resetInsertionModeForSelect(selectIdx) {
if (selectIdx > 0) {
for (let i = selectIdx - 1; i > 0; i--) {
const tn = this.openElements.tagIDs[i];
if (tn === $.TEMPLATE) {
break;
}
else if (tn === $.TABLE) {
this.insertionMode = InsertionMode.IN_SELECT_IN_TABLE;
return;
}
}
}
this.insertionMode = InsertionMode.IN_SELECT;
}
//Foster parenting
_isElementCausesFosterParenting(tn) {
return TABLE_STRUCTURE_TAGS.has(tn);
}
_shouldFosterParentOnInsertion() {
return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId);
}
_findFosterParentingLocation() {
for (let i = this.openElements.stackTop; i >= 0; i--) {
const openElement = this.openElements.items[i];
switch (this.openElements.tagIDs[i]) {
case $.TEMPLATE: {
if (this.treeAdapter.getNamespaceURI(openElement) === NS.HTML) {
return { parent: this.treeAdapter.getTemplateContent(openElement), beforeElement: null };
}
break;
}
case $.TABLE: {
const parent = this.treeAdapter.getParentNode(openElement);
if (parent) {
return { parent, beforeElement: openElement };
}
return { parent: this.openElements.items[i - 1], beforeElement: null };
}
default:
// Do nothing
}
}
return { parent: this.openElements.items[0], beforeElement: null };
}
_fosterParentElement(element) {
const location = this._findFosterParentingLocation();
if (location.beforeElement) {
this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
}
else {
this.treeAdapter.appendChild(location.parent, element);
}
}
//Special elements
_isSpecialElement(element, id) {
const ns = this.treeAdapter.getNamespaceURI(element);
return SPECIAL_ELEMENTS[ns].has(id);
}
onCharacter(token) {
this.skipNextNewLine = false;
if (this.tokenizer.inForeignNode) {
characterInForeignContent(this, token);
return;
}
switch (this.insertionMode) {
case InsertionMode.INITIAL: {
tokenInInitialMode(this, token);
break;
}
case InsertionMode.BEFORE_HTML: {
tokenBeforeHtml(this, token);
break;
}
case InsertionMode.BEFORE_HEAD: {
tokenBeforeHead(this, token);
break;
}
case InsertionMode.IN_HEAD: {
tokenInHead(this, token);
break;
}
case InsertionMode.IN_HEAD_NO_SCRIPT: {
tokenInHeadNoScript(this, token);
break;
}
case InsertionMode.AFTER_HEAD: {
tokenAfterHead(this, token);
break;
}
case InsertionMode.IN_BODY:
case InsertionMode.IN_CAPTION:
case InsertionMode.IN_CELL:
case InsertionMode.IN_TEMPLATE: {
characterInBody(this, token);
break;
}
case InsertionMode.TEXT:
case InsertionMode.IN_SELECT:
case InsertionMode.IN_SELECT_IN_TABLE: {
this._insertCharacters(token);
break;
}
case InsertionMode.IN_TABLE:
case InsertionMode.IN_TABLE_BODY:
case InsertionMode.IN_ROW: {
characterInTable(this, token);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
characterInTableText(this, token);
break;
}
case InsertionMode.IN_COLUMN_GROUP: {
tokenInColumnGroup(this, token);
break;
}
case InsertionMode.AFTER_BODY: {
tokenAfterBody(this, token);
break;
}
case InsertionMode.AFTER_AFTER_BODY: {
tokenAfterAfterBody(this, token);
break;
}
default:
// Do nothing
}
}
onNullCharacter(token) {
this.skipNextNewLine = false;
if (this.tokenizer.inForeignNode) {
nullCharacterInForeignContent(this, token);
return;
}
switch (this.insertionMode) {
case InsertionMode.INITIAL: {
tokenInInitialMode(this, token);
break;
}
case InsertionMode.BEFORE_HTML: {
tokenBeforeHtml(this, token);
break;
}
case InsertionMode.BEFORE_HEAD: {
tokenBeforeHead(this, token);
break;
}
case InsertionMode.IN_HEAD: {
tokenInHead(this, token);
break;
}
case InsertionMode.IN_HEAD_NO_SCRIPT: {
tokenInHeadNoScript(this, token);
break;
}
case InsertionMode.AFTER_HEAD: {
tokenAfterHead(this, token);
break;
}
case InsertionMode.TEXT: {
this._insertCharacters(token);
break;
}
case InsertionMode.IN_TABLE:
case InsertionMode.IN_TABLE_BODY:
case InsertionMode.IN_ROW: {
characterInTable(this, token);
break;
}
case InsertionMode.IN_COLUMN_GROUP: {
tokenInColumnGroup(this, token);
break;
}
case InsertionMode.AFTER_BODY: {
tokenAfterBody(this, token);
break;
}
case InsertionMode.AFTER_AFTER_BODY: {
tokenAfterAfterBody(this, token);
break;
}
default:
// Do nothing
}
}
onComment(token) {
this.skipNextNewLine = false;
if (this.currentNotInHTML) {
appendComment(this, token);
return;
}
switch (this.insertionMode) {
case InsertionMode.INITIAL:
case InsertionMode.BEFORE_HTML:
case InsertionMode.BEFORE_HEAD:
case InsertionMode.IN_HEAD:
case InsertionMode.IN_HEAD_NO_SCRIPT:
case InsertionMode.AFTER_HEAD:
case InsertionMode.IN_BODY:
case InsertionMode.IN_TABLE:
case InsertionMode.IN_CAPTION:
case InsertionMode.IN_COLUMN_GROUP:
case InsertionMode.IN_TABLE_BODY:
case InsertionMode.IN_ROW:
case InsertionMode.IN_CELL:
case InsertionMode.IN_SELECT:
case InsertionMode.IN_SELECT_IN_TABLE:
case InsertionMode.IN_TEMPLATE:
case InsertionMode.IN_FRAMESET:
case InsertionMode.AFTER_FRAMESET: {
appendComment(this, token);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
tokenInTableText(this, token);
break;
}
case InsertionMode.AFTER_BODY: {
appendCommentToRootHtmlElement(this, token);
break;
}
case InsertionMode.AFTER_AFTER_BODY:
case InsertionMode.AFTER_AFTER_FRAMESET: {
appendCommentToDocument(this, token);
break;
}
default:
// Do nothing
}
}
onDoctype(token) {
this.skipNextNewLine = false;
switch (this.insertionMode) {
case InsertionMode.INITIAL: {
doctypeInInitialMode(this, token);
break;
}
case InsertionMode.BEFORE_HEAD:
case InsertionMode.IN_HEAD:
case InsertionMode.IN_HEAD_NO_SCRIPT:
case InsertionMode.AFTER_HEAD: {
this._err(token, ERR.misplacedDoctype);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
tokenInTableText(this, token);
break;
}
default:
// Do nothing
}
}
onStartTag(token) {
this.skipNextNewLine = false;
this.currentToken = token;
this._processStartTag(token);
if (token.selfClosing && !token.ackSelfClosing) {
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
}
}
/**
* Processes a given start tag.
*
* `onStartTag` checks if a self-closing tag was recognized. When a token
* is moved inbetween multiple insertion modes, this check for self-closing
* could lead to false positives. To avoid this, `_processStartTag` is used
* for nested calls.
*
* @param token The token to process.
*/
_processStartTag(token) {
if (this.shouldProcessStartTagTokenInForeignContent(token)) {
startTagInForeignContent(this, token);
}
else {
this._startTagOutsideForeignContent(token);
}
}
_startTagOutsideForeignContent(token) {
switch (this.insertionMode) {
case InsertionMode.INITIAL: {
tokenInInitialMode(this, token);
break;
}
case InsertionMode.BEFORE_HTML: {
startTagBeforeHtml(this, token);
break;
}
case InsertionMode.BEFORE_HEAD: {
startTagBeforeHead(this, token);
break;
}
case InsertionMode.IN_HEAD: {
startTagInHead(this, token);
break;
}
case InsertionMode.IN_HEAD_NO_SCRIPT: {
startTagInHeadNoScript(this, token);
break;
}
case InsertionMode.AFTER_HEAD: {
startTagAfterHead(this, token);
break;
}
case InsertionMode.IN_BODY: {
startTagInBody(this, token);
break;
}
case InsertionMode.IN_TABLE: {
startTagInTable(this, token);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
tokenInTableText(this, token);
break;
}
case InsertionMode.IN_CAPTION: {
startTagInCaption(this, token);
break;
}
case InsertionMode.IN_COLUMN_GROUP: {
startTagInColumnGroup(this, token);
break;
}
case InsertionMode.IN_TABLE_BODY: {
startTagInTableBody(this, token);
break;
}
case InsertionMode.IN_ROW: {
startTagInRow(this, token);
break;
}
case InsertionMode.IN_CELL: {
startTagInCell(this, token);
break;
}
case InsertionMode.IN_SELECT: {
startTagInSelect(this, token);
break;
}
case InsertionMode.IN_SELECT_IN_TABLE: {
startTagInSelectInTable(this, token);
break;
}
case InsertionMode.IN_TEMPLATE: {
startTagInTemplate(this, token);
break;
}
case InsertionMode.AFTER_BODY: {
startTagAfterBody(this, token);
break;
}
case InsertionMode.IN_FRAMESET: {
startTagInFrameset(this, token);
break;
}
case InsertionMode.AFTER_FRAMESET: {
startTagAfterFrameset(this, token);
break;
}
case InsertionMode.AFTER_AFTER_BODY: {
startTagAfterAfterBody(this, token);
break;
}
case InsertionMode.AFTER_AFTER_FRAMESET: {
startTagAfterAfterFrameset(this, token);
break;
}
default:
// Do nothing
}
}
onEndTag(token) {
this.skipNextNewLine = false;
this.currentToken = token;
if (this.currentNotInHTML) {
endTagInForeignContent(this, token);
}
else {
this._endTagOutsideForeignContent(token);
}
}
_endTagOutsideForeignContent(token) {
switch (this.insertionMode) {
case InsertionMode.INITIAL: {
tokenInInitialMode(this, token);
break;
}
case InsertionMode.BEFORE_HTML: {
endTagBeforeHtml(this, token);
break;
}
case InsertionMode.BEFORE_HEAD: {
endTagBeforeHead(this, token);
break;
}
case InsertionMode.IN_HEAD: {
endTagInHead(this, token);
break;
}
case InsertionMode.IN_HEAD_NO_SCRIPT: {
endTagInHeadNoScript(this, token);
break;
}
case InsertionMode.AFTER_HEAD: {
endTagAfterHead(this, token);
break;
}
case InsertionMode.IN_BODY: {
endTagInBody(this, token);
break;
}
case InsertionMode.TEXT: {
endTagInText(this, token);
break;
}
case InsertionMode.IN_TABLE: {
endTagInTable(this, token);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
tokenInTableText(this, token);
break;
}
case InsertionMode.IN_CAPTION: {
endTagInCaption(this, token);
break;
}
case InsertionMode.IN_COLUMN_GROUP: {
endTagInColumnGroup(this, token);
break;
}
case InsertionMode.IN_TABLE_BODY: {
endTagInTableBody(this, token);
break;
}
case InsertionMode.IN_ROW: {
endTagInRow(this, token);
break;
}
case InsertionMode.IN_CELL: {
endTagInCell(this, token);
break;
}
case InsertionMode.IN_SELECT: {
endTagInSelect(this, token);
break;
}
case InsertionMode.IN_SELECT_IN_TABLE: {
endTagInSelectInTable(this, token);
break;
}
case InsertionMode.IN_TEMPLATE: {
endTagInTemplate(this, token);
break;
}
case InsertionMode.AFTER_BODY: {
endTagAfterBody(this, token);
break;
}
case InsertionMode.IN_FRAMESET: {
endTagInFrameset(this, token);
break;
}
case InsertionMode.AFTER_FRAMESET: {
endTagAfterFrameset(this, token);
break;
}
case InsertionMode.AFTER_AFTER_BODY: {
tokenAfterAfterBody(this, token);
break;
}
default:
// Do nothing
}
}
onEof(token) {
switch (this.insertionMode) {
case InsertionMode.INITIAL: {
tokenInInitialMode(this, token);
break;
}
case InsertionMode.BEFORE_HTML: {
tokenBeforeHtml(this, token);
break;
}
case InsertionMode.BEFORE_HEAD: {
tokenBeforeHead(this, token);
break;
}
case InsertionMode.IN_HEAD: {
tokenInHead(this, token);
break;
}
case InsertionMode.IN_HEAD_NO_SCRIPT: {
tokenInHeadNoScript(this, token);
break;
}
case InsertionMode.AFTER_HEAD: {
tokenAfterHead(this, token);
break;
}
case InsertionMode.IN_BODY:
case InsertionMode.IN_TABLE:
case InsertionMode.IN_CAPTION:
case InsertionMode.IN_COLUMN_GROUP:
case InsertionMode.IN_TABLE_BODY:
case InsertionMode.IN_ROW:
case InsertionMode.IN_CELL:
case InsertionMode.IN_SELECT:
case InsertionMode.IN_SELECT_IN_TABLE: {
eofInBody(this, token);
break;
}
case InsertionMode.TEXT: {
eofInText(this, token);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
tokenInTableText(this, token);
break;
}
case InsertionMode.IN_TEMPLATE: {
eofInTemplate(this, token);
break;
}
case InsertionMode.AFTER_BODY:
case InsertionMode.IN_FRAMESET:
case InsertionMode.AFTER_FRAMESET:
case InsertionMode.AFTER_AFTER_BODY:
case InsertionMode.AFTER_AFTER_FRAMESET: {
stopParsing(this, token);
break;
}
default:
// Do nothing
}
}
onWhitespaceCharacter(token) {
if (this.skipNextNewLine) {
this.skipNextNewLine = false;
if (token.chars.charCodeAt(0) === unicode.CODE_POINTS.LINE_FEED) {
if (token.chars.length === 1) {
return;
}
token.chars = token.chars.substr(1);
}
}
if (this.tokenizer.inForeignNode) {
this._insertCharacters(token);
return;
}
switch (this.insertionMode) {
case InsertionMode.IN_HEAD:
case InsertionMode.IN_HEAD_NO_SCRIPT:
case InsertionMode.AFTER_HEAD:
case InsertionMode.TEXT:
case InsertionMode.IN_COLUMN_GROUP:
case InsertionMode.IN_SELECT:
case InsertionMode.IN_SELECT_IN_TABLE:
case InsertionMode.IN_FRAMESET:
case InsertionMode.AFTER_FRAMESET: {
this._insertCharacters(token);
break;
}
case InsertionMode.IN_BODY:
case InsertionMode.IN_CAPTION:
case InsertionMode.IN_CELL:
case InsertionMode.IN_TEMPLATE:
case InsertionMode.AFTER_BODY:
case InsertionMode.AFTER_AFTER_BODY:
case InsertionMode.AFTER_AFTER_FRAMESET: {
whitespaceCharacterInBody(this, token);
break;
}
case InsertionMode.IN_TABLE:
case InsertionMode.IN_TABLE_BODY:
case InsertionMode.IN_ROW: {
characterInTable(this, token);
break;
}
case InsertionMode.IN_TABLE_TEXT: {
whitespaceCharacterInTableText(this, token);
break;
}
default:
// Do nothing
}
}
}
//Adoption agency algorithm
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adoptionAgency)
//------------------------------------------------------------------
//Steps 5-8 of the algorithm
function aaObtainFormattingElementEntry(p, token) {
let formattingElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
if (formattingElementEntry) {
if (!p.openElements.contains(formattingElementEntry.element)) {
p.activeFormattingElements.removeEntry(formattingElementEntry);
formattingElementEntry = null;
}
else if (!p.openElements.hasInScope(token.tagID)) {
formattingElementEntry = null;
}
}
else {
genericEndTagInBody(p, token);
}
return formattingElementEntry;
}
//Steps 9 and 10 of the algorithm
function aaObtainFurthestBlock(p, formattingElementEntry) {
let furthestBlock = null;
let idx = p.openElements.stackTop;
for (; idx >= 0; idx--) {
const element = p.openElements.items[idx];
if (element === formattingElementEntry.element) {
break;
}
if (p._isSpecialElement(element, p.openElements.tagIDs[idx])) {
furthestBlock = element;
}
}
if (!furthestBlock) {
p.openElements.shortenToLength(idx < 0 ? 0 : idx);
p.activeFormattingElements.removeEntry(formattingElementEntry);
}
return furthestBlock;
}
//Step 13 of the algorithm
function aaInnerLoop(p, furthestBlock, formattingElement) {
let lastElement = furthestBlock;
let nextElement = p.openElements.getCommonAncestor(furthestBlock);
for (let i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) {
//NOTE: store the next element for the next loop iteration (it may be deleted from the stack by step 9.5)
nextElement = p.openElements.getCommonAncestor(element);
const elementEntry = p.activeFormattingElements.getElementEntry(element);
const counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER;
const shouldRemoveFromOpenElements = !elementEntry || counterOverflow;
if (shouldRemoveFromOpenElements) {
if (counterOverflow) {
p.activeFormattingElements.removeEntry(elementEntry);
}
p.openElements.remove(element);
}
else {
element = aaRecreateElementFromEntry(p, elementEntry);
if (lastElement === furthestBlock) {
p.activeFormattingElements.bookmark = elementEntry;
}
p.treeAdapter.detachNode(lastElement);
p.treeAdapter.appendChild(element, lastElement);
lastElement = element;
}
}
return lastElement;
}
//Step 13.7 of the algorithm
function aaRecreateElementFromEntry(p, elementEntry) {
const ns = p.treeAdapter.getNamespaceURI(elementEntry.element);
const newElement = p.treeAdapter.createElement(elementEntry.token.tagName, ns, elementEntry.token.attrs);
p.openElements.replace(elementEntry.element, newElement);
elementEntry.element = newElement;
return newElement;
}
//Step 14 of the algorithm
function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) {
const tn = p.treeAdapter.getTagName(commonAncestor);
const tid = getTagID(tn);
if (p._isElementCausesFosterParenting(tid)) {
p._fosterParentElement(lastElement);
}
else {
const ns = p.treeAdapter.getNamespaceURI(commonAncestor);
if (tid === $.TEMPLATE && ns === NS.HTML) {
commonAncestor = p.treeAdapter.getTemplateContent(commonAncestor);
}
p.treeAdapter.appendChild(commonAncestor, lastElement);
}
}
//Steps 15-19 of the algorithm
function aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry) {
const ns = p.treeAdapter.getNamespaceURI(formattingElementEntry.element);
const { token } = formattingElementEntry;
const newElement = p.treeAdapter.createElement(token.tagName, ns, token.attrs);
p._adoptNodes(furthestBlock, newElement);
p.treeAdapter.appendChild(furthestBlock, newElement);
p.activeFormattingElements.insertElementAfterBookmark(newElement, token);
p.activeFormattingElements.removeEntry(formattingElementEntry);
p.openElements.remove(formattingElementEntry.element);
p.openElements.insertAfter(furthestBlock, newElement, token.tagID);
}
//Algorithm entry point
function callAdoptionAgency(p, token) {
for (let i = 0; i < AA_OUTER_LOOP_ITER; i++) {
const formattingElementEntry = aaObtainFormattingElementEntry(p, token);
if (!formattingElementEntry) {
break;
}
const furthestBlock = aaObtainFurthestBlock(p, formattingElementEntry);
if (!furthestBlock) {
break;
}
p.activeFormattingElements.bookmark = formattingElementEntry;
const lastElement = aaInnerLoop(p, furthestBlock, formattingElementEntry.element);
const commonAncestor = p.openElements.getCommonAncestor(formattingElementEntry.element);
p.treeAdapter.detachNode(lastElement);
if (commonAncestor)
aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry);
}
}
//Generic token handlers
//------------------------------------------------------------------
function appendComment(p, token) {
p._appendCommentNode(token, p.openElements.currentTmplContentOrNode);
}
function appendCommentToRootHtmlElement(p, token) {
p._appendCommentNode(token, p.openElements.items[0]);
}
function appendCommentToDocument(p, token) {
p._appendCommentNode(token, p.document);
}
function stopParsing(p, token) {
p.stopped = true;
// NOTE: Set end locations for elements that remain on the open element stack.
if (token.location) {
// NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
// This is a problem, as we might overwrite their end position here.
const target = p.fragmentContext ? 0 : 2;
for (let i = p.openElements.stackTop; i >= target; i--) {
p._setEndLocation(p.openElements.items[i], token);
}
// Handle `html` and `body`
if (!p.fragmentContext && p.openElements.stackTop >= 0) {
const htmlElement = p.openElements.items[0];
const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
if (htmlLocation && !htmlLocation.endTag) {
p._setEndLocation(htmlElement, token);
if (p.openElements.stackTop >= 1) {
const bodyElement = p.openElements.items[1];
const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
if (bodyLocation && !bodyLocation.endTag) {
p._setEndLocation(bodyElement, token);
}
}
}
}
}
}
// The "initial" insertion mode
//------------------------------------------------------------------
function doctypeInInitialMode(p, token) {
p._setDocumentType(token);
const mode = token.forceQuirks ? DOCUMENT_MODE.QUIRKS : doctype.getDocumentMode(token);
if (!doctype.isConforming(token)) {
p._err(token, ERR.nonConformingDoctype);
}
p.treeAdapter.setDocumentMode(p.document, mode);
p.insertionMode = InsertionMode.BEFORE_HTML;
}
function tokenInInitialMode(p, token) {
p._err(token, ERR.missingDoctype, true);
p.treeAdapter.setDocumentMode(p.document, DOCUMENT_MODE.QUIRKS);
p.insertionMode = InsertionMode.BEFORE_HTML;
p._processToken(token);
}
// The "before html" insertion mode
//------------------------------------------------------------------
function startTagBeforeHtml(p, token) {
if (to