/*
 * happy-dom
 * Version:
 * Happy DOM is a JavaScript implementation of a web browser without its graphical user interface. It includes many web standards from WHATWG DOM and HTML.
 * 771 lines • 40.4 kB
 * JavaScript
 */
/**
 * Module interop helper: exposes property "k" of module "m" on object "o" under the key "k2" (defaults to "k").
 *
 * When the source property has no descriptor, is a getter on a non-ES module, or is a writable/configurable data
 * property, the binding is defined as an enumerable getter that reads "m[k]" on every access.
 * Otherwise the original property descriptor is reused as is.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    function bindWithDescriptor(o, m, k, k2) {
        var targetKey = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, targetKey, descriptor);
    }
    function bindByAssignment(o, m, k, k2) {
        var targetKey = k2 === undefined ? k : k2;
        o[targetKey] = m[k];
    }
    return Object.create ? bindWithDescriptor : bindByAssignment;
})();
/**
 * Module interop helper: stores "v" as the "default" export of the namespace object "o".
 *
 * With Object.create available the property is defined as an enumerable value property,
 * otherwise it is assigned directly.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * Module interop helper: returns a namespace object for a required module.
 *
 * Modules flagged with "__esModule" are returned untouched. For anything else a new object is created that
 * re-exports every own property except "default" through __createBinding() and exposes the module itself
 * as "default" through __setModuleDefault().
 */
var __importStar = (this && this.__importStar) || (function () {
    // The key lister is resolved on first use and cached afterwards.
    var cachedKeyLister = null;
    function ownKeys(o) {
        if (cachedKeyLister === null) {
            cachedKeyLister = Object.getOwnPropertyNames || function (target) {
                var names = [];
                for (var name in target) {
                    if (Object.prototype.hasOwnProperty.call(target, name)) {
                        names[names.length] = name;
                    }
                }
                return names;
            };
        }
        return cachedKeyLister(o);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var result = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            var index = 0;
            while (index < keys.length) {
                if (keys[index] !== "default") {
                    __createBinding(result, mod, keys[index]);
                }
                index++;
            }
        }
        __setModuleDefault(result, mod);
        return result;
    };
})();
/**
 * Module interop helper: returns ES modules (flagged with "__esModule") untouched and wraps
 * everything else in an object exposing it as "default".
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const Document_js_1 = __importDefault(require("../nodes/document/Document.cjs"));
const PropertySymbol = __importStar(require("../PropertySymbol.cjs"));
const NamespaceURI_js_1 = __importDefault(require("../config/NamespaceURI.cjs"));
const HTMLElementConfig_js_1 = __importDefault(require("../config/HTMLElementConfig.cjs"));
const HTMLElementConfigContentModelEnum_js_1 = __importDefault(require("../config/HTMLElementConfigContentModelEnum.cjs"));
const SVGElementConfig_js_1 = __importDefault(require("../config/SVGElementConfig.cjs"));
const StringUtility_js_1 = __importDefault(require("../utilities/StringUtility.cjs"));
const XMLEncodeUtility_js_1 = __importDefault(require("../utilities/XMLEncodeUtility.cjs"));
const NodeTypeEnum_js_1 = __importDefault(require("../nodes/node/NodeTypeEnum.cjs"));
const NodeFactory_js_1 = __importDefault(require("../nodes/NodeFactory.cjs"));
/**
 * Markup RegExp.
 *
 * Matches the markup delimiters the parser reacts to. Each alternative of the pattern has its own
 * capture group, so exactly one of the groups below is set for every match.
 * The alternative for "<!--" comes before the one for "<!", so a comment start is never reported as a document type start.
 *
 * Group 1: Beginning of start tag (e.g. "div" in "<div").
 * Group 2: End tag (e.g. "div" in "</div>"). Whitespace is allowed before the closing ">".
 * Group 3: Comment start tag "<!--"
 * Group 4: Comment end tag "-->" (the variant "--!>" is matched as well).
 * Group 5: Document type start tag "<!"
 * Group 6: Processing instruction start tag "<?"
 * Group 7: End of self closing start tag (e.g. "/>" in "<img/>").
 * Group 8: End of start tag or comment tag (e.g. ">" in "<div>").
 */
const MARKUP_REGEXP = /<([^\s/!>?]+)|<\/([^\s/!>?]+)\s*>|(<!--)|(-->|--!>)|(<!)|(<\?)|(\/>)|(>)/gm;
/**
 * Attribute RegExp.
 *
 * Matches one attribute per match inside the attribute string of a start tag.
 * The pattern has four alternatives (unquoted value, double quoted value, single quoted value, no value),
 * each with its own capture groups.
 *
 * Group 1: Attribute name when the attribute has a value with no apostrophes (e.g. "name" in "<div name=value>").
 * Group 2: Attribute value when the attribute has a value with no apostrophes (e.g. "value" in "<div name=value>").
 * Group 3: Attribute name when the attribute has a value using double apostrophe (e.g. "name" in "<div name="value">").
 * Group 4: Attribute value when the attribute has a value using double apostrophe (e.g. "value" in "<div name="value">").
 * Group 5: Attribute end apostrophe when the attribute has a value using double apostrophe (e.g. '"' in "<div name="value">"). Empty string when the end apostrophe is missing.
 * Group 6: Attribute name when the attribute has a value using single apostrophe (e.g. "name" in "<div name='value'>").
 * Group 7: Attribute value when the attribute has a value using single apostrophe (e.g. "value" in "<div name='value'>").
 * Group 8: Attribute end apostrophe when the attribute has a value using single apostrophe (e.g. "'" in "<div name='value'>"). Empty string when the end apostrophe is missing.
 * Group 9: Attribute name when the attribute has no value (e.g. "disabled" in "<div disabled>").
 */
const ATTRIBUTE_REGEXP = /\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*([^"'=<>\\`\s]+)|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*"([^"]*)("{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)\s*=\s*'([^']*)('{0,1})|\s*([a-zA-Z0-9-_:.$@?\\<\[\]]+)/gm;
/**
 * Document type attribute RegExp.
 *
 * Matches a non-empty value enclosed in double apostrophes (e.g. the public or system identifier of a document type).
 * Values enclosed in single apostrophes are not matched by this pattern.
 *
 * Group 1: Attribute value (without the surrounding apostrophes).
 */
const DOCUMENT_TYPE_ATTRIBUTE_REGEXP = /"([^"]+)"/gm;
/**
 * Space RegExp.
 *
 * Matches a run of one or more whitespace characters.
 */
const SPACE_REGEXP = /\s+/;
/**
 * Space in the beginning of string RegExp.
 *
 * Matches a run of one or more whitespace characters anchored at the start of the string.
 */
const SPACE_IN_BEGINNING_REGEXP = /^\s+/;
/**
 * Markup read state (which state the parser is in).
 *
 * Every member maps its name to a string with the same value.
 */
var MarkupReadStateEnum;
(function (states) {
    var names = ["any", "startTag", "comment", "documentType", "processingInstruction", "rawTextElement"];
    for (var i = 0; i < names.length; i++) {
        states[names[i]] = names[i];
    }
})(MarkupReadStateEnum || (MarkupReadStateEnum = {}));
/**
 * How much of the HTML document that has been parsed (where the parser level is).
 *
 * Numeric enum with reverse mapping: each name maps to its ordinal and each ordinal maps back to its name.
 */
var HTMLDocumentStructureLevelEnum;
(function (levels) {
    var names = ["root", "doctype", "documentElement", "head", "additionalHeadWithoutBody", "body", "afterBody"];
    names.forEach(function (name, ordinal) {
        levels[name] = ordinal;
        levels[ordinal] = name;
    });
})(HTMLDocumentStructureLevelEnum || (HTMLDocumentStructureLevelEnum = {}));
/**
 * HTML parser.
 *
 * Walks through the markup with MARKUP_REGEXP and a small state machine ("readState") and builds the
 * resulting nodes below a root node. Open elements are tracked in "nodeStack" and "tagNameStack".
 * When the root node is a Document or an <html> element, "documentStructure" keeps references to the
 * already existing <html>, <head> and <body> elements and tracks how far into the document the parser is.
 *
 * All parsing state is stored on the instance and is reset by parse().
 */
class HTMLParser {
    window;
    evaluateScripts = false;
    rootNode = null;
    rootDocument = null;
    nodeStack = [];
    tagNameStack = [];
    documentStructure = null;
    startTagIndex = 0;
    markupRegExp = null;
    nextElement = null;
    currentNode = null;
    readState = MarkupReadStateEnum.any;
    /**
     * Constructor.
     *
     * @param window Window.
     * @param [options] Options.
     * @param [options.evaluateScripts] Set to "true" to enable script execution
     */
    constructor(window, options) {
        this.window = window;
        if (options?.evaluateScripts) {
            this.evaluateScripts = true;
        }
    }
    /**
     * Parses HTML and appends the nodes found to a root node.
     *
     * @param html HTML string (other values are converted with String()).
     * @param [rootNode] Root node. Defaults to a new document fragment created by the document of the window.
     * @returns Root node.
     * @throws Error if the root node is a document without "documentElement", "head" or "body".
     */
    parse(html, rootNode) {
        // Resets all state, so that the parser instance can be re-used.
        this.rootNode = rootNode || this.window.document.createDocumentFragment();
        this.rootDocument = this.rootNode instanceof Document_js_1.default ? this.rootNode : this.window.document;
        this.nodeStack = [this.rootNode];
        this.tagNameStack = [null];
        this.currentNode = this.rootNode;
        this.nextElement = null;
        this.readState = MarkupReadStateEnum.any;
        this.documentStructure = null;
        this.startTagIndex = 0;
        // A new RegExp instance is used for each run as "lastIndex" is stateful.
        this.markupRegExp = new RegExp(MARKUP_REGEXP, 'gm');
        if (this.rootNode instanceof Document_js_1.default) {
            const { doctype, documentElement, head, body } = this.rootNode;
            if (!documentElement || !head || !body) {
                throw new Error('Failed to parse HTML: The root node must have "documentElement", "head" and "body".\n\nWe should not end up here and it is therefore a bug in Happy DOM. Please report this issue.');
            }
            this.documentStructure = {
                nodes: {
                    doctype: doctype || null,
                    documentElement,
                    head,
                    body
                },
                level: HTMLDocumentStructureLevelEnum.root
            };
        }
        if (this.rootNode instanceof this.window.HTMLHtmlElement) {
            // An <html> root element is emptied and gets a new <head> and <body>.
            const head = this.rootDocument.createElement('head');
            const body = this.rootDocument.createElement('body');
            while (this.rootNode[PropertySymbol.nodeArray].length > 0) {
                this.rootNode[PropertySymbol.removeChild](this.rootNode[PropertySymbol.nodeArray][this.rootNode[PropertySymbol.nodeArray].length - 1]);
            }
            this.rootNode[PropertySymbol.appendChild](head);
            this.rootNode[PropertySymbol.appendChild](body);
            this.documentStructure = {
                nodes: {
                    doctype: null,
                    documentElement: this.rootNode,
                    head,
                    body
                },
                level: HTMLDocumentStructureLevelEnum.documentElement
            };
        }
        let match;
        let lastIndex = 0;
        html = String(html);
        while ((match = this.markupRegExp.exec(html))) {
            switch (this.readState) {
                case MarkupReadStateEnum.any:
                    // Plain text between tags.
                    if (match.index !== lastIndex &&
                        (match[1] || match[2] || match[3] || match[4] || match[5] !== undefined || match[6])) {
                        this.parsePlainText(html.substring(lastIndex, match.index));
                    }
                    if (match[1]) {
                        // Start tag.
                        this.nextElement = this.getStartTagElement(match[1]);
                        this.startTagIndex = this.markupRegExp.lastIndex;
                        this.readState = MarkupReadStateEnum.startTag;
                    }
                    else if (match[2]) {
                        // End tag.
                        this.parseEndTag(match[2]);
                    }
                    else if (match[3]) {
                        // Comment.
                        this.startTagIndex = this.markupRegExp.lastIndex;
                        this.readState = MarkupReadStateEnum.comment;
                    }
                    else if (match[5] !== undefined) {
                        // Document type.
                        this.startTagIndex = this.markupRegExp.lastIndex;
                        this.readState = MarkupReadStateEnum.documentType;
                    }
                    else if (match[6]) {
                        // Processing instruction.
                        this.startTagIndex = this.markupRegExp.lastIndex;
                        this.readState = MarkupReadStateEnum.processingInstruction;
                    }
                    else {
                        // Plain text between tags, including the matched tag as it is not a valid start or end tag.
                        this.parsePlainText(html.substring(lastIndex, this.markupRegExp.lastIndex));
                    }
                    break;
                case MarkupReadStateEnum.startTag:
                    // End of start tag
                    // match[2] is matching an end tag in case the start tag wasn't closed (e.g. "<div\n</ul>" instead of "<div>\n</ul>").
                    // match[7] is matching "/>" (e.g. "<img/>").
                    // match[8] is matching ">" (e.g. "<div>").
                    if (match[7] || match[8] || match[2]) {
                        if (this.nextElement) {
                            const attributeString = html.substring(this.startTagIndex, match[2] ? this.markupRegExp.lastIndex - 1 : match.index);
                            const isSelfClosed = !!match[7];
                            this.parseEndOfStartTag(attributeString, isSelfClosed);
                        }
                        else {
                            // If "nextElement" is set to null, the tag is not allowed (<html>, <head> and <body> are not allowed in an HTML fragment or to be nested).
                            this.readState = MarkupReadStateEnum.any;
                        }
                    }
                    break;
                case MarkupReadStateEnum.comment:
                    // Comment end tag.
                    if (match[4]) {
                        this.parseComment(html.substring(this.startTagIndex, match.index));
                    }
                    break;
                case MarkupReadStateEnum.documentType:
                    // Document type end tag.
                    if (match[7] || match[8]) {
                        this.parseDocumentType(html.substring(this.startTagIndex, match.index));
                    }
                    break;
                case MarkupReadStateEnum.processingInstruction:
                    // Processing instruction end tag.
                    if (match[7] || match[8]) {
                        // Processing instructions are not supported in HTML and are rendered as comments.
                        this.parseComment('?' + html.substring(this.startTagIndex, match.index));
                    }
                    break;
                case MarkupReadStateEnum.rawTextElement:
                    // End tag of raw text content.
                    // <script> and <style> elements are raw text elements.
                    if (match[2]) {
                        this.parseRawTextElementContent(match[2], html.substring(this.startTagIndex, match.index));
                    }
                    break;
            }
            lastIndex = this.markupRegExp.lastIndex;
        }
        // Plain text after tags.
        if (lastIndex !== html.length && this.currentNode) {
            this.parsePlainText(html.substring(lastIndex));
        }
        return this.rootNode;
    }
    /**
     * Parses plain text.
     *
     * HTML entities are decoded before the text is added to the tree. When parsing a document, text found
     * outside of <head> and <body> is moved into the document structure (usually the <body>).
     *
     * @param text Text.
     */
    parsePlainText(text) {
        if (this.documentStructure) {
            const level = this.documentStructure.level;
            const { documentElement, head, body } = this.documentStructure.nodes;
            // We should remove space in beginning inside the document and the <html> tag.
            const htmlText = (this.currentNode === this.rootNode || this.currentNode === documentElement) &&
                level < HTMLDocumentStructureLevelEnum.head &&
                body[PropertySymbol.elementArray].length === 0
                ? text.replace(SPACE_IN_BEGINNING_REGEXP, '')
                : text;
            if (htmlText) {
                const textNode = this.rootDocument.createTextNode(XMLEncodeUtility_js_1.default.decodeHTMLEntities(htmlText));
                if (this.currentNode === head &&
                    level === HTMLDocumentStructureLevelEnum.additionalHeadWithoutBody) {
                    documentElement[PropertySymbol.insertBefore](textNode, body, true);
                }
                else if (this.currentNode === this.rootNode ||
                    this.currentNode === documentElement ||
                    (this.currentNode === head && level >= HTMLDocumentStructureLevelEnum.body)) {
                    if (level === HTMLDocumentStructureLevelEnum.head) {
                        // Space between <head> and <body> is allowed
                        documentElement[PropertySymbol.insertBefore](textNode, body, true);
                    }
                    else if (body.lastChild?.[PropertySymbol.nodeType] === NodeTypeEnum_js_1.default.textNode) {
                        // If the last child of the body is a text node, we should append the text to it.
                        // The text has to be decoded in the same way as text that ends up in a new text node,
                        // otherwise entities such as "&amp;" would end up in the DOM without being decoded.
                        body.lastChild[PropertySymbol.data] +=
                            XMLEncodeUtility_js_1.default.decodeHTMLEntities(text);
                    }
                    else {
                        // Nodes outside <html>, <head> and <body> should be appended to the body
                        body[PropertySymbol.appendChild](textNode, true);
                    }
                }
                else {
                    this.currentNode[PropertySymbol.appendChild](textNode, true);
                }
            }
        }
        else {
            const textNode = this.rootDocument.createTextNode(XMLEncodeUtility_js_1.default.decodeHTMLEntities(text));
            this.currentNode[PropertySymbol.appendChild](textNode, true);
        }
    }
    /**
     * Parses end of start tag.
     *
     * Sets the attributes on "nextElement", auto-closes or creates parent elements when the content model
     * of the involved elements requires it, appends the element to the tree and updates the read state.
     *
     * @param attributeString Attribute string.
     * @param isSelfClosed Is self closed.
     * @throws Error if the loop resolving the parent element doesn't make any progress.
     */
    parseEndOfStartTag(attributeString, isSelfClosed) {
        // Attributes on an additional <head> tag found after the <body> has been reached are ignored.
        if (attributeString &&
            (!this.documentStructure ||
                this.nextElement !== this.documentStructure.nodes.head ||
                this.documentStructure.level < HTMLDocumentStructureLevelEnum.body)) {
            const attributeRegexp = new RegExp(ATTRIBUTE_REGEXP, 'gm');
            let attributeMatch;
            while ((attributeMatch = attributeRegexp.exec(attributeString))) {
                if ((attributeMatch[1] && attributeMatch[2]) ||
                    (attributeMatch[3] && attributeMatch[5] === '"') ||
                    (attributeMatch[6] && attributeMatch[8] === "'") ||
                    attributeMatch[9]) {
                    // Valid attribute name and value.
                    const name = attributeMatch[1] || attributeMatch[3] || attributeMatch[6] || attributeMatch[9] || '';
                    const rawValue = attributeMatch[2] || attributeMatch[4] || attributeMatch[7] || '';
                    const value = rawValue ? XMLEncodeUtility_js_1.default.decodeHTMLAttributeValue(rawValue) : '';
                    const attributes = this.nextElement[PropertySymbol.attributes];
                    if (this.nextElement[PropertySymbol.namespaceURI] === NamespaceURI_js_1.default.svg) {
                        const nameParts = name.split(':');
                        let namespaceURI = null;
                        // In the SVG namespace, the attribute "xmlns" should be set to the "http://www.w3.org/2000/xmlns/" namespace and "xlink" to the "http://www.w3.org/1999/xlink" namespace.
                        switch (nameParts[0]) {
                            case 'xmlns':
                                namespaceURI =
                                    !nameParts[1] || nameParts[1] === 'xlink' ? NamespaceURI_js_1.default.xmlns : null;
                                break;
                            case 'xlink':
                                namespaceURI = NamespaceURI_js_1.default.xlink;
                                break;
                        }
                        // The first occurrence of an attribute wins.
                        if (!attributes.getNamedItemNS(namespaceURI, nameParts[1] ?? name)) {
                            const attribute = NodeFactory_js_1.default.createNode(this.rootDocument, this.window.Attr);
                            attribute[PropertySymbol.namespaceURI] = namespaceURI;
                            attribute[PropertySymbol.name] = name;
                            attribute[PropertySymbol.localName] =
                                namespaceURI && nameParts[1] ? nameParts[1] : name;
                            attribute[PropertySymbol.prefix] = namespaceURI && nameParts[1] ? nameParts[0] : null;
                            attribute[PropertySymbol.value] = value;
                            attributes[PropertySymbol.setNamedItem](attribute);
                        }
                    }
                    else if (!attributes.getNamedItem(name)) {
                        // The first occurrence of an attribute wins.
                        const attributeItem = this.rootDocument.createAttribute(name);
                        attributeItem[PropertySymbol.value] = value;
                        attributes[PropertySymbol.setNamedItem](attributeItem);
                    }
                    // Moves the start index forward, so that attributes already handled are not parsed again
                    // if we have to continue to the next end of start tag match.
                    this.startTagIndex += attributeMatch[0].length;
                }
                else if (!attributeMatch[1] &&
                    ((attributeMatch[3] && !attributeMatch[5]) || (attributeMatch[6] && !attributeMatch[8]))) {
                    // End attribute apostrophe is missing (e.g. "attr='value" or 'attr="value').
                    // We should continue to the next end of start tag match.
                    return;
                }
            }
        }
        const tagName = this.nextElement[PropertySymbol.tagName];
        const lowerTagName = tagName.toLowerCase();
        const config = HTMLElementConfig_js_1.default[lowerTagName];
        let previousCurrentNode = null;
        // Resolves the parent of the new element by closing or adding elements until the new element is allowed as a child of the current node.
        while (previousCurrentNode !== this.rootNode) {
            const parentLowerTagName = this.currentNode[PropertySymbol.tagName]?.toLowerCase();
            const parentConfig = HTMLElementConfig_js_1.default[parentLowerTagName];
            if (previousCurrentNode === this.currentNode) {
                throw new Error('Failed to parse HTML: The parser is stuck in an infinite loop. Please report this issue.');
            }
            previousCurrentNode = this.currentNode;
            // Some elements are not allowed to be nested (e.g. "<a><a></a></a>" is not allowed.).
            // Therefore we need to auto-close tags with the same name matching the config, so that it become valid (e.g. "<a></a><a></a>").
            if ((config?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.noFirstLevelSelfDescendants &&
                this.tagNameStack[this.tagNameStack.length - 1] === tagName) ||
                parentConfig?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.textOrComments ||
                (parentConfig?.contentModel ===
                    HTMLElementConfigContentModelEnum_js_1.default.noForbiddenFirstLevelDescendants &&
                    parentConfig?.forbiddenDescendants?.includes(lowerTagName)) ||
                (parentConfig?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.permittedDescendants &&
                    !parentConfig?.permittedDescendants?.includes(lowerTagName) &&
                    (!config ||
                        !config.addPermittedParent ||
                        (HTMLElementConfig_js_1.default[config.addPermittedParent].permittedParents &&
                            !HTMLElementConfig_js_1.default[config.addPermittedParent].permittedParents.includes(parentLowerTagName)) ||
                        (HTMLElementConfig_js_1.default[config.addPermittedParent].permittedDescendants &&
                            !HTMLElementConfig_js_1.default[config.addPermittedParent].permittedDescendants.includes(lowerTagName))))) {
                // We need to move forbidden elements inside <table> outside of the table if possible.
                // E.g. "<table><div><tr><td></td></tr></div></table>"" should become "<div></div><table><tbody><tr><td></td></tr></tbody></table>" (<tbody> is added as <tr> has addPermittedParent as config).
                if (parentConfig?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.permittedDescendants &&
                    parentConfig.moveForbiddenDescendant &&
                    !parentConfig.moveForbiddenDescendant.exclude.includes(lowerTagName)) {
                    // We add the element before the first element that is not forbidden.
                    let before = this.currentNode;
                    while (before) {
                        if (!before.parentNode ||
                            !HTMLElementConfig_js_1.default[before.parentNode[PropertySymbol.localName]]
                                ?.permittedDescendants ||
                            HTMLElementConfig_js_1.default[before.parentNode[PropertySymbol.localName]]?.permittedDescendants?.includes(lowerTagName)) {
                            break;
                        }
                        else {
                            before = before.parentNode;
                        }
                    }
                    if (before && before.parentNode) {
                        before.parentNode.insertBefore(this.nextElement, before);
                    }
                    else {
                        // If there is no element that is not forbidden, we append the element
                        before.appendChild(this.nextElement);
                    }
                    // The moved element is not set as current node, which means that its children will be added to the current node instead.
                    this.startTagIndex = this.markupRegExp.lastIndex;
                    this.readState = MarkupReadStateEnum.any;
                    return;
                }
                // This will close the current node
                // E.g. "<a><a></a></a>" will become "<a></a><a></a>"
                this.nodeStack.pop();
                this.tagNameStack.pop();
                this.currentNode = this.nodeStack[this.nodeStack.length - 1] || this.rootNode;
            }
            else if (config?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.noSelfDescendants &&
                this.tagNameStack.includes(tagName)) {
                // Closes all open elements up to and including the closest ancestor with the same tag name.
                while (this.currentNode !== this.rootNode) {
                    if (this.currentNode[PropertySymbol.tagName] === tagName) {
                        this.nodeStack.pop();
                        this.tagNameStack.pop();
                        this.currentNode = this.nodeStack[this.nodeStack.length - 1] || this.rootNode;
                        break;
                    }
                    this.nodeStack.pop();
                    this.tagNameStack.pop();
                    this.currentNode = this.nodeStack[this.nodeStack.length - 1] || this.rootNode;
                }
            }
            else if (config?.permittedParents &&
                !config.permittedParents.includes(parentLowerTagName)) {
                // <thead>, <tbody> and <tfoot> are only allowed as children of <table>.
                // <tr> is only allowed as a child of <table>, <thead>, <tbody> and <tfoot>.
                // <tbody> should be added automatically when <tr> is added directly to the table.
                if (!config.addPermittedParent ||
                    (HTMLElementConfig_js_1.default[config.addPermittedParent].permittedParents &&
                        !HTMLElementConfig_js_1.default[config.addPermittedParent].permittedParents.includes(parentLowerTagName)) ||
                    (HTMLElementConfig_js_1.default[config.addPermittedParent].permittedDescendants &&
                        !HTMLElementConfig_js_1.default[config.addPermittedParent].permittedDescendants.includes(lowerTagName))) {
                    // The element is not allowed here and no parent can be added to make it valid, so the start tag is ignored.
                    this.readState = MarkupReadStateEnum.any;
                    this.startTagIndex = this.markupRegExp.lastIndex;
                    return;
                }
                const permittedParent = this.rootDocument.createElement(config.addPermittedParent);
                this.currentNode[PropertySymbol.appendChild](permittedParent, true);
                this.nodeStack.push(permittedParent);
                this.tagNameStack.push(permittedParent[PropertySymbol.tagName]);
                this.currentNode = permittedParent;
            }
            else {
                break;
            }
        }
        // Appends the new element to its parent
        if (this.documentStructure) {
            const { documentElement, head, body } = this.documentStructure.nodes;
            const level = this.documentStructure.level;
            // Appends the new node to its parent and sets is as current node.
            // Raw text elements (e.g. <script>) should be appended after the raw text has been added as content to the element.
            // <html>, <head> and <body> are special elements with context constraints. They are already available in the document.
            if ((!config || config.contentModel !== HTMLElementConfigContentModelEnum_js_1.default.rawText) &&
                this.nextElement !== documentElement &&
                this.nextElement !== head &&
                this.nextElement !== body) {
                // When parser mode is "htmlDocument", any element added directly to the document or document element should be added to the body.
                if (documentElement &&
                    (this.currentNode === this.rootNode ||
                        this.currentNode === documentElement ||
                        (this.currentNode === head && level >= HTMLDocumentStructureLevelEnum.body))) {
                    if (level < HTMLDocumentStructureLevelEnum.body) {
                        this.documentStructure.level = HTMLDocumentStructureLevelEnum.afterBody;
                    }
                    body[PropertySymbol.appendChild](this.nextElement, true);
                }
                else {
                    this.currentNode[PropertySymbol.appendChild](this.nextElement, true);
                }
            }
        }
        else {
            this.currentNode[PropertySymbol.appendChild](this.nextElement, true);
        }
        // Sets the new element as the current node.
        // An additional <body> tag found after the body has been passed is not set as current node.
        if (!this.documentStructure ||
            this.nextElement !== this.documentStructure.nodes.body ||
            this.documentStructure.level <= HTMLDocumentStructureLevelEnum.body) {
            this.currentNode = this.nextElement;
            this.nodeStack.push(this.currentNode);
            this.tagNameStack.push(tagName);
            if (this.documentStructure && this.nextElement === this.documentStructure.nodes.body) {
                this.documentStructure.level = HTMLDocumentStructureLevelEnum.afterBody;
            }
            // Check if the tag is a void element and should be closed immediately.
            // Elements in the SVG namespace can be self-closed (e.g. "/>").
            // "/>" will be ignored in the HTML namespace.
            if (config?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.noDescendants ||
                (isSelfClosed &&
                    this.currentNode[PropertySymbol.namespaceURI] === NamespaceURI_js_1.default.svg)) {
                this.nodeStack.pop();
                this.tagNameStack.pop();
                this.currentNode = this.nodeStack[this.nodeStack.length - 1] || this.rootNode;
                this.readState = MarkupReadStateEnum.any;
            }
            else {
                // We will set the read state to "rawText" for raw text elements such as <script> and <style> elements.
                this.readState =
                    config?.contentModel === HTMLElementConfigContentModelEnum_js_1.default.rawText
                        ? MarkupReadStateEnum.rawTextElement
                        : MarkupReadStateEnum.any;
            }
        }
        else {
            this.readState = MarkupReadStateEnum.any;
        }
        this.startTagIndex = this.markupRegExp.lastIndex;
    }
    /**
     * Parses end tag.
     *
     * End tags without a matching open element are ignored.
     *
     * @param tagName Tag name.
     */
    parseEndTag(tagName) {
        // SVG elements are case-sensitive.
        const name = this.currentNode[PropertySymbol.namespaceURI] === NamespaceURI_js_1.default.html
            ? StringUtility_js_1.default.asciiUpperCase(tagName)
            : SVGElementConfig_js_1.default[StringUtility_js_1.default.asciiLowerCase(tagName)]?.localName || tagName;
        const index = this.tagNameStack.lastIndexOf(name);
        // We close all tags up until the first tag that matches the end tag.
        if (index !== -1) {
            this.nodeStack.splice(index, this.nodeStack.length - index);
            this.tagNameStack.splice(index, this.tagNameStack.length - index);
            this.currentNode = this.nodeStack[this.nodeStack.length - 1] || this.rootNode;
        }
    }
    /**
     * Parses comment.
     *
     * Creates a comment node, adds it to the current node and sets the read state back to "any".
     *
     * @param comment Comment.
     */
    parseComment(comment) {
        const commentNode = this.rootDocument.createComment(XMLEncodeUtility_js_1.default.decodeHTMLEntities(comment));
        if (this.documentStructure) {
            const level = this.documentStructure.level;
            const { documentElement, head, body } = this.documentStructure.nodes;
            // We need to add the comment node to the correct position in the document.
            // When "beforeNode" is null, the comment is inserted with null as reference node.
            let beforeNode = null;
            if (this.currentNode === this.rootNode && level === HTMLDocumentStructureLevelEnum.root) {
                beforeNode = documentElement;
            }
            else if (this.currentNode === documentElement &&
                level === HTMLDocumentStructureLevelEnum.documentElement) {
                beforeNode = head;
            }
            else if (this.currentNode === documentElement &&
                level === HTMLDocumentStructureLevelEnum.head) {
                beforeNode = body;
            }
            this.currentNode[PropertySymbol.insertBefore](commentNode, beforeNode, true);
        }
        else {
            this.currentNode[PropertySymbol.appendChild](commentNode, true);
        }
        this.readState = MarkupReadStateEnum.any;
    }
    /**
     * Parses document type.
     *
     * A valid document type is only applied at the beginning of a document and is ignored otherwise.
     * Text that isn't a valid document type (e.g. "<!foo>") is added as a comment.
     *
     * @param text Text.
     */
    parseDocumentType(text) {
        const decodedText = XMLEncodeUtility_js_1.default.decodeHTMLEntities(text);
        if (this.documentStructure) {
            let { doctype } = this.documentStructure.nodes;
            const documentType = this.getDocumentType(decodedText);
            // Document type nodes are only allowed at the beginning of the document.
            if (documentType) {
                if (this.currentNode === this.rootNode &&
                    this.documentStructure.level === HTMLDocumentStructureLevelEnum.root) {
                    if (doctype) {
                        // Updates the existing document type node.
                        doctype[PropertySymbol.name] = documentType.name;
                        doctype[PropertySymbol.publicId] = documentType.publicId;
                        doctype[PropertySymbol.systemId] = documentType.systemId;
                    }
                    else {
                        doctype = this.rootNode.implementation.createDocumentType(documentType.name, documentType.publicId, documentType.systemId);
                        this.rootNode.insertBefore(doctype, this.rootNode.documentElement);
                    }
                    this.documentStructure.level = HTMLDocumentStructureLevelEnum.doctype;
                }
            }
            else {
                this.parseComment(decodedText);
            }
        }
        else {
            // Document type nodes are only allowed at the beginning of the document.
            if (!this.getDocumentType(decodedText)) {
                this.parseComment(decodedText);
            }
        }
        this.readState = MarkupReadStateEnum.any;
    }
    /**
     * Parses raw text content for elements such as <script> and <style>.
     *
     * Nothing is done if the end tag doesn't match the current node. The parser will then continue to
     * look for the matching end tag.
     *
     * @param tagName End tag name.
     * @param text Text.
     */
    parseRawTextElementContent(tagName, text) {
        const upperTagName = StringUtility_js_1.default.asciiUpperCase(tagName);
        if (upperTagName !== this.currentNode[PropertySymbol.tagName]) {
            return;
        }
        // Scripts are not allowed to be executed when they are parsed using innerHTML, outerHTML, replaceWith() etc.
        // However, they are allowed to be executed when document.write() is used.
        // See: https://developer.mozilla.org/en-US/docs/Web/API/HTMLScriptElement
        if (upperTagName === 'SCRIPT') {
            this.currentNode[PropertySymbol.evaluateScript] = this.evaluateScripts;
        }
        else if (upperTagName === 'LINK') {
            // An assumption that the same rule should be applied for the HTMLLinkElement is made here.
            this.currentNode[PropertySymbol.evaluateCSS] = this.evaluateScripts;
        }
        // Plain text elements such as <script> and <style> should only contain text.
        // Plain text elements should not decode entities. See #1564.
        this.currentNode[PropertySymbol.appendChild](this.rootDocument.createTextNode(text), true);
        const rawTextElement = this.currentNode;
        this.nodeStack.pop();
        this.tagNameStack.pop();
        this.currentNode = this.nodeStack[this.nodeStack.length - 1] || this.rootNode;
        this.readState = MarkupReadStateEnum.any;
        // Appends the raw text element to its parent.
        if (this.documentStructure) {
            const { documentElement, body } = this.documentStructure.nodes;
            // When parser mode is "htmlDocument", any element added directly to the document or document element should be added to the body.
            if (documentElement &&
                (this.currentNode === this.rootNode || this.currentNode === documentElement)) {
                body[PropertySymbol.appendChild](rawTextElement, true);
            }
            else {
                this.currentNode[PropertySymbol.appendChild](rawTextElement, true);
            }
        }
        else {
            this.currentNode[PropertySymbol.appendChild](rawTextElement, true);
        }
    }
    /**
     * Creates an element or returns a reference to it.
     *
     * For <html>, <head> and <body> the existing element of the document structure is returned and the
     * document structure level is moved forward.
     *
     * @param tagName Tag name.
     * @returns Element, or null for <html>, <head> and <body> when there is no document structure.
     */
    getStartTagElement(tagName) {
        const lowerTagName = StringUtility_js_1.default.asciiLowerCase(tagName);
        // NamespaceURI is inherited from the parent element.
        const namespaceURI = this.currentNode[PropertySymbol.namespaceURI];
        // NamespaceURI should be SVG when the tag name is "svg" (even in XML mode).
        if (lowerTagName === 'svg') {
            return this.rootDocument.createElementNS(NamespaceURI_js_1.default.svg, 'svg');
        }
        if (namespaceURI === NamespaceURI_js_1.default.svg) {
            return this.rootDocument.createElementNS(NamespaceURI_js_1.default.svg, SVGElementConfig_js_1.default[lowerTagName]?.localName || tagName);
        }
        // New element.
        switch (lowerTagName) {
            case 'html':
                if (!this.documentStructure) {
                    return null;
                }
                if (this.documentStructure.level < HTMLDocumentStructureLevelEnum.documentElement) {
                    this.documentStructure.level = HTMLDocumentStructureLevelEnum.documentElement;
                }
                return this.documentStructure.nodes.documentElement ?? null;
            case 'head':
                if (!this.documentStructure) {
                    return null;
                }
                if (this.documentStructure.level < HTMLDocumentStructureLevelEnum.head) {
                    this.documentStructure.level = HTMLDocumentStructureLevelEnum.head;
                }
                else if (this.documentStructure.level === HTMLDocumentStructureLevelEnum.head) {
                    // A second <head> tag before the <body> has been reached.
                    this.documentStructure.level = HTMLDocumentStructureLevelEnum.additionalHeadWithoutBody;
                }
                return this.documentStructure.nodes.head ?? null;
            case 'body':
                if (!this.documentStructure) {
                    return null;
                }
                if (this.documentStructure.level < HTMLDocumentStructureLevelEnum.body) {
                    this.documentStructure.level = HTMLDocumentStructureLevelEnum.body;
                }
                return this.documentStructure.nodes.body ?? null;
            default:
                return this.rootDocument.createElementNS(NamespaceURI_js_1.default.html, lowerTagName);
        }
    }
    /**
     * Returns document type.
     *
     * @param value Content between "<!" and ">" (e.g. 'DOCTYPE html PUBLIC "publicId" "systemId"').
     * @returns Document type as an object with "name", "publicId" and "systemId", or null if the value is not a document type.
     */
    getDocumentType(value) {
        if (!value.toUpperCase().startsWith('DOCTYPE')) {
            return null;
        }
        const docTypeSplit = value.split(SPACE_REGEXP);
        if (docTypeSplit.length <= 1) {
            return null;
        }
        const docTypeString = docTypeSplit.slice(1).join(' ');
        const attributes = [];
        const attributeRegExp = new RegExp(DOCUMENT_TYPE_ATTRIBUTE_REGEXP, 'gm');
        // The "PUBLIC" keyword is only looked for between the name and the first apostrophe.
        // Searching the entire string would give false positives when the name or one of the
        // identifiers contains "PUBLIC" (e.g. '<!DOCTYPE html SYSTEM "about:PUBLIC">').
        const firstApostropheIndex = docTypeString.indexOf('"');
        const keywordString = (firstApostropheIndex === -1
            ? docTypeString
            : docTypeString.substring(0, firstApostropheIndex)).substring(docTypeSplit[1].length);
        const isPublic = keywordString.toUpperCase().includes('PUBLIC');
        let attributeMatch;
        while ((attributeMatch = attributeRegExp.exec(docTypeString))) {
            attributes.push(attributeMatch[1]);
        }
        // A public document type has the public ID first followed by an optional system ID.
        const publicId = isPublic ? attributes[0] || '' : '';
        const systemId = isPublic ? attributes[1] || '' : attributes[0] || '';
        return {
            name: docTypeSplit[1].toLowerCase(),
            publicId,
            systemId
        };
    }
}
exports.default = HTMLParser;
//# sourceMappingURL=HTMLParser.cjs.map
;