UNPKG

jsdom-se

Version:

jsdom fork for silent errors - A JavaScript implementation of the DOM and HTML standards

300 lines (261 loc) 9.43 kB
"use strict"; const parse5 = require("parse5"); const sax = require("sax"); const attributes = require("../living/attributes"); const createDocumentTypeInternal = require("../living/document-type").create; const locationInfo = require("../living/helpers/internal-constants").locationInfo; class HtmlToDom { constructor(core, parser, parsingMode) { if (!parser) { if (parsingMode === "xml") { parser = sax; } else { parser = parse5; } } this.core = core; this.parser = parser; this.parsingMode = parsingMode; if (parser.DefaultHandler) { this.parserType = "htmlparser2"; } else if (parser.Parser && parser.TreeAdapters) { this.parserType = "parse5v1"; } else if (parser.moduleName === "HTML5") { this.parserType = "html5"; } else if (parser.parser) { this.parserType = "sax"; } } appendHtmlToElement(html, element) { if (typeof html !== "string") { html = String(html); } return this["_parseWith" + this.parserType](html, true, element); } appendHtmlToDocument(html, element) { if (typeof html !== "string") { html = String(html); } return this["_parseWith" + this.parserType](html, false, element); } _parseWithhtmlparser2(html, fragment, element) { const handler = new this.parser.DefaultHandler(); // Check if document is XML const isXML = this.parsingMode === "xml"; const parserInstance = new this.parser.Parser(handler, { xmlMode: isXML, lowerCaseTags: !isXML, lowerCaseAttributeNames: !isXML, decodeEntities: true }); parserInstance.includeLocation = false; parserInstance.parseComplete(html); const parsed = handler.dom; for (let i = 0; i < parsed.length; i++) { setChild(this.core, element, parsed[i]); } return element; } _parseWithparse5v1(html, fragment, element) { if (this.parsingMode === "xml") { throw new Error("Can't parse XML with parse5, please use htmlparser2 instead."); } const htmlparser2Adapter = this.parser.TreeAdapters.htmlparser2; let dom; if (fragment) { const instance = new this.parser.Parser(htmlparser2Adapter); const parentElement = htmlparser2Adapter.createElement(element.tagName.toLowerCase(), element.namespaceURI, []); dom = instance.parseFragment(html, parentElement); } else { const instance = new this.parser.Parser(htmlparser2Adapter, { locationInfo: true }); dom = instance.parse(html); } const parsed = dom.children; for (let i = 0; i < parsed.length; i++) { setChild(this.core, element, parsed[i]); } return element; } _parseWithhtml5(html, fragment, element) { if (element.nodeType === 9) { new this.parser.Parser({ document: element }).parse(html); } else { const p = new this.parser.Parser({ document: element.ownerDocument }); p.parse_fragment(html, element); } } _parseWithsax(html, fragment, element) { const SaxParser = this.parser.parser; const parser = new SaxParser(false, { xmlns: true }); parser.looseCase = "toString"; const openStack = [element]; parser.ontext = text => { setChild(this.core, openStack[openStack.length - 1], { type: "text", data: text }); }; parser.onopentag = arg => { const attrValues = {}; const attrPrefixes = {}; const attrNamespaces = {}; Object.keys(arg.attributes).forEach(key => { const localName = arg.attributes[key].local; attrValues[localName] = arg.attributes[key].value; attrPrefixes[localName] = arg.attributes[key].prefix || null; attrNamespaces[localName] = arg.attributes[key].uri || null; }); if (arg.local === "script" && arg.uri === "http://www.w3.org/1999/xhtml") { openStack.push({ type: "tag", name: arg.local, prefix: arg.prefix, namespace: arg.uri, attribs: attrValues, "x-attribsPrefix": attrPrefixes, "x-attribsNamespace": attrNamespaces }); } else { const elem = setChild(this.core, openStack[openStack.length - 1], { type: "tag", name: arg.local, prefix: arg.prefix, namespace: arg.uri, attribs: attrValues, "x-attribsPrefix": attrPrefixes, "x-attribsNamespace": attrNamespaces }); openStack.push(elem); } }; parser.onclosetag = () => { const elem = openStack.pop(); if (elem.constructor.name === "Object") { // we have an empty script tag setChild(this.core, openStack[openStack.length - 1], elem); } }; parser.onscript = scriptText => { const tag = openStack.pop(); tag.children = [{ type: "text", data: scriptText }]; const elem = setChild(this.core, openStack[openStack.length - 1], tag); openStack.push(elem); }; parser.oncomment = comment => { setChild(this.core, openStack[openStack.length - 1], { type: "comment", data: comment }); }; parser.onprocessinginstruction = pi => { setChild(this.core, openStack[openStack.length - 1], { type: "directive", name: "?" + pi.name, data: "?" + pi.name + " " + pi.body + "?" }); }; parser.ondoctype = dt => { setChild(this.core, openStack[openStack.length - 1], { type: "directive", name: "!doctype", data: "!doctype " + dt }); }; parser.write(html).close(); } } // utility function for forgiving parser function setChild(core, parent, node) { const currentDocument = parent._ownerDocument || parent; let newNode; let isTemplateContents = false; switch (node.type) { case "tag": case "script": case "style": newNode = currentDocument._createElementWithCorrectElementInterface(node.name, node.namespace); newNode._localName = node.name; newNode._prefix = node.prefix || null; newNode._namespaceURI = node.namespace || null; break; case "root": // If we are in <template> then add all children to the parent's _templateContents; skip this virtual root node. if (parent.tagName === "TEMPLATE" && parent._namespaceURI === "http://www.w3.org/1999/xhtml") { newNode = parent._templateContents; isTemplateContents = true; } break; case "text": // HTML entities should already be decoded by the parser, so no need to decode them newNode = currentDocument.createTextNode(node.data); break; case "comment": newNode = currentDocument.createComment(node.data); break; case "directive": if (node.name[0] === "?" && node.name.toLowerCase() !== "?xml") { const data = node.data.slice(node.name.length + 1, -1); newNode = currentDocument.createProcessingInstruction(node.name.substring(1), data); } else if (node.name.toLowerCase() === "!doctype") { if (node["x-name"] !== undefined) { // parse5 supports doctypes directly newNode = createDocumentTypeInternal(core, currentDocument, node["x-name"] || "", node["x-publicId"] || "", node["x-systemId"] || ""); } else { newNode = parseDocType(core, currentDocument, "<" + node.data + ">"); } } break; } if (!newNode) { return null; } newNode[locationInfo] = node.__location; if (node.attribs) { for (let localName in node.attribs) { const value = node.attribs[localName]; let prefix = node["x-attribsPrefix"] && node["x-attribsPrefix"][localName] || null; const namespace = node["x-attribsNamespace"] && node["x-attribsNamespace"][localName] || null; if (prefix === "xmlns" && localName === "") { // intended weirdness in node-sax, see https://github.com/isaacs/sax-js/issues/165 localName = prefix; prefix = null; } attributes.setAttributeValue(newNode, localName, value, prefix, namespace); } } if (node.children) { for (let c = 0; c < node.children.length; c++) { setChild(core, newNode, node.children[c]); } } if (!isTemplateContents) { if (parent._templateContents) { // Setting innerHTML on a <template> parent._templateContents.appendChild(newNode); } else { parent.appendChild(newNode); } } return newNode; } const HTML5_DOCTYPE = /<!doctype html>/i; const PUBLIC_DOCTYPE = /<!doctype\s+([^\s]+)\s+public\s+"([^"]+)"\s+"([^"]+)"/i; const SYSTEM_DOCTYPE = /<!doctype\s+([^\s]+)\s+system\s+"([^"]+)"/i; function parseDocType(core, doc, html) { if (HTML5_DOCTYPE.test(html)) { return createDocumentTypeInternal(core, doc, "html", "", ""); } const publicPieces = PUBLIC_DOCTYPE.exec(html); if (publicPieces) { return createDocumentTypeInternal(core, doc, publicPieces[1], publicPieces[2], publicPieces[3]); } const systemPieces = SYSTEM_DOCTYPE.exec(html); if (systemPieces) { return createDocumentTypeInternal(core, doc, systemPieces[1], "", systemPieces[2]); } // Shouldn't get here (the parser shouldn't let us know about invalid doctypes), but our logic likely isn't // real-world perfect, so let's fallback. return createDocumentTypeInternal(core, doc, "html", "", ""); } exports.HtmlToDom = HtmlToDom;