UNPKG

html-dom-parser

Version:
140 lines 5.17 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.default = domparser; var utilities_1 = require("./utilities"); // constants var HTML = 'html'; var HEAD = 'head'; var BODY = 'body'; var FIRST_TAG_REGEX = /<([a-zA-Z]+[0-9]?)/; // e.g., <h1> // match-all-characters in case of newlines (DOTALL) var HEAD_TAG_REGEX = /<head[^]*>/i; var BODY_TAG_REGEX = /<body[^]*>/i; // falls back to `parseFromString` if `createHTMLDocument` cannot be used // eslint-disable-next-line @typescript-eslint/no-unused-vars var parseFromDocument = function (html, tagName) { /* istanbul ignore next */ throw new Error('This browser does not support `document.implementation.createHTMLDocument`'); }; // eslint-disable-next-line @typescript-eslint/no-unused-vars var parseFromString = function (html, tagName) { /* istanbul ignore next */ throw new Error('This browser does not support `DOMParser.prototype.parseFromString`'); }; var DOMParser = typeof window === 'object' && window.DOMParser; /** * DOMParser (performance: slow). * * @see https://developer.mozilla.org/docs/Web/API/DOMParser#Parsing_an_SVG_or_HTML_document */ if (typeof DOMParser === 'function') { var domParser_1 = new DOMParser(); var mimeType_1 = 'text/html'; /** * Creates an HTML document using `DOMParser.parseFromString`. * * @param html - The HTML string. * @param tagName - The element to render the HTML (with 'body' as fallback). * @returns - Document. */ parseFromString = function (html, tagName) { if (tagName) { /* istanbul ignore next */ html = "<".concat(tagName, ">").concat(html, "</").concat(tagName, ">"); } return domParser_1.parseFromString(html, mimeType_1); }; parseFromDocument = parseFromString; } /** * DOMImplementation (performance: fair). * * @see https://developer.mozilla.org/docs/Web/API/DOMImplementation/createHTMLDocument */ if (typeof document === 'object' && document.implementation) { var htmlDocument_1 = document.implementation.createHTMLDocument(); /** * Use HTML document created by `document.implementation.createHTMLDocument`. * * @param html - The HTML string. * @param tagName - The element to render the HTML (with 'body' as fallback). * @returns - Document */ parseFromDocument = function (html, tagName) { if (tagName) { var element = htmlDocument_1.documentElement.querySelector(tagName); if (element) { element.innerHTML = html; } return htmlDocument_1; } htmlDocument_1.documentElement.innerHTML = html; return htmlDocument_1; }; } /** * Template (performance: fast). * * @see https://developer.mozilla.org/docs/Web/HTML/Element/template */ var template = typeof document === 'object' && document.createElement('template'); var parseFromTemplate; if (template && template.content) { /** * Uses a template element (content fragment) to parse HTML. * * @param html - HTML string. * @returns - Nodes. */ parseFromTemplate = function (html) { template.innerHTML = html; return template.content.childNodes; }; } /** * Parses HTML string to DOM nodes. * * @param html - HTML markup. * @returns - DOM nodes. */ function domparser(html) { var _a, _b; // Escape special characters before parsing html = (0, utilities_1.escapeSpecialCharacters)(html); var match = html.match(FIRST_TAG_REGEX); var firstTagName = match && match[1] ? match[1].toLowerCase() : ''; switch (firstTagName) { case HTML: { var doc = parseFromString(html); // the created document may come with filler head/body elements, // so make sure to remove them if they don't actually exist if (!HEAD_TAG_REGEX.test(html)) { var element = doc.querySelector(HEAD); (_a = element === null || element === void 0 ? void 0 : element.parentNode) === null || _a === void 0 ? void 0 : _a.removeChild(element); } if (!BODY_TAG_REGEX.test(html)) { var element = doc.querySelector(BODY); (_b = element === null || element === void 0 ? void 0 : element.parentNode) === null || _b === void 0 ? void 0 : _b.removeChild(element); } return doc.querySelectorAll(HTML); } case HEAD: case BODY: { var elements = parseFromDocument(html).querySelectorAll(firstTagName); // if there's a sibling element, then return both elements if (BODY_TAG_REGEX.test(html) && HEAD_TAG_REGEX.test(html)) { return elements[0].parentNode.childNodes; } return elements; } // low-level tag or text default: { if (parseFromTemplate) { return parseFromTemplate(html); } var element = parseFromDocument(html, BODY).querySelector(BODY); return element.childNodes; } } } //# sourceMappingURL=domparser.js.map