html-dom-parser
Version:
HTML to DOM parser.
143 lines (140 loc) • 5.56 kB
JavaScript
import { escapeSpecialCharacters, hasOpenTag } from './utilities.mjs';
// constants
var HTML = 'html';
var HEAD = 'head';
var BODY = 'body';
var FIRST_TAG_REGEX = /<([a-zA-Z]+[0-9]?)/; // e.g., <h1>
// falls back to `parseFromString` if `createHTMLDocument` cannot be used
/* eslint-disable @typescript-eslint/no-unused-vars */
/* v8 ignore start */
var parseFromDocument = function (html, tagName) {
throw new Error('This browser does not support `document.implementation.createHTMLDocument`');
};
var parseFromString = function (html, tagName) {
throw new Error('This browser does not support `DOMParser.prototype.parseFromString`');
};
var DOMParser = typeof window === 'object' && window.DOMParser;
/**
* DOMParser (performance: slow).
*
* @see https://developer.mozilla.org/docs/Web/API/DOMParser#Parsing_an_SVG_or_HTML_document
*/
if (typeof DOMParser === 'function') {
var domParser_1 = new DOMParser();
var mimeType_1 = 'text/html';
/**
* Creates an HTML document using `DOMParser.parseFromString`.
*
* @param html - The HTML string.
* @param tagName - The element to render the HTML (with 'body' as fallback).
* @returns - Document.
*/
parseFromString = function (html, tagName) {
if (tagName) {
html = "<".concat(tagName, ">").concat(html, "</").concat(tagName, ">");
}
return domParser_1.parseFromString(html, mimeType_1);
};
parseFromDocument = parseFromString;
}
/**
* DOMImplementation (performance: fair).
*
* @see https://developer.mozilla.org/docs/Web/API/DOMImplementation/createHTMLDocument
*/
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (typeof document === 'object' && document.implementation) {
var htmlDocument_1 = document.implementation.createHTMLDocument();
/**
* Use HTML document created by `document.implementation.createHTMLDocument`.
*
* @param html - The HTML string.
* @param tagName - The element to render the HTML (with 'body' as fallback).
* @returns - Document
*/
parseFromDocument = function (html, tagName) {
if (tagName) {
var element = htmlDocument_1.documentElement.querySelector(tagName);
if (element) {
element.innerHTML = html;
}
return htmlDocument_1;
}
htmlDocument_1.documentElement.innerHTML = html;
return htmlDocument_1;
};
}
/**
* Template (performance: fast).
*
* @see https://developer.mozilla.org/docs/Web/HTML/Element/template
*/
var template = typeof document === 'object' && document.createElement('template');
var parseFromTemplate;
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (template && template.content) {
/**
* Uses a template element (content fragment) to parse HTML.
*
* @param html - HTML string.
* @returns - Nodes.
*/
parseFromTemplate = function (html) {
template.innerHTML = html;
return template.content.childNodes;
};
}
var createNodeList = function () { return document.createDocumentFragment().childNodes; };
/* v8 ignore stop */
/**
* Parses HTML string to DOM nodes.
*
* @param html - HTML markup.
* @returns - DOM nodes.
*/
function domparser(html) {
var _a, _b, _c, _d, _e, _f;
// Escape special characters before parsing
html = escapeSpecialCharacters(html);
var match = FIRST_TAG_REGEX.exec(html);
var firstTagName = (_a = match === null || match === void 0 ? void 0 : match[1]) === null || _a === void 0 ? void 0 : _a.toLowerCase();
switch (firstTagName) {
case HTML: {
var doc = parseFromString(html);
// the created document may come with filler head/body elements,
// so make sure to remove them if they don't actually exist
if (!hasOpenTag(html, HEAD)) {
var element = doc.querySelector(HEAD);
(_b = element === null || element === void 0 ? void 0 : element.parentNode) === null || _b === void 0 ? void 0 : _b.removeChild(element);
}
if (!hasOpenTag(html, BODY)) {
var element = doc.querySelector(BODY);
(_c = element === null || element === void 0 ? void 0 : element.parentNode) === null || _c === void 0 ? void 0 : _c.removeChild(element);
}
return doc.querySelectorAll(HTML);
}
case HEAD:
case BODY: {
var elements = parseFromDocument(html).querySelectorAll(firstTagName);
// if there's a sibling element, then return both elements
/* v8 ignore next */
if (hasOpenTag(html, BODY) && hasOpenTag(html, HEAD)) {
return (_e = (_d = elements[0].parentNode) === null || _d === void 0 ? void 0 : _d.childNodes) !== null && _e !== void 0 ? _e : createNodeList();
}
return elements;
}
// low-level tag or text
/* v8 ignore start */
default: {
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
if (parseFromTemplate) {
return parseFromTemplate(html);
}
var element = parseFromDocument(html, BODY).querySelector(BODY);
return (_f = element === null || element === void 0 ? void 0 : element.childNodes) !== null && _f !== void 0 ? _f : createNodeList();
}
/* v8 ignore stop */
}
}
export { domparser as default };
//# sourceMappingURL=domparser.mjs.map