html-dom-parser
Version:
HTML to DOM parser.
140 lines • 5.17 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.default = domparser;
var utilities_1 = require("./utilities");
// constants
var HTML = 'html';
var HEAD = 'head';
var BODY = 'body';
var FIRST_TAG_REGEX = /<([a-zA-Z]+[0-9]?)/; // e.g., <h1>
// match-all-characters in case of newlines (DOTALL)
var HEAD_TAG_REGEX = /<head[^]*>/i;
var BODY_TAG_REGEX = /<body[^]*>/i;
// falls back to `parseFromString` if `createHTMLDocument` cannot be used
// eslint-disable-next-line @typescript-eslint/no-unused-vars
var parseFromDocument = function (html, tagName) {
/* istanbul ignore next */
throw new Error('This browser does not support `document.implementation.createHTMLDocument`');
};
// eslint-disable-next-line @typescript-eslint/no-unused-vars
var parseFromString = function (html, tagName) {
/* istanbul ignore next */
throw new Error('This browser does not support `DOMParser.prototype.parseFromString`');
};
var DOMParser = typeof window === 'object' && window.DOMParser;
/**
* DOMParser (performance: slow).
*
* @see https://developer.mozilla.org/docs/Web/API/DOMParser#Parsing_an_SVG_or_HTML_document
*/
if (typeof DOMParser === 'function') {
var domParser_1 = new DOMParser();
var mimeType_1 = 'text/html';
/**
* Creates an HTML document using `DOMParser.parseFromString`.
*
* @param html - The HTML string.
* @param tagName - The element to render the HTML (with 'body' as fallback).
* @returns - Document.
*/
parseFromString = function (html, tagName) {
if (tagName) {
/* istanbul ignore next */
html = "<".concat(tagName, ">").concat(html, "</").concat(tagName, ">");
}
return domParser_1.parseFromString(html, mimeType_1);
};
parseFromDocument = parseFromString;
}
/**
* DOMImplementation (performance: fair).
*
* @see https://developer.mozilla.org/docs/Web/API/DOMImplementation/createHTMLDocument
*/
if (typeof document === 'object' && document.implementation) {
var htmlDocument_1 = document.implementation.createHTMLDocument();
/**
* Use HTML document created by `document.implementation.createHTMLDocument`.
*
* @param html - The HTML string.
* @param tagName - The element to render the HTML (with 'body' as fallback).
* @returns - Document
*/
parseFromDocument = function (html, tagName) {
if (tagName) {
var element = htmlDocument_1.documentElement.querySelector(tagName);
if (element) {
element.innerHTML = html;
}
return htmlDocument_1;
}
htmlDocument_1.documentElement.innerHTML = html;
return htmlDocument_1;
};
}
/**
* Template (performance: fast).
*
* @see https://developer.mozilla.org/docs/Web/HTML/Element/template
*/
var template = typeof document === 'object' && document.createElement('template');
var parseFromTemplate;
if (template && template.content) {
/**
* Uses a template element (content fragment) to parse HTML.
*
* @param html - HTML string.
* @returns - Nodes.
*/
parseFromTemplate = function (html) {
template.innerHTML = html;
return template.content.childNodes;
};
}
/**
* Parses HTML string to DOM nodes.
*
* @param html - HTML markup.
* @returns - DOM nodes.
*/
function domparser(html) {
var _a, _b;
// Escape special characters before parsing
html = (0, utilities_1.escapeSpecialCharacters)(html);
var match = html.match(FIRST_TAG_REGEX);
var firstTagName = match && match[1] ? match[1].toLowerCase() : '';
switch (firstTagName) {
case HTML: {
var doc = parseFromString(html);
// the created document may come with filler head/body elements,
// so make sure to remove them if they don't actually exist
if (!HEAD_TAG_REGEX.test(html)) {
var element = doc.querySelector(HEAD);
(_a = element === null || element === void 0 ? void 0 : element.parentNode) === null || _a === void 0 ? void 0 : _a.removeChild(element);
}
if (!BODY_TAG_REGEX.test(html)) {
var element = doc.querySelector(BODY);
(_b = element === null || element === void 0 ? void 0 : element.parentNode) === null || _b === void 0 ? void 0 : _b.removeChild(element);
}
return doc.querySelectorAll(HTML);
}
case HEAD:
case BODY: {
var elements = parseFromDocument(html).querySelectorAll(firstTagName);
// if there's a sibling element, then return both elements
if (BODY_TAG_REGEX.test(html) && HEAD_TAG_REGEX.test(html)) {
return elements[0].parentNode.childNodes;
}
return elements;
}
// low-level tag or text
default: {
if (parseFromTemplate) {
return parseFromTemplate(html);
}
var element = parseFromDocument(html, BODY).querySelector(BODY);
return element.childNodes;
}
}
}
//# sourceMappingURL=domparser.js.map