UNPKG

html-dom-parser

Version:
131 lines (130 loc) 4.12 kB
const require_constants = require("./constants.js");
const domhandler = require("domhandler");

//#region src/client/utilities.ts
const CARRIAGE_RETURN = "\r";
const CARRIAGE_RETURN_REGEX = new RegExp(CARRIAGE_RETURN, "g");
// The timestamp suffix makes an accidental match with real content unlikely.
const CARRIAGE_RETURN_PLACEHOLDER = `__HTML_DOM_PARSER_CARRIAGE_RETURN_PLACEHOLDER_${Date.now().toString()}__`;
const CARRIAGE_RETURN_PLACEHOLDER_REGEX = new RegExp(CARRIAGE_RETURN_PLACEHOLDER, "g");

// Characters that may directly follow a tag name inside an opening tag.
const TAG_NAME_TERMINATORS = new Set([">", " ", "\t", "\n", "\f", "\r", "/"]);

/**
 * Gets case-sensitive tag name.
 *
 * @param tagName - Tag name in lowercase.
 * @returns - Case-sensitive tag name, or `undefined` when the map has no own
 *   entry for `tagName`.
 */
function getCaseSensitiveTagName(tagName) {
  const map = require_constants.CASE_SENSITIVE_TAG_NAMES_MAP;
  // Own-property guard: a tag such as `constructor` must not resolve to a
  // member inherited from `Object.prototype`.
  if (!Object.prototype.hasOwnProperty.call(map, tagName)) return undefined;
  return map[tagName];
}

/**
 * Formats DOM attributes to a hash map.
 *
 * @param attributes - Array-like list of attributes (each with `name` and `value`).
 * @returns - Map of attribute name to value.
 */
function formatAttributes(attributes) {
  const map = {};
  const attributesLength = attributes.length;
  for (let index = 0; index < attributesLength; index++) {
    const { name, value } = attributes[index];
    map[name] = value;
  }
  return map;
}

/**
 * Corrects the tag name if it is case-sensitive (SVG).
 * Otherwise, returns the lowercase tag name (HTML).
 *
 * @param tagName - Tag name in any case.
 * @returns - Formatted tag name.
 */
function formatTagName(tagName) {
  const lowerCaseTagName = tagName.toLowerCase();
  return getCaseSensitiveTagName(lowerCaseTagName) || lowerCaseTagName;
}

/**
 * Checks if an HTML string contains an opening tag (case-insensitive on the
 * HTML side; `tagName` is expected in lowercase).
 *
 * Every occurrence of `<tagName` is inspected, so a longer tag that merely
 * starts with the same letters (e.g. `<header>` when looking for `head`)
 * does not hide a real match that appears later in the string.
 *
 * @param html - HTML string.
 * @param tagName - Tag name to search for (e.g., 'head' or 'body').
 * @returns - Whether the tag is found.
 */
function hasOpenTag(html, tagName) {
  const openTag = "<" + tagName;
  const lowerCaseHtml = html.toLowerCase();
  let index = lowerCaseHtml.indexOf(openTag);
  while (index !== -1) {
    // Read from the lower-cased copy: lower-casing can change string length,
    // so the index is only guaranteed to be valid for that copy.
    const char = lowerCaseHtml[index + openTag.length];
    // The tag name must be followed by '>', '/' or whitespace (attributes).
    if (TAG_NAME_TERMINATORS.has(char)) return true;
    index = lowerCaseHtml.indexOf(openTag, index + 1);
  }
  return false;
}

/**
 * Escapes special characters before parsing.
 *
 * Each carriage return is swapped for a placeholder string that
 * `revertEscapedCharacters` later turns back into a carriage return.
 *
 * @param html - The HTML string.
 * @returns - HTML string with escaped special characters.
 */
function escapeSpecialCharacters(html) {
  return html.replace(CARRIAGE_RETURN_REGEX, CARRIAGE_RETURN_PLACEHOLDER);
}

/**
 * Reverts escaped special characters back to actual characters.
 *
 * @param text - The text with escaped characters.
 * @returns - Text with escaped characters reverted.
 */
function revertEscapedCharacters(text) {
  return text.replace(CARRIAGE_RETURN_PLACEHOLDER_REGEX, CARRIAGE_RETURN);
}

/**
 * Transforms DOM nodes to `domhandler` nodes.
 *
 * Nodes with `nodeType` 1, 3 and 8 become `Element`, `Text` and `Comment`
 * respectively; every other node type is skipped. Sibling links
 * (`prev`/`next`) are wired between the nodes that are actually emitted.
 *
 * @param nodes - Array-like list of DOM nodes.
 * @param parent - Parent node assigned to every returned node.
 * @param directive - Directive (when given, it is prepended as a processing instruction).
 * @returns - Nodes.
 */
function formatDOM(nodes, parent = null, directive) {
  const domNodes = [];
  const nodesLength = nodes.length;
  for (let index = 0; index < nodesLength; index++) {
    const node = nodes[index];
    let current;
    switch (node.nodeType) {
      case 1: {
        const tagName = formatTagName(node.nodeName);
        current = new domhandler.Element(tagName, formatAttributes(node.attributes));
        // A <template> keeps its children under `content`, not `childNodes`.
        current.children = formatDOM(
          tagName === "template" ? node.content.childNodes : node.childNodes,
          current
        );
        break;
      }
      /* v8 ignore start */
      case 3:
        current = new domhandler.Text(revertEscapedCharacters(node.nodeValue ?? ""));
        break;
      case 8:
        current = new domhandler.Comment(node.nodeValue ?? "");
        break;
      /* v8 ignore stop */
      default:
        continue;
    }
    // Link to the last node actually emitted. The input index cannot be used
    // here because skipped node types leave no entry in `domNodes`.
    const prev = domNodes[domNodes.length - 1] ?? null;
    if (prev) prev.next = current;
    current.parent = parent;
    current.prev = prev;
    current.next = null;
    domNodes.push(current);
  }
  if (directive) {
    // The name is the text before the first whitespace character; a
    // directive without whitespace is used in full instead of yielding "".
    const whitespaceIndex = directive.search(/\s/);
    const name = whitespaceIndex === -1 ? directive : directive.substring(0, whitespaceIndex);
    const instruction = new domhandler.ProcessingInstruction(name.toLowerCase(), directive);
    instruction.next = domNodes[0] ?? null;
    instruction.parent = parent;
    domNodes.unshift(instruction);
    if (domNodes[1]) domNodes[1].prev = domNodes[0];
  }
  return domNodes;
}

//#endregion
exports.escapeSpecialCharacters = escapeSpecialCharacters;
exports.formatDOM = formatDOM;
exports.hasOpenTag = hasOpenTag;
//# sourceMappingURL=utilities.js.map