/*
 * html-dom-parser
 * Version: (not specified)
 * HTML to DOM parser.
 * 128 lines • 4.13 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.formatAttributes = formatAttributes;
exports.escapeSpecialCharacters = escapeSpecialCharacters;
exports.revertEscapedCharacters = revertEscapedCharacters;
exports.formatDOM = formatDOM;
var domhandler_1 = require("domhandler");
var constants_1 = require("./constants");
/**
 * Looks up the case-sensitive spelling of a tag name.
 *
 * Only keys that the lookup table itself owns are considered, so a tag name
 * that happens to match an inherited object member (e.g. `constructor`) is
 * reported as "not found" instead of returning that inherited member.
 *
 * @param tagName - Tag name in lowercase.
 * @returns - Case-sensitive tag name, or `undefined` when the table has no
 * entry for `tagName`.
 */
function getCaseSensitiveTagName(tagName) {
  var tagNamesMap = constants_1.CASE_SENSITIVE_TAG_NAMES_MAP;
  // `hasOwnProperty` is borrowed from `Object.prototype` so the check also
  // works if the table was created without a prototype.
  if (Object.prototype.hasOwnProperty.call(tagNamesMap, tagName)) {
    return tagNamesMap[tagName];
  }
  return undefined;
}
/**
 * Builds a plain object from an array-like list of attributes.
 *
 * Each entry contributes its `name` as the key and its `value` as the value;
 * when a name occurs more than once, the later entry overwrites the earlier.
 *
 * @param attributes - Array-like list of attributes (e.g. a `NamedNodeMap`).
 * @returns - Map of attribute name to value.
 */
function formatAttributes(attributes) {
  var result = {};
  // Read the length once up front; the list is only array-like, so it is
  // walked by position instead of with array methods.
  var total = attributes.length;
  var position = 0;
  while (position < total) {
    var entry = attributes[position];
    result[entry.name] = entry.value;
    position += 1;
  }
  return result;
}
/**
 * Normalizes a tag name.
 *
 * The name is lowercased first; if `getCaseSensitiveTagName` returns a truthy
 * value for the lowercased name (case-sensitive tags such as SVG ones), that
 * value is used. Otherwise the lowercased name is returned (HTML).
 *
 * @param tagName - Tag name in any casing.
 * @returns - Formatted tag name.
 */
function formatTagName(tagName) {
  var lowerCased = tagName.toLowerCase();
  return getCaseSensitiveTagName(lowerCased) || lowerCased;
}
/**
 * Escapes special characters in an HTML string before it is parsed.
 *
 * Every match of `CARRIAGE_RETURN_REGEX` is swapped for
 * `CARRIAGE_RETURN_PLACEHOLDER`; `revertEscapedCharacters` performs the
 * opposite substitution.
 *
 * @param html - The HTML string.
 * @returns - HTML string with escaped special characters.
 */
function escapeSpecialCharacters(html) {
  var pattern = constants_1.CARRIAGE_RETURN_REGEX;
  var placeholder = constants_1.CARRIAGE_RETURN_PLACEHOLDER;
  return html.replace(pattern, placeholder);
}
/**
 * Restores the characters that were escaped before parsing.
 *
 * Every match of `CARRIAGE_RETURN_PLACEHOLDER_REGEX` in the text is swapped
 * for `CARRIAGE_RETURN`.
 *
 * @param text - The text with escaped characters.
 * @returns - Text with escaped characters reverted.
 */
function revertEscapedCharacters(text) {
  var placeholderPattern = constants_1.CARRIAGE_RETURN_PLACEHOLDER_REGEX;
  var originalCharacter = constants_1.CARRIAGE_RETURN;
  return text.replace(placeholderPattern, originalCharacter);
}
/**
 * Transforms DOM nodes to `domhandler` nodes.
 *
 * Nodes with `nodeType` 1, 3 and 8 become `Element`, `Text` and `Comment`
 * instances respectively; nodes of any other type are skipped. Each created
 * node gets its `parent`, `prev` and `next` links set relative to the other
 * created nodes. Element children are converted recursively with the new
 * element as their parent.
 *
 * When `directive` is given, a `ProcessingInstruction` is inserted at the
 * front of the result. Its name is the lowercased text before the first
 * space of `directive`, or the whole lowercased directive if it contains no
 * space.
 *
 * @param nodes - Array-like list of DOM nodes.
 * @param parent - Parent node (defaults to `null`).
 * @param directive - Directive text (optional).
 * @returns - Nodes.
 */
function formatDOM(nodes, parent, directive) {
  if (parent === void 0) { parent = null; }
  var domNodes = [];
  var nodesLength = nodes.length;
  var current;
  for (var index = 0; index < nodesLength; index++) {
    var node = nodes[index];
    // set the node data given the type
    switch (node.nodeType) {
      case 1: {
        var tagName = formatTagName(node.nodeName);
        // script, style, or tag
        current = new domhandler_1.Element(tagName, formatAttributes(node.attributes));
        current.children = formatDOM(
          // template children are on content; fall back to `childNodes` when
          // an element named "template" exposes no `content` (presumably a
          // template parsed outside the HTML namespace), so it does not throw
          tagName === 'template' && node.content
            ? node.content.childNodes
            : node.childNodes,
          current
        );
        break;
      }
      case 3:
        current = new domhandler_1.Text(revertEscapedCharacters(node.nodeValue));
        break;
      case 8:
        current = new domhandler_1.Comment(node.nodeValue);
        break;
      default:
        // unsupported node types produce no output node
        continue;
    }
    // The previous sibling is the last node that was actually emitted.
    // `index - 1` would be wrong once any input node has been skipped,
    // because input positions and output positions no longer line up.
    var prev = domNodes.length > 0 ? domNodes[domNodes.length - 1] : null;
    if (prev) {
      prev.next = current;
    }
    // set properties for current node
    current.parent = parent;
    current.prev = prev;
    current.next = null;
    domNodes.push(current);
  }
  if (directive) {
    // A directive without a space (e.g. "!DOCTYPE") is its own name;
    // `substring(0, -1)` would otherwise yield an empty name.
    var spaceIndex = directive.indexOf(' ');
    var directiveName = spaceIndex === -1 ? directive : directive.substring(0, spaceIndex);
    var instruction = new domhandler_1.ProcessingInstruction(directiveName.toLowerCase(), directive);
    instruction.next = domNodes[0] || null;
    instruction.parent = parent;
    domNodes.unshift(instruction);
    if (domNodes[1]) {
      domNodes[1].prev = instruction;
    }
  }
  return domNodes;
}
//# sourceMappingURL=utilities.js.map