html-dom-parser
Version:
HTML to DOM parser.
1 lines • 6.99 kB
Source Map (JSON)
{"version":3,"file":"utilities.mjs","names":[],"sources":["../../src/client/utilities.ts"],"sourcesContent":["import { Comment, Element, ProcessingInstruction, Text } from 'domhandler';\n\nimport type { DOMNode } from '../types';\nimport { CASE_SENSITIVE_TAG_NAMES_MAP } from './constants';\n\nconst CARRIAGE_RETURN = '\\r';\nconst CARRIAGE_RETURN_REGEX = new RegExp(CARRIAGE_RETURN, 'g');\nconst CARRIAGE_RETURN_PLACEHOLDER = `__HTML_DOM_PARSER_CARRIAGE_RETURN_PLACEHOLDER_${Date.now().toString()}__`;\nconst CARRIAGE_RETURN_PLACEHOLDER_REGEX = new RegExp(\n CARRIAGE_RETURN_PLACEHOLDER,\n 'g',\n);\n\n/**\n * Gets case-sensitive tag name.\n *\n * @param tagName - Tag name in lowercase.\n * @returns - Case-sensitive tag name.\n */\nfunction getCaseSensitiveTagName(tagName: string): string | undefined {\n return CASE_SENSITIVE_TAG_NAMES_MAP[tagName];\n}\n\n/**\n * Formats DOM attributes to a hash map.\n *\n * @param attributes - List of attributes.\n * @returns - Map of attribute name to value.\n */\nfunction formatAttributes(attributes: NamedNodeMap) {\n const map: Record<string, string> = {};\n let index = 0;\n const attributesLength = attributes.length;\n\n // `NamedNodeMap` is array-like\n for (; index < attributesLength; index++) {\n const attribute = attributes[index];\n map[attribute.name] = attribute.value;\n }\n\n return map;\n}\n\n/**\n * Corrects the tag name if it is case-sensitive (SVG).\n * Otherwise, returns the lowercase tag name (HTML).\n *\n * @param tagName - Lowercase tag name.\n * @returns - Formatted tag name.\n */\nfunction formatTagName(tagName: string): string {\n tagName = tagName.toLowerCase();\n const caseSensitiveTagName = getCaseSensitiveTagName(tagName);\n\n if (caseSensitiveTagName) {\n return caseSensitiveTagName;\n }\n\n return tagName;\n}\n\n/**\n * Checks if an HTML string contains an opening tag (case-insensitive).\n *\n * @param html - HTML string.\n * @param tagName - Tag name to search for (e.g., 'head' or 'body').\n * @returns - Whether the tag is found.\n */\nexport function hasOpenTag(html: string, tagName: string): boolean {\n const openTag = '<' + tagName;\n const index = html.toLowerCase().indexOf(openTag);\n\n if (index === -1) {\n return false;\n }\n\n const char = html[index + openTag.length];\n // the character after the tag name must be '>' or whitespace (for attributes)\n return (\n char === '>' ||\n char === ' ' ||\n char === '\\t' ||\n char === '\\n' ||\n char === '\\r' ||\n char === '/'\n );\n}\n\n/**\n * Escapes special characters before parsing.\n *\n * @param html - The HTML string.\n * @returns - HTML string with escaped special characters.\n */\nexport function escapeSpecialCharacters(html: string): string {\n return html.replace(CARRIAGE_RETURN_REGEX, CARRIAGE_RETURN_PLACEHOLDER);\n}\n\n/**\n * Reverts escaped special characters back to actual characters.\n *\n * @param text - The text with escaped characters.\n * @returns - Text with escaped characters reverted.\n */\nexport function revertEscapedCharacters(text: string): string {\n return text.replace(CARRIAGE_RETURN_PLACEHOLDER_REGEX, CARRIAGE_RETURN);\n}\n\n/**\n * Transforms DOM nodes to `domhandler` nodes.\n *\n * @param nodes - DOM nodes.\n * @param parent - Parent node.\n * @param directive - Directive.\n * @returns - Nodes.\n */\nexport function formatDOM(\n nodes: NodeList,\n parent: DOMNode | null = null,\n directive?: string,\n): DOMNode[] {\n const domNodes = [];\n let current;\n let index = 0;\n const nodesLength = nodes.length;\n\n for (; index < nodesLength; index++) {\n const node = nodes[index];\n\n // set the node data given the type\n switch (node.nodeType) {\n case 1: {\n const tagName = formatTagName(node.nodeName);\n\n // script, style, or tag\n current = new Element(\n tagName,\n formatAttributes((node as HTMLElement).attributes),\n );\n\n current.children = formatDOM(\n // template children are on content\n tagName === 'template'\n ? (node as HTMLTemplateElement).content.childNodes\n : node.childNodes,\n current,\n );\n\n break;\n }\n\n /* v8 ignore start */\n case 3:\n current = new Text(revertEscapedCharacters(node.nodeValue ?? ''));\n break;\n\n case 8:\n current = new Comment(node.nodeValue ?? '');\n break;\n /* v8 ignore stop */\n\n default:\n continue;\n }\n\n // set previous node next\n const prev = domNodes[index - 1] ?? null;\n // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition\n if (prev) {\n prev.next = current;\n }\n\n // set properties for current node\n current.parent = parent as Element;\n current.prev = prev;\n current.next = null;\n\n domNodes.push(current);\n }\n\n if (directive) {\n current = new ProcessingInstruction(\n directive.substring(0, directive.indexOf(' ')).toLowerCase(),\n directive,\n );\n\n current.next = domNodes[0] ?? null;\n current.parent = parent as Element;\n domNodes.unshift(current);\n\n if (domNodes[1]) {\n domNodes[1].prev = domNodes[0];\n }\n }\n\n return domNodes;\n}\n"],"mappings":";;;AAKA,MAAM,kBAAkB;AACxB,MAAM,wBAAwB,IAAI,OAAO,iBAAiB,GAAG;AAC7D,MAAM,8BAA8B,iDAAiD,KAAK,IAAI,EAAE,SAAS,EAAE;AAC3G,MAAM,oCAAoC,IAAI,OAC5C,6BACA,GACF;;;;;;;AAQA,SAAS,wBAAwB,SAAqC;CACpE,OAAO,6BAA6B;AACtC;;;;;;;AAQA,SAAS,iBAAiB,YAA0B;CAClD,MAAM,MAA8B,CAAC;CACrC,IAAI,QAAQ;CACZ,MAAM,mBAAmB,WAAW;CAGpC,OAAO,QAAQ,kBAAkB,SAAS;EACxC,MAAM,YAAY,WAAW;EAC7B,IAAI,UAAU,QAAQ,UAAU;CAClC;CAEA,OAAO;AACT;;;;;;;;AASA,SAAS,cAAc,SAAyB;CAC9C,UAAU,QAAQ,YAAY;CAC9B,MAAM,uBAAuB,wBAAwB,OAAO;CAE5D,IAAI,sBACF,OAAO;CAGT,OAAO;AACT;;;;;;;;AASA,SAAgB,WAAW,MAAc,SAA0B;CACjE,MAAM,UAAU,MAAM;CACtB,MAAM,QAAQ,KAAK,YAAY,EAAE,QAAQ,OAAO;CAEhD,IAAI,UAAU,IACZ,OAAO;CAGT,MAAM,OAAO,KAAK,QAAQ,QAAQ;CAElC,OACE,SAAS,OACT,SAAS,OACT,SAAS,OACT,SAAS,QACT,SAAS,QACT,SAAS;AAEb;;;;;;;AAQA,SAAgB,wBAAwB,MAAsB;CAC5D,OAAO,KAAK,QAAQ,uBAAuB,2BAA2B;AACxE;;;;;;;AAQA,SAAgB,wBAAwB,MAAsB;CAC5D,OAAO,KAAK,QAAQ,mCAAmC,eAAe;AACxE;;;;;;;;;AAUA,SAAgB,UACd,OACA,SAAyB,MACzB,WACW;CACX,MAAM,WAAW,CAAC;CAClB,IAAI;CACJ,IAAI,QAAQ;CACZ,MAAM,cAAc,MAAM;CAE1B,OAAO,QAAQ,aAAa,SAAS;EACnC,MAAM,OAAO,MAAM;EAGnB,QAAQ,KAAK,UAAb;GACE,KAAK,GAAG;IACN,MAAM,UAAU,cAAc,KAAK,QAAQ;IAG3C,UAAU,IAAI,QACZ,SACA,iBAAkB,KAAqB,UAAU,CACnD;IAEA,QAAQ,WAAW,UAEjB,YAAY,aACP,KAA6B,QAAQ,aACtC,KAAK,YACT,OACF;IAEA;GACF;;GAGA,KAAK;IACH,UAAU,IAAI,KAAK,wBAAwB,KAAK,aAAa,EAAE,CAAC;IAChE;GAEF,KAAK;IACH,UAAU,IAAI,QAAQ,KAAK,aAAa,EAAE;IAC1C;;GAGF,SACE;EACJ;EAGA,MAAM,OAAO,SAAS,QAAQ,MAAM;EAEpC,IAAI,MACF,KAAK,OAAO;EAId,QAAQ,SAAS;EACjB,QAAQ,OAAO;EACf,QAAQ,OAAO;EAEf,SAAS,KAAK,OAAO;CACvB;CAEA,IAAI,WAAW;EACb,UAAU,IAAI,sBACZ,UAAU,UAAU,GAAG,UAAU,QAAQ,GAAG,CAAC,EAAE,YAAY,GAC3D,SACF;EAEA,QAAQ,OAAO,SAAS,MAAM;EAC9B,QAAQ,SAAS;EACjB,SAAS,QAAQ,OAAO;EAExB,IAAI,SAAS,IACX,SAAS,GAAG,OAAO,SAAS;CAEhC;CAEA,OAAO;AACT"}