@thednp/domparser
Version:
🍝 Super light HTML parser for isomorphic applications.
1 lines • 9.2 kB
Source Map (JSON)
{"version":3,"file":"dom-parser.cjs","names":["isObj","DOM_ERROR","createDocument","stack: (RootNode | DOMNode)[]","tagStack: string[]","tokenize","newNode: ChildNode","startsWith","selfClosingTags","createBasicNode","toUpperCase","getAttributes","createElement"],"sources":["../src/parts/dom-parser.ts"],"sourcesContent":["// dom-parser.ts\nimport { createBasicNode, createDocument, createElement } from \"./prototype\";\nimport type {\n ChildNode,\n DOMNode,\n DomParserOptions,\n DomParserResult,\n GetAttributesOptions,\n RootNode,\n} from \"./types\";\n\nimport {\n DOM_ERROR,\n getAttributes,\n isObj,\n selfClosingTags,\n startsWith,\n tokenize,\n toUpperCase,\n} from \"./util\";\n\n/**\n * **DomParser**\n *\n * Unlike the basic **Parser**, **DomParser** creates a new `Document` like instance with DOM-like\n * methods and properties and populates it with `Node` like objects resulted from the parsing\n * of a given HTML markup.\n *\n * @example\n * ```ts\n * const config = {\n * // On creating new node callback function\n * onNodeCallback?: myFunction(node: DOMNode) => DOMNode | YOURNode,\n * // Common dangerous tags that could lead to XSS attacks\n * filterTags: [\n * \"script\", \"style\", \"iframe\", \"object\", \"embed\", \"base\", \"form\",\n * \"input\", \"button\", \"textarea\", \"select\", \"option\"\n * ],\n * // Unsafe attributes that could lead to XSS attacks\n * filterAttrs: [\n * \"onerror\", \"onload\", \"onunload\", \"onclick\", \"ondblclick\", \"onmousedown\",\n * \"onmouseup\", \"onmouseover\", \"onmousemove\", \"onmouseout\", \"onkeydown\",\n * \"onkeypress\", \"onkeyup\", \"onchange\", \"onsubmit\", \"onreset\", \"onselect\",\n * \"onblur\", \"onfocus\", \"formaction\", \"href\", \"xlink:href\", \"action\"\n * ]\n * }\n * const { root: doc, components, tags } = DomParser.parseFromString(\"<!doctype html><html>This is starting html</html>\", config);\n * console.log(doc.documentElement.outerHTML);\n * // > \"<html>This is starting html</html>\"\n * ```\n *\n * @param startHTML Initial HTML content\n * @param config the `Parser` options to apply to the parsing of the startHTML markup.\n * @returns The `Document` like root node\n */\nexport const DomParser = (\n config?: Partial<DomParserOptions>,\n) => {\n if (config && !isObj(config)) {\n throw new Error(`${DOM_ERROR} 1st parameter is not an object.`);\n }\n\n // Common dangerous tags that could lead to XSS\n let unsafeTags = new Set<string>();\n let unsafeTagDepth = 0;\n\n // Unsafe attributes that could lead to XSS\n let unsafeAttrs = new Set<string>();\n\n // Apply config\n const { filterTags, filterAttrs, onNodeCallback } = config || {};\n if (filterTags?.length) unsafeTags = new Set(filterTags);\n if (filterAttrs?.length) unsafeAttrs = new Set(filterAttrs);\n const getAttrOptions = { unsafeAttrs } as GetAttributesOptions;\n // don't override the default function unless it's actualy set\n\n return {\n parseFromString(htmlString?: string) {\n if (htmlString && typeof htmlString !== \"string\") {\n throw new Error(`${DOM_ERROR} 1st parameter is not a string.`);\n }\n const root = createDocument();\n if (!htmlString) return { root, components: [], tags: [] };\n\n const stack: (RootNode | DOMNode)[] = [root];\n const tagStack: string[] = [];\n const components = new Set<string>();\n const tags = new Set<string>();\n const tokens = tokenize(htmlString);\n const tLen = tokens.length;\n let newNode: ChildNode;\n\n for (let i = 0; i < tLen; i += 1) {\n const { tokenType, value, isSC } = tokens[i];\n\n // Skip doctype, but store it as a root property\n if (tokenType === \"doctype\") {\n root.doctype = `<${value}>`;\n continue;\n }\n\n const currentParent = stack[stack.length - 1];\n const isClosing = startsWith(value, \"/\");\n const tagName = isClosing ? value.slice(1) : value.split(/[\\s/>]/)[0];\n const isSelfClosing = isSC || selfClosingTags.has(tagName);\n\n // Tag Matching Detection Logic\n if (tokenType === \"tag\" && !isSelfClosing) {\n // Start Tag (and not self-closing)\n if (!isClosing) {\n // Push tag name onto the tag stack\n tagStack.push(tagName);\n // Closing Tag\n } else {\n // Pop the last opened tag\n const expectedTag = tagStack.pop();\n if (expectedTag !== tagName) {\n if (expectedTag === undefined) {\n throw new Error(\n `${DOM_ERROR} Mismatched closing tag: </${tagName}>. No open tag found.`,\n );\n } else {\n throw new Error(\n `${DOM_ERROR} Mismatched closing tag: </${tagName}>. Expected closing tag for <${expectedTag}>.`,\n );\n }\n }\n }\n }\n\n // Skip unsafe tags AND their children\n if (unsafeTags.has(tagName)) {\n if (!isSelfClosing) {\n if (!isClosing) {\n unsafeTagDepth++;\n } else {\n unsafeTagDepth--;\n }\n }\n continue;\n }\n\n // Don't process anything while inside unsafe tags\n if (unsafeTagDepth > 0) continue;\n\n if ([\"text\", \"comment\"].includes(tokenType)) {\n newNode = createBasicNode(\n `#${tokenType as \"text\" | \"comment\"}`,\n value,\n ) as ChildNode;\n currentParent.append(newNode);\n continue;\n }\n\n // Register tag/component type\n (tagName[0] === toUpperCase(tagName[0]) || tagName.includes(\"-\")\n ? components\n : tags).add(tagName);\n\n if (!isClosing) {\n const attributes = getAttributes(value, getAttrOptions);\n newNode = createElement.call(\n root,\n tagName as DOMNode[\"tagName\"],\n attributes,\n );\n currentParent.append(newNode);\n stack.slice(1, -1).map((parent) =>\n (parent as DOMNode).registerChild(newNode as DOMNode)\n );\n\n if (onNodeCallback) onNodeCallback(newNode, currentParent, root);\n\n const charset = attributes?.charset;\n if (tagName === \"meta\" && charset) {\n root.charset = toUpperCase(charset);\n }\n\n !isSelfClosing && stack.push(newNode);\n } else if (!isSelfClosing && stack.length > 1) {\n stack.pop();\n }\n }\n\n // Check for unclosed tags at the end\n // an edge case where end tag is malformed `</incomplete`\n if (tagStack.length > 0) {\n const unclosedTag = tagStack.pop();\n throw new Error(`${DOM_ERROR} Unclosed tag: <${unclosedTag}>.`);\n }\n\n return {\n root,\n components: Array.from(components),\n tags: Array.from(tags),\n } satisfies DomParserResult;\n },\n };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAuDA,MAAa,aACX,WACG;AACH,KAAI,UAAU,CAACA,mBAAM,OAAO,CAC1B,OAAM,IAAI,MAAM,GAAGC,uBAAU,kCAAkC;CAIjE,IAAI,6BAAa,IAAI,KAAa;CAClC,IAAI,iBAAiB;CAGrB,IAAI,8BAAc,IAAI,KAAa;CAGnC,MAAM,EAAE,YAAY,aAAa,mBAAmB,UAAU,EAAE;AAChE,KAAI,YAAY,OAAQ,cAAa,IAAI,IAAI,WAAW;AACxD,KAAI,aAAa,OAAQ,eAAc,IAAI,IAAI,YAAY;CAC3D,MAAM,iBAAiB,EAAE,aAAa;AAGtC,QAAO,EACL,gBAAgB,YAAqB;AACnC,MAAI,cAAc,OAAO,eAAe,SACtC,OAAM,IAAI,MAAM,GAAGA,uBAAU,iCAAiC;EAEhE,MAAM,OAAOC,kCAAgB;AAC7B,MAAI,CAAC,WAAY,QAAO;GAAE;GAAM,YAAY,EAAE;GAAE,MAAM,EAAE;GAAE;EAE1D,MAAMC,QAAgC,CAAC,KAAK;EAC5C,MAAMC,WAAqB,EAAE;EAC7B,MAAM,6BAAa,IAAI,KAAa;EACpC,MAAM,uBAAO,IAAI,KAAa;EAC9B,MAAM,SAASC,sBAAS,WAAW;EACnC,MAAM,OAAO,OAAO;EACpB,IAAIC;AAEJ,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,KAAK,GAAG;GAChC,MAAM,EAAE,WAAW,OAAO,SAAS,OAAO;AAG1C,OAAI,cAAc,WAAW;AAC3B,SAAK,UAAU,IAAI,MAAM;AACzB;;GAGF,MAAM,gBAAgB,MAAM,MAAM,SAAS;GAC3C,MAAM,YAAYC,wBAAW,OAAO,IAAI;GACxC,MAAM,UAAU,YAAY,MAAM,MAAM,EAAE,GAAG,MAAM,MAAM,SAAS,CAAC;GACnE,MAAM,gBAAgB,QAAQC,6BAAgB,IAAI,QAAQ;AAG1D,OAAI,cAAc,SAAS,CAAC,cAE1B,KAAI,CAAC,UAEH,UAAS,KAAK,QAAQ;QAEjB;IAEL,MAAM,cAAc,SAAS,KAAK;AAClC,QAAI,gBAAgB,QAClB,KAAI,gBAAgB,OAClB,OAAM,IAAI,MACR,GAAGP,uBAAU,6BAA6B,QAAQ,uBACnD;QAED,OAAM,IAAI,MACR,GAAGA,uBAAU,6BAA6B,QAAQ,+BAA+B,YAAY,IAC9F;;AAOT,OAAI,WAAW,IAAI,QAAQ,EAAE;AAC3B,QAAI,CAAC,cACH,KAAI,CAAC,UACH;QAEA;AAGJ;;AAIF,OAAI,iBAAiB,EAAG;AAExB,OAAI,CAAC,QAAQ,UAAU,CAAC,SAAS,UAAU,EAAE;AAC3C,cAAUQ,kCACR,IAAI,aACJ,MACD;AACD,kBAAc,OAAO,QAAQ;AAC7B;;AAIF,IAAC,QAAQ,OAAOC,yBAAY,QAAQ,GAAG,IAAI,QAAQ,SAAS,IAAI,GAC5D,aACA,MAAM,IAAI,QAAQ;AAEtB,OAAI,CAAC,WAAW;IACd,MAAM,aAAaC,2BAAc,OAAO,eAAe;AACvD,cAAUC,gCAAc,KACtB,MACA,SACA,WACD;AACD,kBAAc,OAAO,QAAQ;AAC7B,UAAM,MAAM,GAAG,GAAG,CAAC,KAAK,WACrB,OAAmB,cAAc,QAAmB,CACtD;AAED,QAAI,eAAgB,gBAAe,SAAS,eAAe,KAAK;IAEhE,MAAM,UAAU,YAAY;AAC5B,QAAI,YAAY,UAAU,QACxB,MAAK,UAAUF,yBAAY,QAAQ;AAGrC,KAAC,iBAAiB,MAAM,KAAK,QAAQ;cAC5B,CAAC,iBAAiB,MAAM,SAAS,EAC1C,OAAM,KAAK;;AAMf,MAAI,SAAS,SAAS,GAAG;GACvB,MAAM,cAAc,SAAS,KAAK;AAClC,SAAM,IAAI,MAAM,GAAGT,uBAAU,kBAAkB,YAAY,IAAI;;AAGjE,SAAO;GACL;GACA,YAAY,MAAM,KAAK,WAAW;GAClC,MAAM,MAAM,KAAK,KAAK;GACvB;IAEJ"}