UNPKG

@jcottam/html-metadata

Version:

This JavaScript library simplifies the extraction of HTML Meta and OpenGraph tags from HTML content or URLs.

1,695 lines (1,681 loc) 2.23 MB
'use strict'; var require$$0$3 = require('node:stream'); var require$$0$1 = require('buffer'); var require$$1$2 = require('string_decoder'); var require$$0$2 = require('node:assert'); var require$$0$4 = require('node:net'); var require$$2$2 = require('node:http'); var require$$5$1 = require('node:querystring'); var require$$6$1 = require('node:events'); var require$$0$6 = require('node:diagnostics_channel'); var require$$0$5 = require('node:util'); var require$$4$1 = require('node:tls'); var require$$0$7 = require('node:buffer'); var require$$0$8 = require('node:zlib'); var require$$5$2 = require('node:perf_hooks'); var require$$8 = require('node:util/types'); var require$$1$3 = require('node:worker_threads'); var require$$1$4 = require('node:async_hooks'); var require$$1$5 = require('node:console'); var require$$0$9 = require('node:fs/promises'); var require$$1$6 = require('node:path'); var require$$2$3 = require('node:timers'); var require$$1$7 = require('node:dns'); var require$$2$4 = require('node:sqlite'); const defaultOpts$2 = { _useHtmlParser2: false, }; /** * Flatten the options for Cheerio. * * This will set `_useHtmlParser2` to true if `xml` is set to true. * * @param options - The options to flatten. * @param baseOptions - The base options to use. * @returns The flattened options. */ function flattenOptions(options, baseOptions) { if (!options) { return baseOptions !== null && baseOptions !== void 0 ? baseOptions : defaultOpts$2; } const opts = { _useHtmlParser2: !!options.xmlMode, ...baseOptions, ...options, }; if (options.xml) { opts._useHtmlParser2 = true; opts.xmlMode = true; if (options.xml !== true) { Object.assign(opts, options.xml); } } else if (options.xmlMode) { opts._useHtmlParser2 = true; } return opts; } /** Types of elements found in htmlparser2's DOM */ var ElementType; (function (ElementType) { /** Type for the root element of a document */ ElementType["Root"] = "root"; /** Type for Text */ ElementType["Text"] = "text"; /** Type for <? ... ?> */ ElementType["Directive"] = "directive"; /** Type for <!-- ... --> */ ElementType["Comment"] = "comment"; /** Type for <script> tags */ ElementType["Script"] = "script"; /** Type for <style> tags */ ElementType["Style"] = "style"; /** Type for Any tag */ ElementType["Tag"] = "tag"; /** Type for <![CDATA[ ... ]]> */ ElementType["CDATA"] = "cdata"; /** Type for <!doctype ...> */ ElementType["Doctype"] = "doctype"; })(ElementType || (ElementType = {})); /** * Tests whether an element is a tag or not. * * @param elem Element to test */ function isTag$1(elem) { return (elem.type === ElementType.Tag || elem.type === ElementType.Script || elem.type === ElementType.Style); } // Exports for backwards compatibility /** Type for the root element of a document */ const Root = ElementType.Root; /** Type for Text */ const Text$1 = ElementType.Text; /** Type for <? ... ?> */ const Directive = ElementType.Directive; /** Type for <!-- ... --> */ const Comment$1 = ElementType.Comment; /** Type for <script> tags */ const Script = ElementType.Script; /** Type for <style> tags */ const Style = ElementType.Style; /** Type for Any tag */ const Tag = ElementType.Tag; /** Type for <![CDATA[ ... ]]> */ const CDATA$1 = ElementType.CDATA; /** Type for <!doctype ...> */ const Doctype = ElementType.Doctype; /** * This object will be used as the prototype for Nodes when creating a * DOM-Level-1-compliant structure. */ class Node { constructor() { /** Parent of the node */ this.parent = null; /** Previous sibling */ this.prev = null; /** Next sibling */ this.next = null; /** The start index of the node. Requires `withStartIndices` on the handler to be `true. */ this.startIndex = null; /** The end index of the node. Requires `withEndIndices` on the handler to be `true. */ this.endIndex = null; } // Read-write aliases for properties /** * Same as {@link parent}. * [DOM spec](https://dom.spec.whatwg.org)-compatible alias. */ get parentNode() { return this.parent; } set parentNode(parent) { this.parent = parent; } /** * Same as {@link prev}. * [DOM spec](https://dom.spec.whatwg.org)-compatible alias. */ get previousSibling() { return this.prev; } set previousSibling(prev) { this.prev = prev; } /** * Same as {@link next}. * [DOM spec](https://dom.spec.whatwg.org)-compatible alias. */ get nextSibling() { return this.next; } set nextSibling(next) { this.next = next; } /** * Clone this node, and optionally its children. * * @param recursive Clone child nodes as well. * @returns A clone of the node. */ cloneNode(recursive = false) { return cloneNode(this, recursive); } } /** * A node that contains some data. */ class DataNode extends Node { /** * @param data The content of the data node */ constructor(data) { super(); this.data = data; } /** * Same as {@link data}. * [DOM spec](https://dom.spec.whatwg.org)-compatible alias. */ get nodeValue() { return this.data; } set nodeValue(data) { this.data = data; } } /** * Text within the document. */ class Text extends DataNode { constructor() { super(...arguments); this.type = ElementType.Text; } get nodeType() { return 3; } } /** * Comments within the document. */ class Comment extends DataNode { constructor() { super(...arguments); this.type = ElementType.Comment; } get nodeType() { return 8; } } /** * Processing instructions, including doc types. */ class ProcessingInstruction extends DataNode { constructor(name, data) { super(data); this.name = name; this.type = ElementType.Directive; } get nodeType() { return 1; } } /** * A `Node` that can have children. */ class NodeWithChildren extends Node { /** * @param children Children of the node. Only certain node types can have children. */ constructor(children) { super(); this.children = children; } // Aliases /** First child of the node. */ get firstChild() { var _a; return (_a = this.children[0]) !== null && _a !== void 0 ? _a : null; } /** Last child of the node. */ get lastChild() { return this.children.length > 0 ? this.children[this.children.length - 1] : null; } /** * Same as {@link children}. * [DOM spec](https://dom.spec.whatwg.org)-compatible alias. */ get childNodes() { return this.children; } set childNodes(children) { this.children = children; } } class CDATA extends NodeWithChildren { constructor() { super(...arguments); this.type = ElementType.CDATA; } get nodeType() { return 4; } } /** * The root node of the document. */ class Document extends NodeWithChildren { constructor() { super(...arguments); this.type = ElementType.Root; } get nodeType() { return 9; } } /** * An element within the DOM. */ class Element extends NodeWithChildren { /** * @param name Name of the tag, eg. `div`, `span`. * @param attribs Object mapping attribute names to attribute values. * @param children Children of the node. */ constructor(name, attribs, children = [], type = name === "script" ? ElementType.Script : name === "style" ? ElementType.Style : ElementType.Tag) { super(children); this.name = name; this.attribs = attribs; this.type = type; } get nodeType() { return 1; } // DOM Level 1 aliases /** * Same as {@link name}. * [DOM spec](https://dom.spec.whatwg.org)-compatible alias. */ get tagName() { return this.name; } set tagName(name) { this.name = name; } get attributes() { return Object.keys(this.attribs).map((name) => { var _a, _b; return ({ name, value: this.attribs[name], namespace: (_a = this["x-attribsNamespace"]) === null || _a === void 0 ? void 0 : _a[name], prefix: (_b = this["x-attribsPrefix"]) === null || _b === void 0 ? void 0 : _b[name], }); }); } } /** * @param node Node to check. * @returns `true` if the node is a `Element`, `false` otherwise. */ function isTag(node) { return isTag$1(node); } /** * @param node Node to check. * @returns `true` if the node has the type `CDATA`, `false` otherwise. */ function isCDATA(node) { return node.type === ElementType.CDATA; } /** * @param node Node to check. * @returns `true` if the node has the type `Text`, `false` otherwise. */ function isText(node) { return node.type === ElementType.Text; } /** * @param node Node to check. * @returns `true` if the node has the type `Comment`, `false` otherwise. */ function isComment(node) { return node.type === ElementType.Comment; } /** * @param node Node to check. * @returns `true` if the node has the type `ProcessingInstruction`, `false` otherwise. */ function isDirective(node) { return node.type === ElementType.Directive; } /** * @param node Node to check. * @returns `true` if the node has the type `ProcessingInstruction`, `false` otherwise. */ function isDocument(node) { return node.type === ElementType.Root; } /** * @param node Node to check. * @returns `true` if the node has children, `false` otherwise. */ function hasChildren(node) { return Object.prototype.hasOwnProperty.call(node, "children"); } /** * Clone a node, and optionally its children. * * @param recursive Clone child nodes as well. * @returns A clone of the node. */ function cloneNode(node, recursive = false) { let result; if (isText(node)) { result = new Text(node.data); } else if (isComment(node)) { result = new Comment(node.data); } else if (isTag(node)) { const children = recursive ? cloneChildren(node.children) : []; const clone = new Element(node.name, { ...node.attribs }, children); children.forEach((child) => (child.parent = clone)); if (node.namespace != null) { clone.namespace = node.namespace; } if (node["x-attribsNamespace"]) { clone["x-attribsNamespace"] = { ...node["x-attribsNamespace"] }; } if (node["x-attribsPrefix"]) { clone["x-attribsPrefix"] = { ...node["x-attribsPrefix"] }; } result = clone; } else if (isCDATA(node)) { const children = recursive ? cloneChildren(node.children) : []; const clone = new CDATA(children); children.forEach((child) => (child.parent = clone)); result = clone; } else if (isDocument(node)) { const children = recursive ? cloneChildren(node.children) : []; const clone = new Document(children); children.forEach((child) => (child.parent = clone)); if (node["x-mode"]) { clone["x-mode"] = node["x-mode"]; } result = clone; } else if (isDirective(node)) { const instruction = new ProcessingInstruction(node.name, node.data); if (node["x-name"] != null) { instruction["x-name"] = node["x-name"]; instruction["x-publicId"] = node["x-publicId"]; instruction["x-systemId"] = node["x-systemId"]; } result = instruction; } else { throw new Error(`Not implemented yet: ${node.type}`); } result.startIndex = node.startIndex; result.endIndex = node.endIndex; if (node.sourceCodeLocation != null) { result.sourceCodeLocation = node.sourceCodeLocation; } return result; } function cloneChildren(childs) { const children = childs.map((child) => cloneNode(child, true)); for (let i = 1; i < children.length; i++) { children[i].prev = children[i - 1]; children[i - 1].next = children[i]; } return children; } // Default options const defaultOpts$1 = { withStartIndices: false, withEndIndices: false, xmlMode: false, }; class DomHandler { /** * @param callback Called once parsing has completed. * @param options Settings for the handler. * @param elementCB Callback whenever a tag is closed. */ constructor(callback, options, elementCB) { /** The elements of the DOM */ this.dom = []; /** The root element for the DOM */ this.root = new Document(this.dom); /** Indicated whether parsing has been completed. */ this.done = false; /** Stack of open tags. */ this.tagStack = [this.root]; /** A data node that is still being written to. */ this.lastNode = null; /** Reference to the parser instance. Used for location information. */ this.parser = null; // Make it possible to skip arguments, for backwards-compatibility if (typeof options === "function") { elementCB = options; options = defaultOpts$1; } if (typeof callback === "object") { options = callback; callback = undefined; } this.callback = callback !== null && callback !== void 0 ? callback : null; this.options = options !== null && options !== void 0 ? options : defaultOpts$1; this.elementCB = elementCB !== null && elementCB !== void 0 ? elementCB : null; } onparserinit(parser) { this.parser = parser; } // Resets the handler back to starting state onreset() { this.dom = []; this.root = new Document(this.dom); this.done = false; this.tagStack = [this.root]; this.lastNode = null; this.parser = null; } // Signals the handler that parsing is done onend() { if (this.done) return; this.done = true; this.parser = null; this.handleCallback(null); } onerror(error) { this.handleCallback(error); } onclosetag() { this.lastNode = null; const elem = this.tagStack.pop(); if (this.options.withEndIndices) { elem.endIndex = this.parser.endIndex; } if (this.elementCB) this.elementCB(elem); } onopentag(name, attribs) { const type = this.options.xmlMode ? ElementType.Tag : undefined; const element = new Element(name, attribs, undefined, type); this.addNode(element); this.tagStack.push(element); } ontext(data) { const { lastNode } = this; if (lastNode && lastNode.type === ElementType.Text) { lastNode.data += data; if (this.options.withEndIndices) { lastNode.endIndex = this.parser.endIndex; } } else { const node = new Text(data); this.addNode(node); this.lastNode = node; } } oncomment(data) { if (this.lastNode && this.lastNode.type === ElementType.Comment) { this.lastNode.data += data; return; } const node = new Comment(data); this.addNode(node); this.lastNode = node; } oncommentend() { this.lastNode = null; } oncdatastart() { const text = new Text(""); const node = new CDATA([text]); this.addNode(node); text.parent = node; this.lastNode = text; } oncdataend() { this.lastNode = null; } onprocessinginstruction(name, data) { const node = new ProcessingInstruction(name, data); this.addNode(node); } handleCallback(error) { if (typeof this.callback === "function") { this.callback(error, this.dom); } else if (error) { throw error; } } addNode(node) { const parent = this.tagStack[this.tagStack.length - 1]; const previousSibling = parent.children[parent.children.length - 1]; if (this.options.withStartIndices) { node.startIndex = this.parser.startIndex; } if (this.options.withEndIndices) { node.endIndex = this.parser.endIndex; } parent.children.push(node); if (previousSibling) { node.prev = previousSibling; previousSibling.next = node; } node.parent = parent; this.lastNode = null; } } const xmlReplacer = /["&'<>$\x80-\uFFFF]/g; const xmlCodeMap = new Map([ [34, "&quot;"], [38, "&amp;"], [39, "&apos;"], [60, "&lt;"], [62, "&gt;"], ]); // For compatibility with node < 4, we wrap `codePointAt` const getCodePoint = // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition String.prototype.codePointAt != null ? (str, index) => str.codePointAt(index) : // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae (c, index) => (c.charCodeAt(index) & 0xfc00) === 0xd800 ? (c.charCodeAt(index) - 0xd800) * 0x400 + c.charCodeAt(index + 1) - 0xdc00 + 0x10000 : c.charCodeAt(index); /** * Encodes all non-ASCII characters, as well as characters not valid in XML * documents using XML entities. * * If a character has no equivalent entity, a * numeric hexadecimal reference (eg. `&#xfc;`) will be used. */ function encodeXML(str) { let ret = ""; let lastIdx = 0; let match; while ((match = xmlReplacer.exec(str)) !== null) { const i = match.index; const char = str.charCodeAt(i); const next = xmlCodeMap.get(char); if (next !== undefined) { ret += str.substring(lastIdx, i) + next; lastIdx = i + 1; } else { ret += `${str.substring(lastIdx, i)}&#x${getCodePoint(str, i).toString(16)};`; // Increase by 1 if we have a surrogate pair lastIdx = xmlReplacer.lastIndex += Number((char & 0xfc00) === 0xd800); } } return ret + str.substr(lastIdx); } /** * Creates a function that escapes all characters matched by the given regular * expression using the given map of characters to escape to their entities. * * @param regex Regular expression to match characters to escape. * @param map Map of characters to escape to their entities. * * @returns Function that escapes all characters matched by the given regular * expression using the given map of characters to escape to their entities. */ function getEscaper$1(regex, map) { return function escape(data) { let match; let lastIdx = 0; let result = ""; while ((match = regex.exec(data))) { if (lastIdx !== match.index) { result += data.substring(lastIdx, match.index); } // We know that this character will be in the map. result += map.get(match[0].charCodeAt(0)); // Every match will be of length 1 lastIdx = match.index + 1; } return result + data.substring(lastIdx); }; } /** * Encodes all characters that have to be escaped in HTML attributes, * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}. * * @param data String to escape. */ const escapeAttribute$1 = getEscaper$1(/["&\u00A0]/g, new Map([ [34, "&quot;"], [38, "&amp;"], [160, "&nbsp;"], ])); /** * Encodes all characters that have to be escaped in HTML text, * following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}. * * @param data String to escape. */ const escapeText$1 = getEscaper$1(/[&<>\u00A0]/g, new Map([ [38, "&amp;"], [60, "&lt;"], [62, "&gt;"], [160, "&nbsp;"], ])); const elementNames = new Map([ "altGlyph", "altGlyphDef", "altGlyphItem", "animateColor", "animateMotion", "animateTransform", "clipPath", "feBlend", "feColorMatrix", "feComponentTransfer", "feComposite", "feConvolveMatrix", "feDiffuseLighting", "feDisplacementMap", "feDistantLight", "feDropShadow", "feFlood", "feFuncA", "feFuncB", "feFuncG", "feFuncR", "feGaussianBlur", "feImage", "feMerge", "feMergeNode", "feMorphology", "feOffset", "fePointLight", "feSpecularLighting", "feSpotLight", "feTile", "feTurbulence", "foreignObject", "glyphRef", "linearGradient", "radialGradient", "textPath", ].map((val) => [val.toLowerCase(), val])); const attributeNames = new Map([ "definitionURL", "attributeName", "attributeType", "baseFrequency", "baseProfile", "calcMode", "clipPathUnits", "diffuseConstant", "edgeMode", "filterUnits", "glyphRef", "gradientTransform", "gradientUnits", "kernelMatrix", "kernelUnitLength", "keyPoints", "keySplines", "keyTimes", "lengthAdjust", "limitingConeAngle", "markerHeight", "markerUnits", "markerWidth", "maskContentUnits", "maskUnits", "numOctaves", "pathLength", "patternContentUnits", "patternTransform", "patternUnits", "pointsAtX", "pointsAtY", "pointsAtZ", "preserveAlpha", "preserveAspectRatio", "primitiveUnits", "refX", "refY", "repeatCount", "repeatDur", "requiredExtensions", "requiredFeatures", "specularConstant", "specularExponent", "spreadMethod", "startOffset", "stdDeviation", "stitchTiles", "surfaceScale", "systemLanguage", "tableValues", "targetX", "targetY", "textLength", "viewBox", "viewTarget", "xChannelSelector", "yChannelSelector", "zoomAndPan", ].map((val) => [val.toLowerCase(), val])); /* * Module dependencies */ const unencodedElements = new Set([ "style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript", ]); function replaceQuotes(value) { return value.replace(/"/g, "&quot;"); } /** * Format attributes */ function formatAttributes(attributes, opts) { var _a; if (!attributes) return; const encode = ((_a = opts.encodeEntities) !== null && _a !== void 0 ? _a : opts.decodeEntities) === false ? replaceQuotes : opts.xmlMode || opts.encodeEntities !== "utf8" ? encodeXML : escapeAttribute$1; return Object.keys(attributes) .map((key) => { var _a, _b; const value = (_a = attributes[key]) !== null && _a !== void 0 ? _a : ""; if (opts.xmlMode === "foreign") { /* Fix up mixed-case attribute names */ key = (_b = attributeNames.get(key)) !== null && _b !== void 0 ? _b : key; } if (!opts.emptyAttrs && !opts.xmlMode && value === "") { return key; } return `${key}="${encode(value)}"`; }) .join(" "); } /** * Self-enclosing tags */ const singleTag = new Set([ "area", "base", "basefont", "br", "col", "command", "embed", "frame", "hr", "img", "input", "isindex", "keygen", "link", "meta", "param", "source", "track", "wbr", ]); /** * Renders a DOM node or an array of DOM nodes to a string. * * Can be thought of as the equivalent of the `outerHTML` of the passed node(s). * * @param node Node to be rendered. * @param options Changes serialization behavior */ function render$1(node, options = {}) { const nodes = "length" in node ? node : [node]; let output = ""; for (let i = 0; i < nodes.length; i++) { output += renderNode(nodes[i], options); } return output; } function renderNode(node, options) { switch (node.type) { case Root: return render$1(node.children, options); // @ts-expect-error We don't use `Doctype` yet case Doctype: case Directive: return renderDirective(node); case Comment$1: return renderComment(node); case CDATA$1: return renderCdata(node); case Script: case Style: case Tag: return renderTag(node, options); case Text$1: return renderText(node, options); } } const foreignModeIntegrationPoints = new Set([ "mi", "mo", "mn", "ms", "mtext", "annotation-xml", "foreignObject", "desc", "title", ]); const foreignElements = new Set(["svg", "math"]); function renderTag(elem, opts) { var _a; // Handle SVG / MathML in HTML if (opts.xmlMode === "foreign") { /* Fix up mixed-case element names */ elem.name = (_a = elementNames.get(elem.name)) !== null && _a !== void 0 ? _a : elem.name; /* Exit foreign mode at integration points */ if (elem.parent && foreignModeIntegrationPoints.has(elem.parent.name)) { opts = { ...opts, xmlMode: false }; } } if (!opts.xmlMode && foreignElements.has(elem.name)) { opts = { ...opts, xmlMode: "foreign" }; } let tag = `<${elem.name}`; const attribs = formatAttributes(elem.attribs, opts); if (attribs) { tag += ` ${attribs}`; } if (elem.children.length === 0 && (opts.xmlMode ? // In XML mode or foreign mode, and user hasn't explicitly turned off self-closing tags opts.selfClosingTags !== false : // User explicitly asked for self-closing tags, even in HTML mode opts.selfClosingTags && singleTag.has(elem.name))) { if (!opts.xmlMode) tag += " "; tag += "/>"; } else { tag += ">"; if (elem.children.length > 0) { tag += render$1(elem.children, opts); } if (opts.xmlMode || !singleTag.has(elem.name)) { tag += `</${elem.name}>`; } } return tag; } function renderDirective(elem) { return `<${elem.data}>`; } function renderText(elem, opts) { var _a; let data = elem.data || ""; // If entities weren't decoded, no need to encode them back if (((_a = opts.encodeEntities) !== null && _a !== void 0 ? _a : opts.decodeEntities) !== false && !(!opts.xmlMode && elem.parent && unencodedElements.has(elem.parent.name))) { data = opts.xmlMode || opts.encodeEntities !== "utf8" ? encodeXML(data) : escapeText$1(data); } return data; } function renderCdata(elem) { return `<![CDATA[${elem.children[0].data}]]>`; } function renderComment(elem) { return `<!--${elem.data}-->`; } /** * @category Stringify * @deprecated Use the `dom-serializer` module directly. * @param node Node to get the outer HTML of. * @param options Options for serialization. * @returns `node`'s outer HTML. */ function getOuterHTML(node, options) { return render$1(node, options); } /** * @category Stringify * @deprecated Use the `dom-serializer` module directly. * @param node Node to get the inner HTML of. * @param options Options for serialization. * @returns `node`'s inner HTML. */ function getInnerHTML(node, options) { return hasChildren(node) ? node.children.map((node) => getOuterHTML(node, options)).join("") : ""; } /** * Get a node's inner text. Same as `textContent`, but inserts newlines for `<br>` tags. Ignores comments. * * @category Stringify * @deprecated Use `textContent` instead. * @param node Node to get the inner text of. * @returns `node`'s inner text. */ function getText(node) { if (Array.isArray(node)) return node.map(getText).join(""); if (isTag(node)) return node.name === "br" ? "\n" : getText(node.children); if (isCDATA(node)) return getText(node.children); if (isText(node)) return node.data; return ""; } /** * Get a node's text content. Ignores comments. * * @category Stringify * @param node Node to get the text content of. * @returns `node`'s text content. * @see {@link https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent} */ function textContent(node) { if (Array.isArray(node)) return node.map(textContent).join(""); if (hasChildren(node) && !isComment(node)) { return textContent(node.children); } if (isText(node)) return node.data; return ""; } /** * Get a node's inner text, ignoring `<script>` and `<style>` tags. Ignores comments. * * @category Stringify * @param node Node to get the inner text of. * @returns `node`'s inner text. * @see {@link https://developer.mozilla.org/en-US/docs/Web/API/Node/innerText} */ function innerText(node) { if (Array.isArray(node)) return node.map(innerText).join(""); if (hasChildren(node) && (node.type === ElementType.Tag || isCDATA(node))) { return innerText(node.children); } if (isText(node)) return node.data; return ""; } /** * Get a node's children. * * @category Traversal * @param elem Node to get the children of. * @returns `elem`'s children, or an empty array. */ function getChildren(elem) { return hasChildren(elem) ? elem.children : []; } /** * Get a node's parent. * * @category Traversal * @param elem Node to get the parent of. * @returns `elem`'s parent node, or `null` if `elem` is a root node. */ function getParent(elem) { return elem.parent || null; } /** * Gets an elements siblings, including the element itself. * * Attempts to get the children through the element's parent first. If we don't * have a parent (the element is a root node), we walk the element's `prev` & * `next` to get all remaining nodes. * * @category Traversal * @param elem Element to get the siblings of. * @returns `elem`'s siblings, including `elem`. */ function getSiblings(elem) { const parent = getParent(elem); if (parent != null) return getChildren(parent); const siblings = [elem]; let { prev, next } = elem; while (prev != null) { siblings.unshift(prev); ({ prev } = prev); } while (next != null) { siblings.push(next); ({ next } = next); } return siblings; } /** * Gets an attribute from an element. * * @category Traversal * @param elem Element to check. * @param name Attribute name to retrieve. * @returns The element's attribute value, or `undefined`. */ function getAttributeValue(elem, name) { var _a; return (_a = elem.attribs) === null || _a === void 0 ? void 0 : _a[name]; } /** * Checks whether an element has an attribute. * * @category Traversal * @param elem Element to check. * @param name Attribute name to look for. * @returns Returns whether `elem` has the attribute `name`. */ function hasAttrib(elem, name) { return (elem.attribs != null && Object.prototype.hasOwnProperty.call(elem.attribs, name) && elem.attribs[name] != null); } /** * Get the tag name of an element. * * @category Traversal * @param elem The element to get the name for. * @returns The tag name of `elem`. */ function getName(elem) { return elem.name; } /** * Returns the next element sibling of a node. * * @category Traversal * @param elem The element to get the next sibling of. * @returns `elem`'s next sibling that is a tag, or `null` if there is no next * sibling. */ function nextElementSibling(elem) { let { next } = elem; while (next !== null && !isTag(next)) ({ next } = next); return next; } /** * Returns the previous element sibling of a node. * * @category Traversal * @param elem The element to get the previous sibling of. * @returns `elem`'s previous sibling that is a tag, or `null` if there is no * previous sibling. */ function prevElementSibling(elem) { let { prev } = elem; while (prev !== null && !isTag(prev)) ({ prev } = prev); return prev; } /** * Remove an element from the dom * * @category Manipulation * @param elem The element to be removed */ function removeElement(elem) { if (elem.prev) elem.prev.next = elem.next; if (elem.next) elem.next.prev = elem.prev; if (elem.parent) { const childs = elem.parent.children; const childsIndex = childs.lastIndexOf(elem); if (childsIndex >= 0) { childs.splice(childsIndex, 1); } } elem.next = null; elem.prev = null; elem.parent = null; } /** * Replace an element in the dom * * @category Manipulation * @param elem The element to be replaced * @param replacement The element to be added */ function replaceElement(elem, replacement) { const prev = (replacement.prev = elem.prev); if (prev) { prev.next = replacement; } const next = (replacement.next = elem.next); if (next) { next.prev = replacement; } const parent = (replacement.parent = elem.parent); if (parent) { const childs = parent.children; childs[childs.lastIndexOf(elem)] = replacement; elem.parent = null; } } /** * Append a child to an element. * * @category Manipulation * @param parent The element to append to. * @param child The element to be added as a child. */ function appendChild(parent, child) { removeElement(child); child.next = null; child.parent = parent; if (parent.children.push(child) > 1) { const sibling = parent.children[parent.children.length - 2]; sibling.next = child; child.prev = sibling; } else { child.prev = null; } } /** * Append an element after another. * * @category Manipulation * @param elem The element to append after. * @param next The element be added. */ function append$1(elem, next) { removeElement(next); const { parent } = elem; const currNext = elem.next; next.next = currNext; next.prev = elem; elem.next = next; next.parent = parent; if (currNext) { currNext.prev = next; if (parent) { const childs = parent.children; childs.splice(childs.lastIndexOf(currNext), 0, next); } } else if (parent) { parent.children.push(next); } } /** * Prepend a child to an element. * * @category Manipulation * @param parent The element to prepend before. * @param child The element to be added as a child. */ function prependChild(parent, child) { removeElement(child); child.parent = parent; child.prev = null; if (parent.children.unshift(child) !== 1) { const sibling = parent.children[1]; sibling.prev = child; child.next = sibling; } else { child.next = null; } } /** * Prepend an element before another. * * @category Manipulation * @param elem The element to prepend before. * @param prev The element be added. */ function prepend$1(elem, prev) { removeElement(prev); const { parent } = elem; if (parent) { const childs = parent.children; childs.splice(childs.indexOf(elem), 0, prev); } if (elem.prev) { elem.prev.next = prev; } prev.parent = parent; prev.prev = elem.prev; prev.next = elem; elem.prev = prev; } /** * Search a node and its children for nodes passing a test function. If `node` is not an array, it will be wrapped in one. * * @category Querying * @param test Function to test nodes on. * @param node Node to search. Will be included in the result set if it matches. * @param recurse Also consider child nodes. * @param limit Maximum number of nodes to return. * @returns All nodes passing `test`. */ function filter$2(test, node, recurse = true, limit = Infinity) { return find$2(test, Array.isArray(node) ? node : [node], recurse, limit); } /** * Search an array of nodes and their children for nodes passing a test function. * * @category Querying * @param test Function to test nodes on. * @param nodes Array of nodes to search. * @param recurse Also consider child nodes. * @param limit Maximum number of nodes to return. * @returns All nodes passing `test`. */ function find$2(test, nodes, recurse, limit) { const result = []; /** Stack of the arrays we are looking at. */ const nodeStack = [Array.isArray(nodes) ? nodes : [nodes]]; /** Stack of the indices within the arrays. */ const indexStack = [0]; for (;;) { // First, check if the current array has any more elements to look at. if (indexStack[0] >= nodeStack[0].length) { // If we have no more arrays to look at, we are done. if (indexStack.length === 1) { return result; } // Otherwise, remove the current array from the stack. nodeStack.shift(); indexStack.shift(); // Loop back to the start to continue with the next array. continue; } const elem = nodeStack[0][indexStack[0]++]; if (test(elem)) { result.push(elem); if (--limit <= 0) return result; } if (recurse && hasChildren(elem) && elem.children.length > 0) { /* * Add the children to the stack. We are depth-first, so this is * the next array we look at. */ indexStack.unshift(0); nodeStack.unshift(elem.children); } } } /** * Finds the first element inside of an array that matches a test function. This is an alias for `Array.prototype.find`. * * @category Querying * @param test Function to test nodes on. * @param nodes Array of nodes to search. * @returns The first node in the array that passes `test`. * @deprecated Use `Array.prototype.find` directly. */ function findOneChild(test, nodes) { return nodes.find(test); } /** * Finds one element in a tree that passes a test. * * @category Querying * @param test Function to test nodes on. * @param nodes Node or array of nodes to search. * @param recurse Also consider child nodes. * @returns The first node that passes `test`. */ function findOne(test, nodes, recurse = true) { const searchedNodes = Array.isArray(nodes) ? nodes : [nodes]; for (let i = 0; i < searchedNodes.length; i++) { const node = searchedNodes[i]; if (isTag(node) && test(node)) { return node; } if (recurse && hasChildren(node) && node.children.length > 0) { const found = findOne(test, node.children, true); if (found) return found; } } return null; } /** * Checks if a tree of nodes contains at least one node passing a test. * * @category Querying * @param test Function to test nodes on. * @param nodes Array of nodes to search. * @returns Whether a tree of nodes contains at least one node passing the test. */ function existsOne(test, nodes) { return (Array.isArray(nodes) ? nodes : [nodes]).some((node) => (isTag(node) && test(node)) || (hasChildren(node) && existsOne(test, node.children))); } /** * Search an array of nodes and their children for elements passing a test function. * * Same as `find`, but limited to elements and with less options, leading to reduced complexity. * * @category Querying * @param test Function to test nodes on. * @param nodes Array of nodes to search. * @returns All nodes passing `test`. */ function findAll(test, nodes) { const result = []; const nodeStack = [Array.isArray(nodes) ? nodes : [nodes]]; const indexStack = [0]; for (;;) { if (indexStack[0] >= nodeStack[0].length) { if (nodeStack.length === 1) { return result; } // Otherwise, remove the current array from the stack. nodeStack.shift(); indexStack.shift(); // Loop back to the start to continue with the next array. continue; } const elem = nodeStack[0][indexStack[0]++]; if (isTag(elem) && test(elem)) result.push(elem); if (hasChildren(elem) && elem.children.length > 0) { indexStack.unshift(0); nodeStack.unshift(elem.children); } } } /** * A map of functions to check nodes against. */ const Checks = { tag_name(name) { if (typeof name === "function") { return (elem) => isTag(elem) && name(elem.name); } else if (name === "*") { return isTag; } return (elem) => isTag(elem) && elem.name === name; }, tag_type(type) { if (typeof type === "function") { return (elem) => type(elem.type); } return (elem) => elem.type === type; }, tag_contains(data) { if (typeof data === "function") { return (elem) => isText(elem) && data(elem.data); } return (elem) => isText(elem) && elem.data === data; }, }; /** * Returns a function to check whether a node has an attribute with a particular * value. * * @param attrib Attribute to check. * @param value Attribute value to look for. * @returns A function to check whether the a node has an attribute with a * particular value. */ function getAttribCheck(attrib, value) { if (typeof value === "function") { return (elem) => isTag(elem) && value(elem.attribs[attrib]); } return (elem) => isTag(elem) && elem.attribs[attrib] === value; } /** * Returns a function that returns `true` if either of the input functions * returns `true` for a node. * * @param a First function to combine. * @param b Second function to combine. * @returns A function taking a node and returning `true` if either of the input * functions returns `true` for the node. */ function combineFuncs(a, b) { return (elem) => a(elem) || b(elem); } /** * Returns a function that executes all checks in `options` and returns `true` * if any of them match a node. * * @param options An object describing nodes to look for. * @returns A function that executes all checks in `options` and returns `true` * if any of them match a node. */ function compileTest(options) { const funcs = Object.keys(options).map((key) => { const value = options[key]; return Object.prototype.hasOwnProperty.call(Checks, key) ? Checks[key](value) : getAttribCheck(key, value); }); return funcs.length === 0 ? null : funcs.reduce(combineFuncs); } /** * Checks whether a node matches the description in `options`. * * @category Legacy Query Functions * @param options An object describing nodes to look for. * @param node The element to test. * @returns Whether the element matches the description in `options`. */ function testElement(options, node) { const test = compileTest(options); return test ? test(node) : true; } /** * Returns all nodes that match `options`. * * @category Legacy Query Functions * @param options An object describing nodes to look for. * @param nodes Nodes to search through. * @param recurse Also consider child nodes. * @param limit Maximum number of nodes to return. * @returns All nodes that match `options`. */ function getElements(options, nodes, recurse, limit = Infinity) { const test = compileTest(options); return test ? filter$2(test, nodes, recurse, limit) : []; } /** * Returns the node with the supplied ID. * * @category Legacy Query Functions * @param id The unique ID attribute value to look for. * @param nodes Nodes to search through. * @param recurse Also consider child nodes. * @returns The node with the supplied ID. */ function getElementById(id, nodes, recurse = true) { if (!Array.isArray(nodes)) nodes = [nodes]; return findOne(getAttribCheck("id", id), nodes, recurse); } /** * Returns all nodes with the supplied `tagName`. * * @category Legacy Query Functions * @param tagName Tag name to search for. * @param nodes Nodes to search through. * @param recurse Also consider child nodes. * @param limit Maximum number of nodes to return. * @returns All nodes with the supplied `tagName`. */ function getElementsByTagName(tagName, nodes, recurse = true, limit = Infinity) { return filter$2(Checks["tag_name"](tagName), nodes, recurse, limit); } /** * Returns all nodes with the supplied `className`. * * @category Legacy Query Functions * @param className Class name to search for. * @param nodes Nodes to search through. * @param recurse Also consider child nodes. * @param limit Maximum number of nodes to return. * @returns All nodes with the supplied `className`. */ function getElementsByClassName(className, nodes, recurse = true, limit = Infinity) { return filter$2(getAttribCheck("class", className), nodes, recurse, limit); } /** * Returns all nodes with the supplied `type`. * * @category Legacy Query Functions * @param type Element type to look for. * @param nodes Nodes to search through. * @param recurse Also consider child nodes. * @param limit Maximum number of nodes to return. * @returns All nodes with the supplied `type`. */ function getElementsByTagType(type, nodes, recurse = true, limit = Infinity) { return filter$2(Checks["tag_type"](type), nodes, recurse, limit); } /** * Given an array of nodes, remove any member that is contained by another * member. * * @category Helpers * @param nodes Nodes to filter. * @returns Remaining nodes that aren't contained by other nodes. */ function removeSubsets(nodes) { let idx = nodes.length; /* * Check if each node (or one of its ancestors) is already contained in the * array. */ while (--idx >= 0) { const node = nodes[idx]; /* * Remove the node if it is not unique. * We are going through the array from the end, so we only * have to check nodes that preceed the node under consideration in the array. */ if (idx > 0 && nodes.lastIndexOf(node, idx - 1) >= 0) { nodes.splice(idx, 1); continue; } for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) { if (nodes.includes(ancestor)) { nodes.splice(idx, 1); break; } } } return nodes; } /** * @category Helpers * @see {@link http://dom.spec.whatwg.org/#dom-node-comparedocumentposition} */ var DocumentPosition; (function (DocumentPosition) { DocumentPosition[DocumentPosition["DISCONNECTED"] = 1] = "DISCONNECTED"; DocumentPosition[DocumentPosition["PRECEDING"] = 2] = "PRECEDING"; DocumentPosition[DocumentPosition["FOLLOWING"] = 4] = "FOLLOWING"; DocumentPosition[DocumentPosition["CONTAINS"] = 8] = "CONTAINS"; DocumentPosition[DocumentPosition["CONTAINED_BY"] = 16] = "CONTAINED_BY"; })(DocumentPosition || (DocumentPosition = {})); /** * Compare the position of one node against another node in any other document, * returning a bitmask with the values from {@link DocumentPosition}. * * Document order: * > There is an ordering, document order, defined on all the nodes in the * > document corresponding to the order in which the first character of the * > XML representation of each node occurs in the XML representation of the * > document after expansion of general entities. Thus, the document element * > node will be the first node. Element nodes occur before their children. * > Thus, document order orders element nodes in order of the occurrence of * > their start-tag in the XML (after expansion of entities). The attribute * > nodes of an element occur after the element and before its children. The * > relative order of attribute nodes is implementation-dependent. * * Source: * http://www.w3.org/TR/DOM-Level-3-Core/glossary.html#dt-document-order * * @category Helpers * @param nodeA The first node to use in the comparison * @param nodeB The second node to use in the comparison * @returns A bitmask describing the input nodes' relative position. * * See http://dom.spec.whatwg.org/#dom-node-comparedocumentposition for * a description of these values. */ function compareDocumentPosition(nodeA, nodeB) { const aParents = []; const bParents = []; if (nodeA === nodeB) { return 0; } let current = hasChildren(nodeA) ? nodeA : nodeA.parent; while (current) { aParents.unshift(current); current = current.parent; } current = hasChildren(nodeB) ? nodeB : nodeB.parent; while (current) { bParents.unshift(current); current = current.parent; } const maxIdx = Math.min(aParents.length, bParents.length); let idx = 0; while (idx < maxIdx && aParents[idx] === bParents[idx]) { idx++; } if (idx === 0) { return DocumentPosition.DISCONNECTED; } const sharedParent = aParents[idx - 1]; const siblings = sharedParent.children; const aSibling = aParents[idx]; const bSibling = bParents[idx]; if (siblings.indexOf(aSibling) > siblings.indexOf(bSibling)) { if (sharedParent === nodeB) { return DocumentPosition.FOLLOWING | DocumentPosition.CONTAINED_BY; } return DocumentPosition.FOLLOWING; } if (sharedParent === nodeA) { return DocumentPosition.PRECEDI