@jcottam/html-metadata
Version:
This JavaScript library simplifies the extraction of HTML Meta and OpenGraph tags from HTML content or URLs.
1,695 lines (1,681 loc) • 2.23 MB
JavaScript
'use strict';
var require$$0$3 = require('node:stream');
var require$$0$1 = require('buffer');
var require$$1$2 = require('string_decoder');
var require$$0$2 = require('node:assert');
var require$$0$4 = require('node:net');
var require$$2$2 = require('node:http');
var require$$5$1 = require('node:querystring');
var require$$6$1 = require('node:events');
var require$$0$6 = require('node:diagnostics_channel');
var require$$0$5 = require('node:util');
var require$$4$1 = require('node:tls');
var require$$0$7 = require('node:buffer');
var require$$0$8 = require('node:zlib');
var require$$5$2 = require('node:perf_hooks');
var require$$8 = require('node:util/types');
var require$$1$3 = require('node:worker_threads');
var require$$1$4 = require('node:async_hooks');
var require$$1$5 = require('node:console');
var require$$0$9 = require('node:fs/promises');
var require$$1$6 = require('node:path');
var require$$2$3 = require('node:timers');
var require$$1$7 = require('node:dns');
var require$$2$4 = require('node:sqlite');
const defaultOpts$2 = {
_useHtmlParser2: false,
};
/**
* Flatten the options for Cheerio.
*
* This will set `_useHtmlParser2` to true if `xml` is set to true.
*
* @param options - The options to flatten.
* @param baseOptions - The base options to use.
* @returns The flattened options.
*/
function flattenOptions(options, baseOptions) {
if (!options) {
return baseOptions !== null && baseOptions !== void 0 ? baseOptions : defaultOpts$2;
}
const opts = {
_useHtmlParser2: !!options.xmlMode,
...baseOptions,
...options,
};
if (options.xml) {
opts._useHtmlParser2 = true;
opts.xmlMode = true;
if (options.xml !== true) {
Object.assign(opts, options.xml);
}
}
else if (options.xmlMode) {
opts._useHtmlParser2 = true;
}
return opts;
}
/** Types of elements found in htmlparser2's DOM */
var ElementType;
(function (ElementType) {
/** Type for the root element of a document */
ElementType["Root"] = "root";
/** Type for Text */
ElementType["Text"] = "text";
/** Type for <? ... ?> */
ElementType["Directive"] = "directive";
/** Type for <!-- ... --> */
ElementType["Comment"] = "comment";
/** Type for <script> tags */
ElementType["Script"] = "script";
/** Type for <style> tags */
ElementType["Style"] = "style";
/** Type for Any tag */
ElementType["Tag"] = "tag";
/** Type for <![CDATA[ ... ]]> */
ElementType["CDATA"] = "cdata";
/** Type for <!doctype ...> */
ElementType["Doctype"] = "doctype";
})(ElementType || (ElementType = {}));
/**
* Tests whether an element is a tag or not.
*
* @param elem Element to test
*/
function isTag$1(elem) {
return (elem.type === ElementType.Tag ||
elem.type === ElementType.Script ||
elem.type === ElementType.Style);
}
// Exports for backwards compatibility
/** Type for the root element of a document */
const Root = ElementType.Root;
/** Type for Text */
const Text$1 = ElementType.Text;
/** Type for <? ... ?> */
const Directive = ElementType.Directive;
/** Type for <!-- ... --> */
const Comment$1 = ElementType.Comment;
/** Type for <script> tags */
const Script = ElementType.Script;
/** Type for <style> tags */
const Style = ElementType.Style;
/** Type for Any tag */
const Tag = ElementType.Tag;
/** Type for <![CDATA[ ... ]]> */
const CDATA$1 = ElementType.CDATA;
/** Type for <!doctype ...> */
const Doctype = ElementType.Doctype;
/**
* This object will be used as the prototype for Nodes when creating a
* DOM-Level-1-compliant structure.
*/
class Node {
constructor() {
/** Parent of the node */
this.parent = null;
/** Previous sibling */
this.prev = null;
/** Next sibling */
this.next = null;
/** The start index of the node. Requires `withStartIndices` on the handler to be `true. */
this.startIndex = null;
/** The end index of the node. Requires `withEndIndices` on the handler to be `true. */
this.endIndex = null;
}
// Read-write aliases for properties
/**
* Same as {@link parent}.
* [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
*/
get parentNode() {
return this.parent;
}
set parentNode(parent) {
this.parent = parent;
}
/**
* Same as {@link prev}.
* [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
*/
get previousSibling() {
return this.prev;
}
set previousSibling(prev) {
this.prev = prev;
}
/**
* Same as {@link next}.
* [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
*/
get nextSibling() {
return this.next;
}
set nextSibling(next) {
this.next = next;
}
/**
* Clone this node, and optionally its children.
*
* @param recursive Clone child nodes as well.
* @returns A clone of the node.
*/
cloneNode(recursive = false) {
return cloneNode(this, recursive);
}
}
/**
* A node that contains some data.
*/
class DataNode extends Node {
/**
* @param data The content of the data node
*/
constructor(data) {
super();
this.data = data;
}
/**
* Same as {@link data}.
* [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
*/
get nodeValue() {
return this.data;
}
set nodeValue(data) {
this.data = data;
}
}
/**
* Text within the document.
*/
class Text extends DataNode {
constructor() {
super(...arguments);
this.type = ElementType.Text;
}
get nodeType() {
return 3;
}
}
/**
* Comments within the document.
*/
class Comment extends DataNode {
constructor() {
super(...arguments);
this.type = ElementType.Comment;
}
get nodeType() {
return 8;
}
}
/**
* Processing instructions, including doc types.
*/
class ProcessingInstruction extends DataNode {
constructor(name, data) {
super(data);
this.name = name;
this.type = ElementType.Directive;
}
get nodeType() {
return 1;
}
}
/**
* A `Node` that can have children.
*/
class NodeWithChildren extends Node {
/**
* @param children Children of the node. Only certain node types can have children.
*/
constructor(children) {
super();
this.children = children;
}
// Aliases
/** First child of the node. */
get firstChild() {
var _a;
return (_a = this.children[0]) !== null && _a !== void 0 ? _a : null;
}
/** Last child of the node. */
get lastChild() {
return this.children.length > 0
? this.children[this.children.length - 1]
: null;
}
/**
* Same as {@link children}.
* [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
*/
get childNodes() {
return this.children;
}
set childNodes(children) {
this.children = children;
}
}
class CDATA extends NodeWithChildren {
constructor() {
super(...arguments);
this.type = ElementType.CDATA;
}
get nodeType() {
return 4;
}
}
/**
* The root node of the document.
*/
class Document extends NodeWithChildren {
constructor() {
super(...arguments);
this.type = ElementType.Root;
}
get nodeType() {
return 9;
}
}
/**
* An element within the DOM.
*/
class Element extends NodeWithChildren {
/**
* @param name Name of the tag, eg. `div`, `span`.
* @param attribs Object mapping attribute names to attribute values.
* @param children Children of the node.
*/
constructor(name, attribs, children = [], type = name === "script"
? ElementType.Script
: name === "style"
? ElementType.Style
: ElementType.Tag) {
super(children);
this.name = name;
this.attribs = attribs;
this.type = type;
}
get nodeType() {
return 1;
}
// DOM Level 1 aliases
/**
* Same as {@link name}.
* [DOM spec](https://dom.spec.whatwg.org)-compatible alias.
*/
get tagName() {
return this.name;
}
set tagName(name) {
this.name = name;
}
get attributes() {
return Object.keys(this.attribs).map((name) => {
var _a, _b;
return ({
name,
value: this.attribs[name],
namespace: (_a = this["x-attribsNamespace"]) === null || _a === void 0 ? void 0 : _a[name],
prefix: (_b = this["x-attribsPrefix"]) === null || _b === void 0 ? void 0 : _b[name],
});
});
}
}
/**
* @param node Node to check.
* @returns `true` if the node is a `Element`, `false` otherwise.
*/
function isTag(node) {
return isTag$1(node);
}
/**
* @param node Node to check.
* @returns `true` if the node has the type `CDATA`, `false` otherwise.
*/
function isCDATA(node) {
return node.type === ElementType.CDATA;
}
/**
* @param node Node to check.
* @returns `true` if the node has the type `Text`, `false` otherwise.
*/
function isText(node) {
return node.type === ElementType.Text;
}
/**
* @param node Node to check.
* @returns `true` if the node has the type `Comment`, `false` otherwise.
*/
function isComment(node) {
return node.type === ElementType.Comment;
}
/**
* @param node Node to check.
* @returns `true` if the node has the type `ProcessingInstruction`, `false` otherwise.
*/
function isDirective(node) {
return node.type === ElementType.Directive;
}
/**
* @param node Node to check.
* @returns `true` if the node has the type `ProcessingInstruction`, `false` otherwise.
*/
function isDocument(node) {
return node.type === ElementType.Root;
}
/**
* @param node Node to check.
* @returns `true` if the node has children, `false` otherwise.
*/
function hasChildren(node) {
return Object.prototype.hasOwnProperty.call(node, "children");
}
/**
* Clone a node, and optionally its children.
*
* @param recursive Clone child nodes as well.
* @returns A clone of the node.
*/
function cloneNode(node, recursive = false) {
let result;
if (isText(node)) {
result = new Text(node.data);
}
else if (isComment(node)) {
result = new Comment(node.data);
}
else if (isTag(node)) {
const children = recursive ? cloneChildren(node.children) : [];
const clone = new Element(node.name, { ...node.attribs }, children);
children.forEach((child) => (child.parent = clone));
if (node.namespace != null) {
clone.namespace = node.namespace;
}
if (node["x-attribsNamespace"]) {
clone["x-attribsNamespace"] = { ...node["x-attribsNamespace"] };
}
if (node["x-attribsPrefix"]) {
clone["x-attribsPrefix"] = { ...node["x-attribsPrefix"] };
}
result = clone;
}
else if (isCDATA(node)) {
const children = recursive ? cloneChildren(node.children) : [];
const clone = new CDATA(children);
children.forEach((child) => (child.parent = clone));
result = clone;
}
else if (isDocument(node)) {
const children = recursive ? cloneChildren(node.children) : [];
const clone = new Document(children);
children.forEach((child) => (child.parent = clone));
if (node["x-mode"]) {
clone["x-mode"] = node["x-mode"];
}
result = clone;
}
else if (isDirective(node)) {
const instruction = new ProcessingInstruction(node.name, node.data);
if (node["x-name"] != null) {
instruction["x-name"] = node["x-name"];
instruction["x-publicId"] = node["x-publicId"];
instruction["x-systemId"] = node["x-systemId"];
}
result = instruction;
}
else {
throw new Error(`Not implemented yet: ${node.type}`);
}
result.startIndex = node.startIndex;
result.endIndex = node.endIndex;
if (node.sourceCodeLocation != null) {
result.sourceCodeLocation = node.sourceCodeLocation;
}
return result;
}
function cloneChildren(childs) {
const children = childs.map((child) => cloneNode(child, true));
for (let i = 1; i < children.length; i++) {
children[i].prev = children[i - 1];
children[i - 1].next = children[i];
}
return children;
}
// Default options
const defaultOpts$1 = {
withStartIndices: false,
withEndIndices: false,
xmlMode: false,
};
class DomHandler {
/**
* @param callback Called once parsing has completed.
* @param options Settings for the handler.
* @param elementCB Callback whenever a tag is closed.
*/
constructor(callback, options, elementCB) {
/** The elements of the DOM */
this.dom = [];
/** The root element for the DOM */
this.root = new Document(this.dom);
/** Indicated whether parsing has been completed. */
this.done = false;
/** Stack of open tags. */
this.tagStack = [this.root];
/** A data node that is still being written to. */
this.lastNode = null;
/** Reference to the parser instance. Used for location information. */
this.parser = null;
// Make it possible to skip arguments, for backwards-compatibility
if (typeof options === "function") {
elementCB = options;
options = defaultOpts$1;
}
if (typeof callback === "object") {
options = callback;
callback = undefined;
}
this.callback = callback !== null && callback !== void 0 ? callback : null;
this.options = options !== null && options !== void 0 ? options : defaultOpts$1;
this.elementCB = elementCB !== null && elementCB !== void 0 ? elementCB : null;
}
onparserinit(parser) {
this.parser = parser;
}
// Resets the handler back to starting state
onreset() {
this.dom = [];
this.root = new Document(this.dom);
this.done = false;
this.tagStack = [this.root];
this.lastNode = null;
this.parser = null;
}
// Signals the handler that parsing is done
onend() {
if (this.done)
return;
this.done = true;
this.parser = null;
this.handleCallback(null);
}
onerror(error) {
this.handleCallback(error);
}
onclosetag() {
this.lastNode = null;
const elem = this.tagStack.pop();
if (this.options.withEndIndices) {
elem.endIndex = this.parser.endIndex;
}
if (this.elementCB)
this.elementCB(elem);
}
onopentag(name, attribs) {
const type = this.options.xmlMode ? ElementType.Tag : undefined;
const element = new Element(name, attribs, undefined, type);
this.addNode(element);
this.tagStack.push(element);
}
ontext(data) {
const { lastNode } = this;
if (lastNode && lastNode.type === ElementType.Text) {
lastNode.data += data;
if (this.options.withEndIndices) {
lastNode.endIndex = this.parser.endIndex;
}
}
else {
const node = new Text(data);
this.addNode(node);
this.lastNode = node;
}
}
oncomment(data) {
if (this.lastNode && this.lastNode.type === ElementType.Comment) {
this.lastNode.data += data;
return;
}
const node = new Comment(data);
this.addNode(node);
this.lastNode = node;
}
oncommentend() {
this.lastNode = null;
}
oncdatastart() {
const text = new Text("");
const node = new CDATA([text]);
this.addNode(node);
text.parent = node;
this.lastNode = text;
}
oncdataend() {
this.lastNode = null;
}
onprocessinginstruction(name, data) {
const node = new ProcessingInstruction(name, data);
this.addNode(node);
}
handleCallback(error) {
if (typeof this.callback === "function") {
this.callback(error, this.dom);
}
else if (error) {
throw error;
}
}
addNode(node) {
const parent = this.tagStack[this.tagStack.length - 1];
const previousSibling = parent.children[parent.children.length - 1];
if (this.options.withStartIndices) {
node.startIndex = this.parser.startIndex;
}
if (this.options.withEndIndices) {
node.endIndex = this.parser.endIndex;
}
parent.children.push(node);
if (previousSibling) {
node.prev = previousSibling;
previousSibling.next = node;
}
node.parent = parent;
this.lastNode = null;
}
}
const xmlReplacer = /["&'<>$\x80-\uFFFF]/g;
const xmlCodeMap = new Map([
[34, """],
[38, "&"],
[39, "'"],
[60, "<"],
[62, ">"],
]);
// For compatibility with node < 4, we wrap `codePointAt`
const getCodePoint =
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
String.prototype.codePointAt != null
? (str, index) => str.codePointAt(index)
: // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
(c, index) => (c.charCodeAt(index) & 0xfc00) === 0xd800
? (c.charCodeAt(index) - 0xd800) * 0x400 +
c.charCodeAt(index + 1) -
0xdc00 +
0x10000
: c.charCodeAt(index);
/**
* Encodes all non-ASCII characters, as well as characters not valid in XML
* documents using XML entities.
*
* If a character has no equivalent entity, a
* numeric hexadecimal reference (eg. `ü`) will be used.
*/
function encodeXML(str) {
let ret = "";
let lastIdx = 0;
let match;
while ((match = xmlReplacer.exec(str)) !== null) {
const i = match.index;
const char = str.charCodeAt(i);
const next = xmlCodeMap.get(char);
if (next !== undefined) {
ret += str.substring(lastIdx, i) + next;
lastIdx = i + 1;
}
else {
ret += `${str.substring(lastIdx, i)}&#x${getCodePoint(str, i).toString(16)};`;
// Increase by 1 if we have a surrogate pair
lastIdx = xmlReplacer.lastIndex += Number((char & 0xfc00) === 0xd800);
}
}
return ret + str.substr(lastIdx);
}
/**
* Creates a function that escapes all characters matched by the given regular
* expression using the given map of characters to escape to their entities.
*
* @param regex Regular expression to match characters to escape.
* @param map Map of characters to escape to their entities.
*
* @returns Function that escapes all characters matched by the given regular
* expression using the given map of characters to escape to their entities.
*/
function getEscaper$1(regex, map) {
return function escape(data) {
let match;
let lastIdx = 0;
let result = "";
while ((match = regex.exec(data))) {
if (lastIdx !== match.index) {
result += data.substring(lastIdx, match.index);
}
// We know that this character will be in the map.
result += map.get(match[0].charCodeAt(0));
// Every match will be of length 1
lastIdx = match.index + 1;
}
return result + data.substring(lastIdx);
};
}
/**
* Encodes all characters that have to be escaped in HTML attributes,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
const escapeAttribute$1 = getEscaper$1(/["&\u00A0]/g, new Map([
[34, """],
[38, "&"],
[160, " "],
]));
/**
* Encodes all characters that have to be escaped in HTML text,
* following {@link https://html.spec.whatwg.org/multipage/parsing.html#escapingString}.
*
* @param data String to escape.
*/
const escapeText$1 = getEscaper$1(/[&<>\u00A0]/g, new Map([
[38, "&"],
[60, "<"],
[62, ">"],
[160, " "],
]));
const elementNames = new Map([
"altGlyph",
"altGlyphDef",
"altGlyphItem",
"animateColor",
"animateMotion",
"animateTransform",
"clipPath",
"feBlend",
"feColorMatrix",
"feComponentTransfer",
"feComposite",
"feConvolveMatrix",
"feDiffuseLighting",
"feDisplacementMap",
"feDistantLight",
"feDropShadow",
"feFlood",
"feFuncA",
"feFuncB",
"feFuncG",
"feFuncR",
"feGaussianBlur",
"feImage",
"feMerge",
"feMergeNode",
"feMorphology",
"feOffset",
"fePointLight",
"feSpecularLighting",
"feSpotLight",
"feTile",
"feTurbulence",
"foreignObject",
"glyphRef",
"linearGradient",
"radialGradient",
"textPath",
].map((val) => [val.toLowerCase(), val]));
const attributeNames = new Map([
"definitionURL",
"attributeName",
"attributeType",
"baseFrequency",
"baseProfile",
"calcMode",
"clipPathUnits",
"diffuseConstant",
"edgeMode",
"filterUnits",
"glyphRef",
"gradientTransform",
"gradientUnits",
"kernelMatrix",
"kernelUnitLength",
"keyPoints",
"keySplines",
"keyTimes",
"lengthAdjust",
"limitingConeAngle",
"markerHeight",
"markerUnits",
"markerWidth",
"maskContentUnits",
"maskUnits",
"numOctaves",
"pathLength",
"patternContentUnits",
"patternTransform",
"patternUnits",
"pointsAtX",
"pointsAtY",
"pointsAtZ",
"preserveAlpha",
"preserveAspectRatio",
"primitiveUnits",
"refX",
"refY",
"repeatCount",
"repeatDur",
"requiredExtensions",
"requiredFeatures",
"specularConstant",
"specularExponent",
"spreadMethod",
"startOffset",
"stdDeviation",
"stitchTiles",
"surfaceScale",
"systemLanguage",
"tableValues",
"targetX",
"targetY",
"textLength",
"viewBox",
"viewTarget",
"xChannelSelector",
"yChannelSelector",
"zoomAndPan",
].map((val) => [val.toLowerCase(), val]));
/*
* Module dependencies
*/
const unencodedElements = new Set([
"style",
"script",
"xmp",
"iframe",
"noembed",
"noframes",
"plaintext",
"noscript",
]);
function replaceQuotes(value) {
return value.replace(/"/g, """);
}
/**
* Format attributes
*/
function formatAttributes(attributes, opts) {
var _a;
if (!attributes)
return;
const encode = ((_a = opts.encodeEntities) !== null && _a !== void 0 ? _a : opts.decodeEntities) === false
? replaceQuotes
: opts.xmlMode || opts.encodeEntities !== "utf8"
? encodeXML
: escapeAttribute$1;
return Object.keys(attributes)
.map((key) => {
var _a, _b;
const value = (_a = attributes[key]) !== null && _a !== void 0 ? _a : "";
if (opts.xmlMode === "foreign") {
/* Fix up mixed-case attribute names */
key = (_b = attributeNames.get(key)) !== null && _b !== void 0 ? _b : key;
}
if (!opts.emptyAttrs && !opts.xmlMode && value === "") {
return key;
}
return `${key}="${encode(value)}"`;
})
.join(" ");
}
/**
* Self-enclosing tags
*/
const singleTag = new Set([
"area",
"base",
"basefont",
"br",
"col",
"command",
"embed",
"frame",
"hr",
"img",
"input",
"isindex",
"keygen",
"link",
"meta",
"param",
"source",
"track",
"wbr",
]);
/**
* Renders a DOM node or an array of DOM nodes to a string.
*
* Can be thought of as the equivalent of the `outerHTML` of the passed node(s).
*
* @param node Node to be rendered.
* @param options Changes serialization behavior
*/
function render$1(node, options = {}) {
const nodes = "length" in node ? node : [node];
let output = "";
for (let i = 0; i < nodes.length; i++) {
output += renderNode(nodes[i], options);
}
return output;
}
function renderNode(node, options) {
switch (node.type) {
case Root:
return render$1(node.children, options);
// @ts-expect-error We don't use `Doctype` yet
case Doctype:
case Directive:
return renderDirective(node);
case Comment$1:
return renderComment(node);
case CDATA$1:
return renderCdata(node);
case Script:
case Style:
case Tag:
return renderTag(node, options);
case Text$1:
return renderText(node, options);
}
}
const foreignModeIntegrationPoints = new Set([
"mi",
"mo",
"mn",
"ms",
"mtext",
"annotation-xml",
"foreignObject",
"desc",
"title",
]);
const foreignElements = new Set(["svg", "math"]);
function renderTag(elem, opts) {
var _a;
// Handle SVG / MathML in HTML
if (opts.xmlMode === "foreign") {
/* Fix up mixed-case element names */
elem.name = (_a = elementNames.get(elem.name)) !== null && _a !== void 0 ? _a : elem.name;
/* Exit foreign mode at integration points */
if (elem.parent &&
foreignModeIntegrationPoints.has(elem.parent.name)) {
opts = { ...opts, xmlMode: false };
}
}
if (!opts.xmlMode && foreignElements.has(elem.name)) {
opts = { ...opts, xmlMode: "foreign" };
}
let tag = `<${elem.name}`;
const attribs = formatAttributes(elem.attribs, opts);
if (attribs) {
tag += ` ${attribs}`;
}
if (elem.children.length === 0 &&
(opts.xmlMode
? // In XML mode or foreign mode, and user hasn't explicitly turned off self-closing tags
opts.selfClosingTags !== false
: // User explicitly asked for self-closing tags, even in HTML mode
opts.selfClosingTags && singleTag.has(elem.name))) {
if (!opts.xmlMode)
tag += " ";
tag += "/>";
}
else {
tag += ">";
if (elem.children.length > 0) {
tag += render$1(elem.children, opts);
}
if (opts.xmlMode || !singleTag.has(elem.name)) {
tag += `</${elem.name}>`;
}
}
return tag;
}
function renderDirective(elem) {
return `<${elem.data}>`;
}
function renderText(elem, opts) {
var _a;
let data = elem.data || "";
// If entities weren't decoded, no need to encode them back
if (((_a = opts.encodeEntities) !== null && _a !== void 0 ? _a : opts.decodeEntities) !== false &&
!(!opts.xmlMode &&
elem.parent &&
unencodedElements.has(elem.parent.name))) {
data =
opts.xmlMode || opts.encodeEntities !== "utf8"
? encodeXML(data)
: escapeText$1(data);
}
return data;
}
function renderCdata(elem) {
return `<![CDATA[${elem.children[0].data}]]>`;
}
function renderComment(elem) {
return `<!--${elem.data}-->`;
}
/**
* @category Stringify
* @deprecated Use the `dom-serializer` module directly.
* @param node Node to get the outer HTML of.
* @param options Options for serialization.
* @returns `node`'s outer HTML.
*/
function getOuterHTML(node, options) {
return render$1(node, options);
}
/**
* @category Stringify
* @deprecated Use the `dom-serializer` module directly.
* @param node Node to get the inner HTML of.
* @param options Options for serialization.
* @returns `node`'s inner HTML.
*/
function getInnerHTML(node, options) {
return hasChildren(node)
? node.children.map((node) => getOuterHTML(node, options)).join("")
: "";
}
/**
* Get a node's inner text. Same as `textContent`, but inserts newlines for `<br>` tags. Ignores comments.
*
* @category Stringify
* @deprecated Use `textContent` instead.
* @param node Node to get the inner text of.
* @returns `node`'s inner text.
*/
function getText(node) {
if (Array.isArray(node))
return node.map(getText).join("");
if (isTag(node))
return node.name === "br" ? "\n" : getText(node.children);
if (isCDATA(node))
return getText(node.children);
if (isText(node))
return node.data;
return "";
}
/**
* Get a node's text content. Ignores comments.
*
* @category Stringify
* @param node Node to get the text content of.
* @returns `node`'s text content.
* @see {@link https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent}
*/
function textContent(node) {
if (Array.isArray(node))
return node.map(textContent).join("");
if (hasChildren(node) && !isComment(node)) {
return textContent(node.children);
}
if (isText(node))
return node.data;
return "";
}
/**
* Get a node's inner text, ignoring `<script>` and `<style>` tags. Ignores comments.
*
* @category Stringify
* @param node Node to get the inner text of.
* @returns `node`'s inner text.
* @see {@link https://developer.mozilla.org/en-US/docs/Web/API/Node/innerText}
*/
function innerText(node) {
if (Array.isArray(node))
return node.map(innerText).join("");
if (hasChildren(node) && (node.type === ElementType.Tag || isCDATA(node))) {
return innerText(node.children);
}
if (isText(node))
return node.data;
return "";
}
/**
* Get a node's children.
*
* @category Traversal
* @param elem Node to get the children of.
* @returns `elem`'s children, or an empty array.
*/
function getChildren(elem) {
return hasChildren(elem) ? elem.children : [];
}
/**
* Get a node's parent.
*
* @category Traversal
* @param elem Node to get the parent of.
* @returns `elem`'s parent node, or `null` if `elem` is a root node.
*/
function getParent(elem) {
return elem.parent || null;
}
/**
* Gets an elements siblings, including the element itself.
*
* Attempts to get the children through the element's parent first. If we don't
* have a parent (the element is a root node), we walk the element's `prev` &
* `next` to get all remaining nodes.
*
* @category Traversal
* @param elem Element to get the siblings of.
* @returns `elem`'s siblings, including `elem`.
*/
function getSiblings(elem) {
const parent = getParent(elem);
if (parent != null)
return getChildren(parent);
const siblings = [elem];
let { prev, next } = elem;
while (prev != null) {
siblings.unshift(prev);
({ prev } = prev);
}
while (next != null) {
siblings.push(next);
({ next } = next);
}
return siblings;
}
/**
* Gets an attribute from an element.
*
* @category Traversal
* @param elem Element to check.
* @param name Attribute name to retrieve.
* @returns The element's attribute value, or `undefined`.
*/
function getAttributeValue(elem, name) {
var _a;
return (_a = elem.attribs) === null || _a === void 0 ? void 0 : _a[name];
}
/**
* Checks whether an element has an attribute.
*
* @category Traversal
* @param elem Element to check.
* @param name Attribute name to look for.
* @returns Returns whether `elem` has the attribute `name`.
*/
function hasAttrib(elem, name) {
return (elem.attribs != null &&
Object.prototype.hasOwnProperty.call(elem.attribs, name) &&
elem.attribs[name] != null);
}
/**
* Get the tag name of an element.
*
* @category Traversal
* @param elem The element to get the name for.
* @returns The tag name of `elem`.
*/
function getName(elem) {
return elem.name;
}
/**
* Returns the next element sibling of a node.
*
* @category Traversal
* @param elem The element to get the next sibling of.
* @returns `elem`'s next sibling that is a tag, or `null` if there is no next
* sibling.
*/
function nextElementSibling(elem) {
let { next } = elem;
while (next !== null && !isTag(next))
({ next } = next);
return next;
}
/**
* Returns the previous element sibling of a node.
*
* @category Traversal
* @param elem The element to get the previous sibling of.
* @returns `elem`'s previous sibling that is a tag, or `null` if there is no
* previous sibling.
*/
function prevElementSibling(elem) {
let { prev } = elem;
while (prev !== null && !isTag(prev))
({ prev } = prev);
return prev;
}
/**
* Remove an element from the dom
*
* @category Manipulation
* @param elem The element to be removed
*/
function removeElement(elem) {
if (elem.prev)
elem.prev.next = elem.next;
if (elem.next)
elem.next.prev = elem.prev;
if (elem.parent) {
const childs = elem.parent.children;
const childsIndex = childs.lastIndexOf(elem);
if (childsIndex >= 0) {
childs.splice(childsIndex, 1);
}
}
elem.next = null;
elem.prev = null;
elem.parent = null;
}
/**
* Replace an element in the dom
*
* @category Manipulation
* @param elem The element to be replaced
* @param replacement The element to be added
*/
function replaceElement(elem, replacement) {
const prev = (replacement.prev = elem.prev);
if (prev) {
prev.next = replacement;
}
const next = (replacement.next = elem.next);
if (next) {
next.prev = replacement;
}
const parent = (replacement.parent = elem.parent);
if (parent) {
const childs = parent.children;
childs[childs.lastIndexOf(elem)] = replacement;
elem.parent = null;
}
}
/**
* Append a child to an element.
*
* @category Manipulation
* @param parent The element to append to.
* @param child The element to be added as a child.
*/
function appendChild(parent, child) {
removeElement(child);
child.next = null;
child.parent = parent;
if (parent.children.push(child) > 1) {
const sibling = parent.children[parent.children.length - 2];
sibling.next = child;
child.prev = sibling;
}
else {
child.prev = null;
}
}
/**
* Append an element after another.
*
* @category Manipulation
* @param elem The element to append after.
* @param next The element be added.
*/
function append$1(elem, next) {
removeElement(next);
const { parent } = elem;
const currNext = elem.next;
next.next = currNext;
next.prev = elem;
elem.next = next;
next.parent = parent;
if (currNext) {
currNext.prev = next;
if (parent) {
const childs = parent.children;
childs.splice(childs.lastIndexOf(currNext), 0, next);
}
}
else if (parent) {
parent.children.push(next);
}
}
/**
* Prepend a child to an element.
*
* @category Manipulation
* @param parent The element to prepend before.
* @param child The element to be added as a child.
*/
function prependChild(parent, child) {
removeElement(child);
child.parent = parent;
child.prev = null;
if (parent.children.unshift(child) !== 1) {
const sibling = parent.children[1];
sibling.prev = child;
child.next = sibling;
}
else {
child.next = null;
}
}
/**
* Prepend an element before another.
*
* @category Manipulation
* @param elem The element to prepend before.
* @param prev The element be added.
*/
function prepend$1(elem, prev) {
removeElement(prev);
const { parent } = elem;
if (parent) {
const childs = parent.children;
childs.splice(childs.indexOf(elem), 0, prev);
}
if (elem.prev) {
elem.prev.next = prev;
}
prev.parent = parent;
prev.prev = elem.prev;
prev.next = elem;
elem.prev = prev;
}
/**
* Search a node and its children for nodes passing a test function. If `node` is not an array, it will be wrapped in one.
*
* @category Querying
* @param test Function to test nodes on.
* @param node Node to search. Will be included in the result set if it matches.
* @param recurse Also consider child nodes.
* @param limit Maximum number of nodes to return.
* @returns All nodes passing `test`.
*/
function filter$2(test, node, recurse = true, limit = Infinity) {
return find$2(test, Array.isArray(node) ? node : [node], recurse, limit);
}
/**
* Search an array of nodes and their children for nodes passing a test function.
*
* @category Querying
* @param test Function to test nodes on.
* @param nodes Array of nodes to search.
* @param recurse Also consider child nodes.
* @param limit Maximum number of nodes to return.
* @returns All nodes passing `test`.
*/
function find$2(test, nodes, recurse, limit) {
const result = [];
/** Stack of the arrays we are looking at. */
const nodeStack = [Array.isArray(nodes) ? nodes : [nodes]];
/** Stack of the indices within the arrays. */
const indexStack = [0];
for (;;) {
// First, check if the current array has any more elements to look at.
if (indexStack[0] >= nodeStack[0].length) {
// If we have no more arrays to look at, we are done.
if (indexStack.length === 1) {
return result;
}
// Otherwise, remove the current array from the stack.
nodeStack.shift();
indexStack.shift();
// Loop back to the start to continue with the next array.
continue;
}
const elem = nodeStack[0][indexStack[0]++];
if (test(elem)) {
result.push(elem);
if (--limit <= 0)
return result;
}
if (recurse && hasChildren(elem) && elem.children.length > 0) {
/*
* Add the children to the stack. We are depth-first, so this is
* the next array we look at.
*/
indexStack.unshift(0);
nodeStack.unshift(elem.children);
}
}
}
/**
* Finds the first element inside of an array that matches a test function. This is an alias for `Array.prototype.find`.
*
* @category Querying
* @param test Function to test nodes on.
* @param nodes Array of nodes to search.
* @returns The first node in the array that passes `test`.
* @deprecated Use `Array.prototype.find` directly.
*/
function findOneChild(test, nodes) {
return nodes.find(test);
}
/**
* Finds one element in a tree that passes a test.
*
* @category Querying
* @param test Function to test nodes on.
* @param nodes Node or array of nodes to search.
* @param recurse Also consider child nodes.
* @returns The first node that passes `test`.
*/
function findOne(test, nodes, recurse = true) {
const searchedNodes = Array.isArray(nodes) ? nodes : [nodes];
for (let i = 0; i < searchedNodes.length; i++) {
const node = searchedNodes[i];
if (isTag(node) && test(node)) {
return node;
}
if (recurse && hasChildren(node) && node.children.length > 0) {
const found = findOne(test, node.children, true);
if (found)
return found;
}
}
return null;
}
/**
* Checks if a tree of nodes contains at least one node passing a test.
*
* @category Querying
* @param test Function to test nodes on.
* @param nodes Array of nodes to search.
* @returns Whether a tree of nodes contains at least one node passing the test.
*/
function existsOne(test, nodes) {
return (Array.isArray(nodes) ? nodes : [nodes]).some((node) => (isTag(node) && test(node)) ||
(hasChildren(node) && existsOne(test, node.children)));
}
/**
* Search an array of nodes and their children for elements passing a test function.
*
* Same as `find`, but limited to elements and with less options, leading to reduced complexity.
*
* @category Querying
* @param test Function to test nodes on.
* @param nodes Array of nodes to search.
* @returns All nodes passing `test`.
*/
function findAll(test, nodes) {
const result = [];
const nodeStack = [Array.isArray(nodes) ? nodes : [nodes]];
const indexStack = [0];
for (;;) {
if (indexStack[0] >= nodeStack[0].length) {
if (nodeStack.length === 1) {
return result;
}
// Otherwise, remove the current array from the stack.
nodeStack.shift();
indexStack.shift();
// Loop back to the start to continue with the next array.
continue;
}
const elem = nodeStack[0][indexStack[0]++];
if (isTag(elem) && test(elem))
result.push(elem);
if (hasChildren(elem) && elem.children.length > 0) {
indexStack.unshift(0);
nodeStack.unshift(elem.children);
}
}
}
/**
* A map of functions to check nodes against.
*/
const Checks = {
tag_name(name) {
if (typeof name === "function") {
return (elem) => isTag(elem) && name(elem.name);
}
else if (name === "*") {
return isTag;
}
return (elem) => isTag(elem) && elem.name === name;
},
tag_type(type) {
if (typeof type === "function") {
return (elem) => type(elem.type);
}
return (elem) => elem.type === type;
},
tag_contains(data) {
if (typeof data === "function") {
return (elem) => isText(elem) && data(elem.data);
}
return (elem) => isText(elem) && elem.data === data;
},
};
/**
* Returns a function to check whether a node has an attribute with a particular
* value.
*
* @param attrib Attribute to check.
* @param value Attribute value to look for.
* @returns A function to check whether the a node has an attribute with a
* particular value.
*/
function getAttribCheck(attrib, value) {
if (typeof value === "function") {
return (elem) => isTag(elem) && value(elem.attribs[attrib]);
}
return (elem) => isTag(elem) && elem.attribs[attrib] === value;
}
/**
* Returns a function that returns `true` if either of the input functions
* returns `true` for a node.
*
* @param a First function to combine.
* @param b Second function to combine.
* @returns A function taking a node and returning `true` if either of the input
* functions returns `true` for the node.
*/
function combineFuncs(a, b) {
return (elem) => a(elem) || b(elem);
}
/**
* Returns a function that executes all checks in `options` and returns `true`
* if any of them match a node.
*
* @param options An object describing nodes to look for.
* @returns A function that executes all checks in `options` and returns `true`
* if any of them match a node.
*/
function compileTest(options) {
const funcs = Object.keys(options).map((key) => {
const value = options[key];
return Object.prototype.hasOwnProperty.call(Checks, key)
? Checks[key](value)
: getAttribCheck(key, value);
});
return funcs.length === 0 ? null : funcs.reduce(combineFuncs);
}
/**
* Checks whether a node matches the description in `options`.
*
* @category Legacy Query Functions
* @param options An object describing nodes to look for.
* @param node The element to test.
* @returns Whether the element matches the description in `options`.
*/
function testElement(options, node) {
const test = compileTest(options);
return test ? test(node) : true;
}
/**
* Returns all nodes that match `options`.
*
* @category Legacy Query Functions
* @param options An object describing nodes to look for.
* @param nodes Nodes to search through.
* @param recurse Also consider child nodes.
* @param limit Maximum number of nodes to return.
* @returns All nodes that match `options`.
*/
function getElements(options, nodes, recurse, limit = Infinity) {
const test = compileTest(options);
return test ? filter$2(test, nodes, recurse, limit) : [];
}
/**
* Returns the node with the supplied ID.
*
* @category Legacy Query Functions
* @param id The unique ID attribute value to look for.
* @param nodes Nodes to search through.
* @param recurse Also consider child nodes.
* @returns The node with the supplied ID.
*/
function getElementById(id, nodes, recurse = true) {
if (!Array.isArray(nodes))
nodes = [nodes];
return findOne(getAttribCheck("id", id), nodes, recurse);
}
/**
* Returns all nodes with the supplied `tagName`.
*
* @category Legacy Query Functions
* @param tagName Tag name to search for.
* @param nodes Nodes to search through.
* @param recurse Also consider child nodes.
* @param limit Maximum number of nodes to return.
* @returns All nodes with the supplied `tagName`.
*/
function getElementsByTagName(tagName, nodes, recurse = true, limit = Infinity) {
return filter$2(Checks["tag_name"](tagName), nodes, recurse, limit);
}
/**
* Returns all nodes with the supplied `className`.
*
* @category Legacy Query Functions
* @param className Class name to search for.
* @param nodes Nodes to search through.
* @param recurse Also consider child nodes.
* @param limit Maximum number of nodes to return.
* @returns All nodes with the supplied `className`.
*/
function getElementsByClassName(className, nodes, recurse = true, limit = Infinity) {
return filter$2(getAttribCheck("class", className), nodes, recurse, limit);
}
/**
* Returns all nodes with the supplied `type`.
*
* @category Legacy Query Functions
* @param type Element type to look for.
* @param nodes Nodes to search through.
* @param recurse Also consider child nodes.
* @param limit Maximum number of nodes to return.
* @returns All nodes with the supplied `type`.
*/
function getElementsByTagType(type, nodes, recurse = true, limit = Infinity) {
return filter$2(Checks["tag_type"](type), nodes, recurse, limit);
}
/**
* Given an array of nodes, remove any member that is contained by another
* member.
*
* @category Helpers
* @param nodes Nodes to filter.
* @returns Remaining nodes that aren't contained by other nodes.
*/
function removeSubsets(nodes) {
let idx = nodes.length;
/*
* Check if each node (or one of its ancestors) is already contained in the
* array.
*/
while (--idx >= 0) {
const node = nodes[idx];
/*
* Remove the node if it is not unique.
* We are going through the array from the end, so we only
* have to check nodes that preceed the node under consideration in the array.
*/
if (idx > 0 && nodes.lastIndexOf(node, idx - 1) >= 0) {
nodes.splice(idx, 1);
continue;
}
for (let ancestor = node.parent; ancestor; ancestor = ancestor.parent) {
if (nodes.includes(ancestor)) {
nodes.splice(idx, 1);
break;
}
}
}
return nodes;
}
/**
* @category Helpers
* @see {@link http://dom.spec.whatwg.org/#dom-node-comparedocumentposition}
*/
var DocumentPosition;
(function (DocumentPosition) {
DocumentPosition[DocumentPosition["DISCONNECTED"] = 1] = "DISCONNECTED";
DocumentPosition[DocumentPosition["PRECEDING"] = 2] = "PRECEDING";
DocumentPosition[DocumentPosition["FOLLOWING"] = 4] = "FOLLOWING";
DocumentPosition[DocumentPosition["CONTAINS"] = 8] = "CONTAINS";
DocumentPosition[DocumentPosition["CONTAINED_BY"] = 16] = "CONTAINED_BY";
})(DocumentPosition || (DocumentPosition = {}));
/**
* Compare the position of one node against another node in any other document,
* returning a bitmask with the values from {@link DocumentPosition}.
*
* Document order:
* > There is an ordering, document order, defined on all the nodes in the
* > document corresponding to the order in which the first character of the
* > XML representation of each node occurs in the XML representation of the
* > document after expansion of general entities. Thus, the document element
* > node will be the first node. Element nodes occur before their children.
* > Thus, document order orders element nodes in order of the occurrence of
* > their start-tag in the XML (after expansion of entities). The attribute
* > nodes of an element occur after the element and before its children. The
* > relative order of attribute nodes is implementation-dependent.
*
* Source:
* http://www.w3.org/TR/DOM-Level-3-Core/glossary.html#dt-document-order
*
* @category Helpers
* @param nodeA The first node to use in the comparison
* @param nodeB The second node to use in the comparison
* @returns A bitmask describing the input nodes' relative position.
*
* See http://dom.spec.whatwg.org/#dom-node-comparedocumentposition for
* a description of these values.
*/
function compareDocumentPosition(nodeA, nodeB) {
const aParents = [];
const bParents = [];
if (nodeA === nodeB) {
return 0;
}
let current = hasChildren(nodeA) ? nodeA : nodeA.parent;
while (current) {
aParents.unshift(current);
current = current.parent;
}
current = hasChildren(nodeB) ? nodeB : nodeB.parent;
while (current) {
bParents.unshift(current);
current = current.parent;
}
const maxIdx = Math.min(aParents.length, bParents.length);
let idx = 0;
while (idx < maxIdx && aParents[idx] === bParents[idx]) {
idx++;
}
if (idx === 0) {
return DocumentPosition.DISCONNECTED;
}
const sharedParent = aParents[idx - 1];
const siblings = sharedParent.children;
const aSibling = aParents[idx];
const bSibling = bParents[idx];
if (siblings.indexOf(aSibling) > siblings.indexOf(bSibling)) {
if (sharedParent === nodeB) {
return DocumentPosition.FOLLOWING | DocumentPosition.CONTAINED_BY;
}
return DocumentPosition.FOLLOWING;
}
if (sharedParent === nodeA) {
return DocumentPosition.PRECEDI