UNPKG

domutils

Version:

Utilities for working with htmlparser2's dom

155 lines (139 loc) 4.84 kB
import { type AnyNode, hasChildren, type ParentNode } from "domhandler";

/**
 * Given an array of nodes, remove any member that is contained by another
 * member, as well as any duplicate entries.
 *
 * The array is modified in place and the same array instance is returned.
 *
 * @category Helpers
 * @param nodes Nodes to filter.
 * @returns Remaining nodes that aren't contained by other nodes.
 */
export function removeSubsets(nodes: AnyNode[]): AnyNode[] {
    let index = nodes.length;

    /*
     * Check if each node (or one of its ancestors) is already contained in the
     * array.
     */
    while (--index >= 0) {
        const node = nodes[index];

        /*
         * Remove the node if it is not unique.
         * We are going through the array from the end, so we only
         * have to check nodes that precede the node under consideration in
         * the array.
         */
        if (index > 0 && nodes.lastIndexOf(node, index - 1) >= 0) {
            nodes.splice(index, 1);
            continue;
        }

        // Walk up the parent chain; drop the node once any ancestor is listed.
        for (
            let ancestor: ParentNode | null = node.parent;
            ancestor;
            ancestor = ancestor.parent
        ) {
            if (nodes.includes(ancestor)) {
                nodes.splice(index, 1);
                break;
            }
        }
    }

    return nodes;
}

/**
 * Bit flags returned by {@link compareDocumentPosition}.
 *
 * @category Helpers
 * @see {@link http://dom.spec.whatwg.org/#dom-node-comparedocumentposition}
 */
export const enum DocumentPosition {
    DISCONNECTED = 1,
    PRECEDING = 2,
    FOLLOWING = 4,
    CONTAINS = 8,
    CONTAINED_BY = 16,
}

/**
 * Collect the chain of nodes from the topmost ancestor down to (and
 * including) `node` itself.
 *
 * @param node Node whose ancestry should be collected.
 * @returns Nodes ordered from the root of the tree to `node`.
 */
function getAncestorPath(node: AnyNode): AnyNode[] {
    const path: AnyNode[] = [];
    let current: AnyNode | null = node;

    while (current) {
        path.unshift(current);
        current = current.parent;
    }

    return path;
}

/**
 * Compare the position of one node against another node in any other document,
 * returning a bitmask with the values from {@link DocumentPosition}.
 *
 * The flags describe `nodeA` relative to `nodeB`: `PRECEDING` is set when
 * `nodeA` comes before `nodeB` in document order, `FOLLOWING` when it comes
 * after. `CONTAINS` is set when `nodeA` is an ancestor of `nodeB`, and
 * `CONTAINED_BY` when `nodeA` is a descendant of `nodeB`. Identical nodes
 * yield `0`, and nodes without a common ancestor yield `DISCONNECTED`.
 *
 * Document order:
 * > There is an ordering, document order, defined on all the nodes in the
 * > document corresponding to the order in which the first character of the
 * > XML representation of each node occurs in the XML representation of the
 * > document after expansion of general entities. Thus, the document element
 * > node will be the first node. Element nodes occur before their children.
 * > Thus, document order orders element nodes in order of the occurrence of
 * > their start-tag in the XML (after expansion of entities). The attribute
 * > nodes of an element occur after the element and before its children. The
 * > relative order of attribute nodes is implementation-dependent.
 *
 * Source:
 * http://www.w3.org/TR/DOM-Level-3-Core/glossary.html#dt-document-order
 *
 * @category Helpers
 * @param nodeA The first node to use in the comparison
 * @param nodeB The second node to use in the comparison
 * @returns A bitmask describing the input nodes' relative position.
 *
 * See http://dom.spec.whatwg.org/#dom-node-comparedocumentposition for
 * a description of these values.
 */
export function compareDocumentPosition(
    nodeA: AnyNode,
    nodeB: AnyNode,
): number {
    if (nodeA === nodeB) {
        return 0;
    }

    /*
     * Both paths include the compared node itself. Leaf nodes (which have no
     * `children`) must be part of their own path, otherwise there would be
     * no entry to look up among the shared parent's children below.
     */
    const aPath = getAncestorPath(nodeA);
    const bPath = getAncestorPath(nodeB);

    // Find the length of the common prefix of both paths.
    const maxIndex = Math.min(aPath.length, bPath.length);
    let index = 0;
    while (index < maxIndex && aPath[index] === bPath[index]) {
        index++;
    }

    // Not even the topmost ancestor is shared.
    if (index === 0) {
        return DocumentPosition.DISCONNECTED;
    }

    const sharedParent = aPath[index - 1];

    // One node being the deepest shared ancestor means it contains the other.
    if (sharedParent === nodeA) {
        return DocumentPosition.PRECEDING | DocumentPosition.CONTAINS;
    }
    if (sharedParent === nodeB) {
        return DocumentPosition.FOLLOWING | DocumentPosition.CONTAINED_BY;
    }

    /*
     * Otherwise the paths diverge below `sharedParent`; the order of the two
     * diverging children decides the result. The `hasChildren` check narrows
     * the type; a shared ancestor is a parent of other nodes on both paths.
     */
    const siblings: AnyNode[] = hasChildren(sharedParent)
        ? sharedParent.children
        : [];
    const aSibling = aPath[index];
    const bSibling = bPath[index];

    return siblings.indexOf(aSibling) > siblings.indexOf(bSibling)
        ? DocumentPosition.FOLLOWING
        : DocumentPosition.PRECEDING;
}

/**
 * Sort an array of nodes based on their relative position in the document,
 * removing any duplicate nodes. If the array contains nodes that do not belong
 * to the same document, sort order is unspecified.
 *
 * The input array is left untouched; a new array is returned.
 *
 * @category Helpers
 * @param nodes Array of DOM nodes.
 * @returns Collection of unique nodes, sorted in document order.
 */
export function uniqueSort<T extends AnyNode>(nodes: T[]): T[] {
    // Keep a node only if it does not occur again later in the array.
    nodes = nodes.filter(
        (node, index, array) => !array.includes(node, index + 1),
    );

    nodes.sort((a, b) => {
        const relative = compareDocumentPosition(a, b);
        if (relative & DocumentPosition.PRECEDING) {
            return -1;
        }
        if (relative & DocumentPosition.FOLLOWING) {
            return 1;
        }
        return 0;
    });

    return nodes;
}