UNPKG

remark-accurate-remove

Version:

Remove anything from the markdown accurately.

203 lines (200 loc) 9.2 kB
import * as cheerio from 'cheerio';

/**
 * Concatenates the string content of a node and of all its descendants.
 *
 * @param {object} [node] - Any tree node; falsy values yield `''`.
 * @returns {string} `node.value` when it is a string, otherwise the joined
 *   text of `node.children`, otherwise `''`.
 */
const nodeToString = (node) => {
  if (!node) {
    return '';
  }
  if (typeof node.value === 'string') {
    return node.value;
  }
  if (Array.isArray(node.children)) {
    return node.children.map(nodeToString).join('');
  }
  return '';
};

/**
 * Splits a heading query such as `"## Usage"` into its depth and text.
 *
 * @param {string} query - Heading text, optionally prefixed with `#` characters.
 * @returns {{ level: (number|undefined), text: string }} `level` is the number
 *   of leading `#` characters, or `undefined` when there are none (meaning
 *   "match a heading of any depth").
 */
const parseHeadingQuery = (query) => {
  const trimmed = query.trim();
  const hashes = trimmed.match(/^#+/);
  if (!hashes) {
    return { level: undefined, text: trimmed };
  }
  const level = hashes[0].length;
  return { level, text: trimmed.slice(level).trim() };
};

/**
 * Finds the first heading at or after `fromIndex` that matches `query`.
 *
 * @param {Array<object>} children - Direct children of the tree root.
 * @param {string} [query] - Heading query; a non-string never matches.
 * @param {number} fromIndex - Index at which the scan starts.
 * @returns {number} Index of the matching heading, or `-1`.
 */
const findHeadingIndex = (children, query, fromIndex) => {
  if (typeof query !== 'string') {
    return -1;
  }
  const { level, text } = parseHeadingQuery(query);
  for (let i = fromIndex; i < children.length; i++) {
    const node = children[i];
    if (!node || node.type !== 'heading') {
      continue;
    }
    if (level !== undefined && node.depth !== level) {
      continue;
    }
    if (nodeToString(node).trim() === text) {
      return i;
    }
  }
  return -1;
};

/**
 * Resolves a `[startHeading, endHeading]` range against the top-level children
 * of `tree`.
 *
 * @param {object} tree - Tree root; only its direct children are searched.
 * @param {Array<string>} range - Start and (optional) end heading queries.
 * @returns {({ startIndex: number, endIndex: number }|null)} `null` when the
 *   start heading is absent. `endIndex` is the index of the end heading, or
 *   `tree.children.length` when the end heading is missing or not given, so the
 *   section then runs to the end of the document.
 */
const locateSection = (tree, range) => {
  const [startMarker, endMarker] = range;
  const startIndex = findHeadingIndex(tree.children, startMarker, 0);
  if (startIndex === -1) {
    return null;
  }
  // The end heading is only searched for after the start heading.
  const foundEnd = findHeadingIndex(tree.children, endMarker, startIndex + 1);
  return {
    startIndex,
    endIndex: foundEnd === -1 ? tree.children.length : foundEnd,
  };
};

/**
 * Deletes every top-level node of a section, optionally together with the
 * boundary headings. Does nothing without a range or when the start heading is
 * not found.
 *
 * @param {object} tree - Tree root, mutated in place.
 * @param {Array<string>} remove - Step options; may contain
 *   `'including-start-boundary'`, `'including-end-boundary'` or
 *   `'including-boundaries'`.
 * @param {Array<string>} [range] - Start and end heading queries.
 */
const removeAllContent = (tree, remove, range) => {
  if (!range) {
    return;
  }
  const section = locateSection(tree, range);
  if (!section) {
    return;
  }
  const includeBoth = remove.includes('including-boundaries');
  const includeStart = includeBoth || remove.includes('including-start-boundary');
  const includeEnd = includeBoth || remove.includes('including-end-boundary');

  const removeStart = includeStart ? section.startIndex : section.startIndex + 1;
  // Clamp: with an open-ended section `endIndex` equals `children.length`.
  const removeEnd = Math.min(
    includeEnd ? section.endIndex : section.endIndex - 1,
    tree.children.length - 1,
  );
  if (removeEnd < removeStart) {
    return;
  }
  tree.children.splice(removeStart, removeEnd - removeStart + 1);
};

/**
 * Removes the elements matching `selector` from the markup of one `html` node.
 *
 * The node is rewritten only when something was actually removed: `html` nodes
 * frequently hold fragments (for example a lone opening tag), and
 * re-serialising such a fragment would alter markup the caller never asked to
 * change.
 *
 * @param {{ value: string }} htmlNode - `html` node, mutated in place.
 * @param {string} selector - Selector handed to cheerio.
 * @returns {boolean} `true` when the node ended up empty and should be deleted
 *   by the caller.
 */
const pruneHtmlNode = (htmlNode, selector) => {
  const $ = cheerio.load(htmlNode.value, {
    xml: { decodeEntities: false, xmlMode: false },
  });
  let removedCount = 0;
  try {
    const matches = $(selector);
    removedCount = matches.length;
    matches.remove();
  } catch {
    // Best effort, as before: a selector cheerio cannot handle removes nothing.
    return false;
  }
  if (removedCount === 0) {
    return false;
  }
  const updatedHTML = $.html();
  if (!updatedHTML || updatedHTML.trim() === '') {
    return true;
  }
  htmlNode.value = updatedHTML;
  return false;
};

/**
 * Walks `parent.children` within `[firstIndex, lastIndex]`, stripping `html`
 * nodes and recursing (unbounded) into every other node that has children.
 *
 * @param {object} parent - Node whose children are processed in place.
 * @param {(string|null|undefined)} selector - When falsy, whole `html` nodes
 *   are deleted; otherwise only the matching elements inside them.
 * @param {number} [firstIndex] - First child index, defaults to `0`.
 * @param {number} [lastIndex] - Last child index, defaults to the last child.
 */
const stripHtml = (parent, selector, firstIndex, lastIndex) => {
  const { children } = parent;
  if (!children) {
    return;
  }
  const low = firstIndex === undefined ? 0 : firstIndex;
  const high = lastIndex === undefined ? children.length - 1 : lastIndex;
  // Iterate backwards so that splicing does not shift unvisited indices.
  for (let i = high; i >= low; i--) {
    const node = children[i];
    if (!node) {
      continue;
    }
    if (node.type !== 'html') {
      if (node.children) {
        stripHtml(node, selector);
      }
      continue;
    }
    if (!selector || pruneHtmlNode(node, selector)) {
      children.splice(i, 1);
    }
  }
};

/**
 * Handles a `['html']` / `['html', { $: selector }]` step.
 *
 * @param {object} tree - Tree root, mutated in place.
 * @param {Array} remove - Step options.
 * @param {Array<string>} [range] - When given, only nodes strictly between the
 *   two boundary headings are processed.
 */
const removeHtml = (tree, remove, range) => {
  const options = remove[1];
  // `typeof null === 'object'`, so null has to be excluded explicitly.
  const hasOptions = remove.length === 2 && typeof options === 'object' && options !== null;
  const selector = hasOptions ? options.$ : null;

  if (!range) {
    stripHtml(tree, selector);
    return;
  }
  const section = locateSection(tree, range);
  if (!section) {
    return;
  }
  stripHtml(tree, selector, section.startIndex + 1, section.endIndex - 1);
};

/**
 * Builds a global regular expression from a pattern string or a RegExp-like
 * object (anything exposing string `source` and `flags`).
 *
 * @param {(string|RegExp)} pattern - Pattern supplied by the step.
 * @returns {(RegExp|null)} A fresh regex carrying the `g` flag, or `null` when
 *   `pattern` is unusable.
 */
const toGlobalRegex = (pattern) => {
  if (typeof pattern === 'string') {
    return new RegExp(pattern, 'g');
  }
  if (!pattern || typeof pattern.source !== 'string' || typeof pattern.flags !== 'string') {
    return null;
  }
  const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
  return new RegExp(pattern.source, flags);
};

/**
 * Deletes every match of `regex` from the `text` nodes at or below `node`.
 *
 * @param {object} [node] - Subtree root, mutated in place.
 * @param {RegExp} regex - Global regular expression.
 */
const removePatternInNode = (node, regex) => {
  if (!node) {
    return;
  }
  if (node.type === 'text') {
    node.value = node.value.replace(regex, '');
    return;
  }
  if (node.children) {
    for (const child of node.children) {
      removePatternInNode(child, regex);
    }
  }
};

/**
 * Handles a `['regex', pattern]` step.
 *
 * @param {object} tree - Tree root, mutated in place.
 * @param {Array} remove - Step options; `remove[1]` is the pattern.
 * @param {Array<string>} [range] - When given, only nodes strictly between the
 *   two boundary headings are processed.
 */
const removeByRegex = (tree, remove, range) => {
  const regex = toGlobalRegex(remove[1]);
  if (!regex) {
    return;
  }
  if (!range) {
    removePatternInNode(tree, regex);
    return;
  }
  const section = locateSection(tree, range);
  if (!section) {
    return;
  }
  for (let idx = section.startIndex + 1; idx < section.endIndex; idx++) {
    removePatternInNode(tree.children[idx], regex);
  }
};

/**
 * Creates a tree transformer that applies a list of removal steps in order.
 *
 * Each step is `{ remove, range }`:
 * - `remove` containing `'all-content'` deletes the top-level nodes of the
 *   section described by `range` (a range is required), optionally with the
 *   boundary headings.
 * - `remove` starting with `'html'` deletes `html` nodes, or only the elements
 *   matching `remove[1].$` inside them.
 * - `remove` starting with `'regex'` deletes matches of `remove[1]` from
 *   `text` nodes.
 * - `range` is `[startHeading, endHeading]`; each entry is heading text,
 *   optionally prefixed with `#` characters to pin the heading depth. A missing
 *   end heading makes the section run to the end of the document.
 *
 * Steps whose `remove` is not an array, or that match none of the kinds above,
 * are skipped.
 *
 * @param {Array<{ remove: Array, range?: Array<string> }>} [steps=[]] - Removal steps.
 * @returns {function(object): object} Transformer that mutates and returns the tree.
 */
const remarkAccurateRemove = (steps = []) => {
  return (tree) => {
    for (const step of steps) {
      if (!step || !Array.isArray(step.remove)) {
        continue;
      }
      const { remove, range } = step;
      if (remove.includes('all-content')) {
        removeAllContent(tree, remove, range);
        continue;
      }
      const [removeType] = remove;
      if (removeType === 'html') {
        removeHtml(tree, remove, range);
      } else if (removeType === 'regex') {
        removeByRegex(tree, remove, range);
      }
    }
    return tree;
  };
};

export { remarkAccurateRemove };
//# sourceMappingURL=index.esm.js.map