remark-accurate-remove
Version:
Remove anything from the markdown accurately.
203 lines (200 loc) • 9.2 kB
JavaScript
import * as cheerio from 'cheerio';
/**
 * Builds a transformer that removes content from an mdast tree in place.
 *
 * Each step is an object `{ remove, range }`:
 *  - `remove` is an array. If it contains `'all-content'`, every top-level node
 *    between the two range headings is removed; the optional entries
 *    `'including-start-boundary'`, `'including-end-boundary'` and
 *    `'including-boundaries'` extend the removal to the headings themselves.
 *    Otherwise its first entry selects the mode:
 *      - `['html']` removes every `html` node; `['html', { $: selector }]`
 *        removes only the elements matching `selector` inside `html` nodes.
 *      - `['regex', pattern]` deletes every match of `pattern` (string or
 *        RegExp-like) from `text` nodes.
 *  - `range` is `[startHeading, endHeading]`. A heading query is the heading
 *    text, optionally prefixed with `#` characters to also require that depth.
 *    Only top-level headings are considered. When the end heading is omitted or
 *    not found, the section runs to the end of the document. `'all-content'`
 *    requires a range; for `html` and `regex` a missing range means the whole
 *    tree, and a given range is always exclusive of its boundary headings.
 *
 * Steps that are malformed (no `remove` array, unusable range, missing regex
 * pattern) or whose start heading cannot be found are skipped.
 *
 * @param {Iterable<object>} [steps=[]] Removal steps, applied in order.
 * @returns {(tree: object) => object} Transformer that mutates and returns `tree`.
 */
const remarkAccurateRemove = (steps = []) => {
  const stepList = steps === null ? [] : steps;

  /** Concatenates the string `value`s found in `node` and its descendants. */
  const nodeToString = (node) => {
    if (!node) {
      return '';
    }
    if (typeof node.value === 'string') {
      return node.value;
    }
    if (Array.isArray(node.children)) {
      return node.children.map(nodeToString).join('');
    }
    return '';
  };

  /** Splits a query such as `"## Usage"` into `{ level: 2, text: 'Usage' }`. */
  const parseHeadingQuery = (query) => {
    const trimmed = query.trim();
    const hashes = trimmed.match(/^#+/);
    const level = hashes ? hashes[0].length : undefined;
    const text = hashes ? trimmed.slice(level).trim() : trimmed;
    return { level, text };
  };

  /** Index of the first heading at or after `fromIndex` matching `query`, else -1. */
  const findHeadingIndex = (children, query, fromIndex) => {
    for (let i = fromIndex; i < children.length; i++) {
      const node = children[i];
      if (!node || node.type !== 'heading') {
        continue;
      }
      const levelMatches = query.level === undefined || node.depth === query.level;
      if (levelMatches && nodeToString(node).trim() === query.text) {
        return i;
      }
    }
    return -1;
  };

  /**
   * Locates the start heading and the first matching end heading after it among
   * the direct children of `root`. Either index is -1 when not found; the end
   * is not searched when the start is missing or no end query was given.
   */
  const findSectionBoundaryIndices = (root, startText, endText) => {
    if (typeof startText !== 'string' || !Array.isArray(root.children)) {
      return { startIndex: -1, endIndex: -1 };
    }
    const startIndex = findHeadingIndex(root.children, parseHeadingQuery(startText), 0);
    if (startIndex === -1 || typeof endText !== 'string') {
      return { startIndex, endIndex: -1 };
    }
    const endIndex = findHeadingIndex(root.children, parseHeadingQuery(endText), startIndex + 1);
    return { startIndex, endIndex };
  };

  /**
   * Resolves `range` to `{ startIndex, endIndex }`, where `endIndex` falls back
   * to `tree.children.length` when there is no end heading. Returns null when
   * the range is unusable or the start heading is absent.
   */
  const resolveSection = (tree, range) => {
    if (!Array.isArray(range)) {
      return null;
    }
    const [startMarker, endMarker] = range;
    const { startIndex, endIndex } = findSectionBoundaryIndices(tree, startMarker, endMarker);
    if (startIndex === -1) {
      return null;
    }
    return { startIndex, endIndex: endIndex === -1 ? tree.children.length : endIndex };
  };

  /** Removes the top-level nodes of the section, optionally with its boundary headings. */
  const removeAllContent = (tree, remove, range) => {
    const section = resolveSection(tree, range);
    if (!section) {
      return;
    }
    const includeBoth = remove.includes('including-boundaries');
    const includeStart = includeBoth || remove.includes('including-start-boundary');
    const includeEnd = includeBoth || remove.includes('including-end-boundary');
    const firstRemoved = includeStart ? section.startIndex : section.startIndex + 1;
    // endIndex equals children.length for an open-ended section, hence the clamp.
    const lastRemoved = Math.min(
      includeEnd ? section.endIndex : section.endIndex - 1,
      tree.children.length - 1,
    );
    if (lastRemoved < firstRemoved) {
      return;
    }
    tree.children.splice(firstRemoved, lastRemoved - firstRemoved + 1);
  };

  /**
   * Removes the elements matching `selector` from an HTML string.
   * Returns the rewritten markup, or null when the node must be left alone
   * (nothing matched, or the selector could not be evaluated).
   */
  const removeSelectorFromHtml = (html, selector) => {
    const $ = cheerio.load(html, {
      xml: { decodeEntities: false, xmlMode: false },
    });
    let matches;
    try {
      matches = $(selector);
    }
    catch {
      // Best effort: a selector that cannot be evaluated changes nothing.
      return null;
    }
    // html nodes are frequently fragments (a lone opening or closing tag).
    // Writing back re-serialised markup for a node that had no match would
    // rewrite or even drop such fragments, so untouched nodes stay untouched.
    if (matches.length === 0) {
      return null;
    }
    matches.remove();
    return $.html();
  };

  /** Recursively strips html nodes (or the selected elements inside them) below `parent`. */
  const stripHtml = (parent, selector) => {
    if (!Array.isArray(parent.children)) {
      return;
    }
    // Walk backwards so that splicing does not shift indices still to visit.
    for (let i = parent.children.length - 1; i >= 0; i--) {
      const node = parent.children[i];
      if (!node) {
        continue;
      }
      if (node.type !== 'html') {
        stripHtml(node, selector);
        continue;
      }
      if (!selector) {
        parent.children.splice(i, 1);
        continue;
      }
      const rewritten = removeSelectorFromHtml(node.value, selector);
      if (rewritten === null) {
        continue;
      }
      if (!rewritten || rewritten.trim() === '') {
        parent.children.splice(i, 1);
      }
      else {
        node.value = rewritten;
      }
    }
  };

  /** Applies an `['html', { $ }?]` step to the whole tree or to the given section. */
  const removeHtml = (tree, remove, range) => {
    const options = remove.length === 2 ? remove[1] : null;
    const selector = options !== null && typeof options === 'object' ? options.$ : null;
    if (!range) {
      stripHtml(tree, selector);
      return;
    }
    const section = resolveSection(tree, range);
    if (!section) {
      return;
    }
    for (let idx = section.endIndex - 1; idx > section.startIndex; idx--) {
      const sectionNode = tree.children[idx];
      if (!sectionNode) {
        continue;
      }
      if (sectionNode.type === 'html') {
        // Wrap the node so the shared routine can decide whether it survives.
        const holder = { children: [sectionNode] };
        stripHtml(holder, selector);
        if (holder.children.length === 0) {
          tree.children.splice(idx, 1);
        }
      }
      else {
        stripHtml(sectionNode, selector);
      }
    }
  };

  /** Builds a global RegExp from a string or RegExp-like pattern; null when unusable. */
  const toGlobalRegex = (pattern) => {
    if (typeof pattern === 'string') {
      return new RegExp(pattern, 'g');
    }
    if (pattern && typeof pattern.source === 'string' && typeof pattern.flags === 'string') {
      const flags = pattern.flags.includes('g') ? pattern.flags : `${pattern.flags}g`;
      return new RegExp(pattern.source, flags);
    }
    return null;
  };

  /** Deletes every match of `regex` from the text nodes at or below `node`. */
  const stripPattern = (node, regex) => {
    if (!node) {
      return;
    }
    if (node.type === 'text') {
      if (typeof node.value === 'string') {
        node.value = node.value.replace(regex, '');
      }
    }
    else if (Array.isArray(node.children)) {
      for (const child of node.children) {
        stripPattern(child, regex);
      }
    }
  };

  /** Applies a `['regex', pattern]` step to the whole tree or to the given section. */
  const removeRegex = (tree, remove, range) => {
    const regex = toGlobalRegex(remove[1]);
    if (!regex) {
      return;
    }
    if (!range) {
      stripPattern(tree, regex);
      return;
    }
    const section = resolveSection(tree, range);
    if (!section) {
      return;
    }
    for (let idx = section.startIndex + 1; idx < section.endIndex; idx++) {
      stripPattern(tree.children[idx], regex);
    }
  };

  return (tree) => {
    for (const step of stepList) {
      if (!step || !Array.isArray(step.remove)) {
        continue;
      }
      const { remove, range } = step;
      if (remove.includes('all-content')) {
        removeAllContent(tree, remove, range);
        continue;
      }
      const [removeType] = remove;
      if (removeType === 'html') {
        removeHtml(tree, remove, range);
      }
      else if (removeType === 'regex') {
        removeRegex(tree, remove, range);
      }
    }
    return tree;
  };
};
export { remarkAccurateRemove };
//# sourceMappingURL=index.esm.js.map