@tricoteuses/arbre-de-la-loi
Version:
Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.
102 lines (101 loc) • 3.8 kB
JavaScript
import deepEqual from "deep-equal";
import { HastType } from "./hast";
/**
 * Merges runs of adjacent sibling `<span>` elements that have deep-equal
 * `properties` and each contain exactly one text node, then recurses into
 * every remaining child.
 *
 * The first span of a run is kept and its text node receives the
 * concatenated text; the other spans of the run are dropped.
 * `node.children` is modified in place (the same array object is kept).
 *
 * @param {object} node - Tree node. A node whose `children` is `undefined`
 *   is left untouched.
 * @returns {void}
 */
export function mergeSpanElementsWithSameAttributes(node) {
  const children = node.children;
  if (children === undefined) {
    return;
  }

  // True for `<span>` elements whose only child is a text node.
  const isSingleTextSpan = (candidate) =>
    candidate.type === HastType.Element &&
    candidate.tagName === "span" &&
    candidate.children.length === 1 &&
    candidate.children[0].type === HastType.Text;

  // Compact the array in a single pass instead of splicing inside the loop:
  // `kept` is the number of children retained so far, so `children[kept - 1]`
  // is always the node the current child may be merged into.
  let kept = 0;
  for (let read = 0; read < children.length; read += 1) {
    const child = children[read];
    const previous = kept > 0 ? children[kept - 1] : undefined;
    if (
      previous !== undefined &&
      isSingleTextSpan(previous) &&
      isSingleTextSpan(child) &&
      deepEqual(previous.properties, child.properties)
    ) {
      previous.children[0].value += child.children[0].value;
    } else {
      children[kept] = child;
      kept += 1;
    }
  }
  // Drop the slots left over by the merged spans.
  children.length = kept;

  for (const child of children) {
    mergeSpanElementsWithSameAttributes(child);
  }
}
/**
 * Merges runs of adjacent sibling text nodes into the first node of each
 * run, then recurses into every remaining child.
 *
 * `node.children` is modified in place (the same array object is kept).
 *
 * @param {object} node - Tree node. A node whose `children` is `undefined`
 *   is left untouched.
 * @returns {void}
 */
export function mergeTexts(node) {
  const children = node.children;
  if (children === undefined) {
    return;
  }

  // Compact the array in a single pass instead of splicing inside the loop:
  // `kept` is the number of children retained so far, so `children[kept - 1]`
  // is always the node the current child may be appended to.
  let kept = 0;
  for (let read = 0; read < children.length; read += 1) {
    const child = children[read];
    const previous = kept > 0 ? children[kept - 1] : undefined;
    if (
      previous !== undefined &&
      previous.type === HastType.Text &&
      child.type === HastType.Text
    ) {
      previous.value += child.value;
    } else {
      children[kept] = child;
      kept += 1;
    }
  }
  // Drop the slots left over by the merged text nodes.
  children.length = kept;

  for (const child of children) {
    mergeTexts(child);
  }
}
/**
 * Strips `letter-spacing` declarations from an HTML string when their value
 * is `normal` or a number (optionally negative, optionally decimal)
 * followed by `pt`.
 *
 * Useful for documents generated from PDF.
 * Example: https://www.assemblee-nationale.fr/dyn/opendata/PRJLANR5L15B0235.html
 * where each character is a span with a letter-spacing style.
 *
 * An optional `;` and any whitespace directly before the declaration are
 * removed with it; whatever follows the value (for instance a trailing `;`)
 * is left in place. Matching is purely textual and applies to the whole
 * string.
 *
 * @param {string} html - HTML source text.
 * @returns {string} The text with the matching declarations removed.
 */
export function removeLetterSpacingStyleAttributes(html) {
  const letterSpacingDeclaration =
    /;?\s*letter-spacing:\s*(-?\d+(\.\d*)?\s*pt|normal)/gm;
  return html.replace(letterSpacingDeclaration, "");
}
/**
 * Removes every occurrence of `style=""` from an HTML string, together with
 * any whitespace directly before it.
 *
 * Matching is purely textual and applies to the whole string; only the
 * double-quoted empty form is recognised.
 *
 * @param {string} html - HTML source text.
 * @returns {string} The text without empty `style` attributes.
 */
export function removeEmptyStyleAttributes(html) {
  const emptyStyleAttribute = /\s*style=""/g;
  return html.replace(emptyStyleAttribute, "");
}
/**
 * Removes every `<style>` element from the tree rooted at `node`.
 *
 * `node.children` is replaced by a new, filtered array, and the function
 * then recurses into each child that was kept.
 *
 * @param {object} node - Tree node. A node whose `children` is `undefined`
 *   is left untouched.
 * @returns {void}
 */
export function removeStyleElements(node) {
  const before = node.children;
  if (before === undefined) {
    return;
  }

  const isStyleElement = (candidate) =>
    candidate.type === HastType.Element && candidate.tagName === "style";

  const remaining = before.filter((candidate) => !isStyleElement(candidate));
  node.children = remaining;

  for (const descendant of remaining) {
    removeStyleElements(descendant);
  }
}
/**
 * Replaces each `<span>` element whose single child is a text node made
 * only of whitespace (space, tab, line feed, form feed, carriage return,
 * no-break space — or nothing at all) with that text node.
 *
 * The span's `properties` are not inspected, so they are discarded along
 * with the span. `node.children` is replaced by a new array, and the
 * function then recurses into each resulting child.
 *
 * @param {object} node - Tree node. A node whose `children` is `undefined`
 *   is left untouched.
 * @returns {void}
 */
export function replaceSpanElementsContainingOnlySpacesWithTexts(node) {
  const before = node.children;
  if (before === undefined) {
    return;
  }

  // Returns the inner text node for a whitespace-only span, else the node itself.
  const unwrapBlankSpan = (candidate) => {
    if (candidate.type !== HastType.Element || candidate.tagName !== "span") {
      return candidate;
    }
    if (candidate.children.length !== 1) {
      return candidate;
    }
    const inner = candidate.children[0];
    if (inner.type !== HastType.Text) {
      return candidate;
    }
    return inner.value.match(/^[ \t\n\f\r\xa0]*$/) === null ? candidate : inner;
  };

  const after = before.map((candidate) => unwrapBlankSpan(candidate));
  node.children = after;

  for (const descendant of after) {
    replaceSpanElementsContainingOnlySpacesWithTexts(descendant);
  }
}
/**
 * Replaces each `<span>` element that has no properties and whose single
 * child is a text node with that text node.
 *
 * A span counts as having no properties when `properties` is `undefined`,
 * `null`, or an object without own enumerable keys. `node.children` is
 * replaced by a new array, and the function then recurses into each
 * resulting child.
 *
 * @param {object} node - Tree node. A node whose `children` is `undefined`
 *   is left untouched.
 * @returns {void}
 */
export function replaceSpanElementsWithoutPropertiesWithTexts(node) {
  const before = node.children;
  if (before === undefined) {
    return;
  }

  // `== null` covers both `undefined` and `null`; calling `Object.keys` on
  // `null` would throw a TypeError.
  const hasNoProperties = (element) =>
    element.properties == null || Object.keys(element.properties).length === 0;

  const after = before.map((child) =>
    child.type === HastType.Element &&
    child.tagName === "span" &&
    hasNoProperties(child) &&
    child.children.length === 1 &&
    child.children[0].type === HastType.Text
      ? child.children[0]
      : child,
  );
  node.children = after;

  for (const child of after) {
    replaceSpanElementsWithoutPropertiesWithTexts(child);
  }
}