UNPKG

@tricoteuses/arbre-de-la-loi

Version:

Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.

102 lines (101 loc) 3.8 kB
import deepEqual from "deep-equal";
import { HastType } from "./hast";

/**
 * Fuses adjacent sibling `<span>` elements into one, in place.
 *
 * Two neighbouring children are fused when both are `span` elements holding
 * exactly one text child and their `properties` are deep-equal. The text of
 * the later span is appended to the earlier one and the later span is removed
 * from `node.children`. Afterwards the function recurses into every remaining
 * child.
 *
 * @param {object} node - Tree node; nodes without a `children` property are
 *   left untouched.
 * @returns {void}
 */
export function mergeSpanElementsWithSameAttributes(node) {
  const siblings = node.children;
  if (siblings === undefined) {
    return;
  }

  // A span qualifies for merging only when it wraps a single text node.
  const isSingleTextSpan = (candidate) =>
    candidate.type === HastType.Element &&
    candidate.tagName === "span" &&
    candidate.children.length === 1 &&
    candidate.children[0].type === HastType.Text;

  let anchor = siblings[0];
  let cursor = 1;
  while (cursor < siblings.length) {
    const current = siblings[cursor];
    if (
      isSingleTextSpan(anchor) &&
      isSingleTextSpan(current) &&
      deepEqual(anchor.properties, current.properties)
    ) {
      anchor.children[0].value += current.children[0].value;
      // Removing the absorbed span shifts the next sibling into `cursor`,
      // so the cursor must not advance here.
      siblings.splice(cursor, 1);
    } else {
      anchor = current;
      cursor += 1;
    }
  }

  for (const descendant of siblings) {
    mergeSpanElementsWithSameAttributes(descendant);
  }
}

/**
 * Concatenates runs of adjacent sibling text nodes, in place.
 *
 * Whenever two neighbouring children are both text nodes, the value of the
 * later one is appended to the earlier one and the later node is removed from
 * `node.children`. Afterwards the function recurses into every remaining
 * child.
 *
 * @param {object} node - Tree node; nodes without a `children` property are
 *   left untouched.
 * @returns {void}
 */
export function mergeTexts(node) {
  const siblings = node.children;
  if (siblings === undefined) {
    return;
  }

  let anchor = siblings[0];
  let cursor = 1;
  while (cursor < siblings.length) {
    const current = siblings[cursor];
    const bothAreTexts =
      anchor.type === HastType.Text && current.type === HastType.Text;
    if (bothAreTexts) {
      anchor.value += current.value;
      // The next sibling slides into `cursor` after the removal.
      siblings.splice(cursor, 1);
    } else {
      anchor = current;
      cursor += 1;
    }
  }

  for (const descendant of siblings) {
    mergeTexts(descendant);
  }
}

/// Useful for documents generated from PDF.
/// Example: https://www.assemblee-nationale.fr/dyn/opendata/PRJLANR5L15B0235.html
/// where each character is a span with a letter-spacing style.
/**
 * Deletes every `letter-spacing` declaration whose value is a number of
 * points (`pt`) or the keyword `normal` from an HTML string.
 *
 * A semicolon and whitespace directly preceding the declaration are removed
 * with it; a semicolon following the declaration is left in place.
 *
 * @param {string} html - HTML source text.
 * @returns {string} The text with the matching declarations removed.
 */
export function removeLetterSpacingStyleAttributes(html) {
  const letterSpacingDeclaration =
    /;?\s*letter-spacing:\s*(-?\d+(\.\d*)?\s*pt|normal)/gm;
  return html.replace(letterSpacingDeclaration, "");
}

/**
 * Deletes every `style=""` attribute, together with the whitespace in front
 * of it, from an HTML string.
 *
 * @param {string} html - HTML source text.
 * @returns {string} The text without empty style attributes.
 */
export function removeEmptyStyleAttributes(html) {
  const emptyStyleAttribute = /\s*style=""/g;
  return html.replace(emptyStyleAttribute, "");
}

/**
 * Recursively drops all `<style>` elements from a tree, in place.
 *
 * `node.children` is replaced by a filtered array at every level.
 *
 * @param {object} node - Tree node; nodes without a `children` property are
 *   left untouched.
 * @returns {void}
 */
export function removeStyleElements(node) {
  if (node.children === undefined) {
    return;
  }

  const isStyleElement = (candidate) =>
    candidate.type === HastType.Element && candidate.tagName === "style";

  const survivors = node.children.filter(
    (candidate) => !isStyleElement(candidate),
  );
  node.children = survivors;

  for (const descendant of survivors) {
    removeStyleElements(descendant);
  }
}

/**
 * Recursively replaces each `<span>` whose only child is a text node made up
 * solely of whitespace (space, tab, newline, form feed, carriage return,
 * no-break space) or of nothing at all by that text node, in place.
 *
 * The span's properties are discarded along with the span.
 *
 * @param {object} node - Tree node; nodes without a `children` property are
 *   left untouched.
 * @returns {void}
 */
export function replaceSpanElementsContainingOnlySpacesWithTexts(node) {
  if (node.children === undefined) {
    return;
  }

  const unwrapped = node.children.map((candidate) => {
    if (
      candidate.type === HastType.Element &&
      candidate.tagName === "span" &&
      candidate.children.length === 1 &&
      candidate.children[0].type === HastType.Text &&
      candidate.children[0].value.match(/^[ \t\n\f\r\xa0]*$/)
    ) {
      return candidate.children[0];
    }
    return candidate;
  });
  node.children = unwrapped;

  for (const descendant of unwrapped) {
    replaceSpanElementsContainingOnlySpacesWithTexts(descendant);
  }
}

/**
 * Recursively replaces each `<span>` that has no properties (`undefined` or
 * an object without keys) and exactly one text child by that text node, in
 * place.
 *
 * @param {object} node - Tree node; nodes without a `children` property are
 *   left untouched.
 * @returns {void}
 */
export function replaceSpanElementsWithoutPropertiesWithTexts(node) {
  if (node.children === undefined) {
    return;
  }

  const unwrapped = node.children.map((candidate) => {
    if (
      candidate.type === HastType.Element &&
      candidate.tagName === "span" &&
      (candidate.properties === undefined ||
        Object.keys(candidate.properties).length === 0) &&
      candidate.children.length === 1 &&
      candidate.children[0].type === HastType.Text
    ) {
      return candidate.children[0];
    }
    return candidate;
  });
  node.children = unwrapped;

  for (const descendant of unwrapped) {
    replaceSpanElementsWithoutPropertiesWithTexts(descendant);
  }
}