UNPKG

@tricoteuses/arbre-de-la-loi

Version:

Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.

213 lines (212 loc) 9.15 kB
/**
 * Kinds of nodes found in the tree built from a bill or law.
 *
 * This is a numeric enum with a reverse mapping (`NodeType.TEXTE === 0` and
 * `NodeType[0] === "TEXTE"`).
 */
export var NodeType;
(function (NodeType) {
  NodeType[(NodeType["TEXTE"] = 0)] = "TEXTE";
  NodeType[(NodeType["TOME"] = 1)] = "TOME";
  NodeType[(NodeType["PARTIE"] = 2)] = "PARTIE";
  NodeType[(NodeType["LIVRE"] = 3)] = "LIVRE";
  NodeType[(NodeType["TITRE"] = 4)] = "TITRE";
  NodeType[(NodeType["SOUS_TITRE"] = 5)] = "SOUS_TITRE";
  NodeType[(NodeType["CHAPITRE"] = 6)] = "CHAPITRE";
  NodeType[(NodeType["SECTION"] = 7)] = "SECTION";
  NodeType[(NodeType["SOUS_SECTION"] = 8)] = "SOUS_SECTION";
  NodeType[(NodeType["ARTICLE"] = 9)] = "ARTICLE";
  // Nodes that are always direct children of "TEXTE":
  NodeType[(NodeType["ANNEXE"] = 10)] = "ANNEXE";
  NodeType[(NodeType["ETAT"] = 11)] = "ETAT";
  // Nodes that are always root nodes:
  NodeType[(NodeType["EXPOSE_DES_MOTIFS"] = 12)] = "EXPOSE_DES_MOTIFS";
  NodeType[(NodeType["AUTRE"] = 13)] = "AUTRE";
})(NodeType || (NodeType = {}));

// Headings that always start a new root node. They are matched against the
// simplified form of a line (see simplifyLine).
const ROOT_HEADING_PATTERNS = [
  // "Exposé des motifs" without content:
  // * http://www.assemblee-nationale.fr/15/textes/0702.asp
  // "Exposé des motifs" with content:
  // * http://www.assemblee-nationale.fr/15/textes/1326.asp
  // * http://www.assemblee-nationale.fr/15/textes/1610.asp
  [/^EXPOSE_DES_MOTIFS$/, NodeType.EXPOSE_DES_MOTIFS],
  // "Projet de loi" or "Proposition de loi":
  // * http://www.assemblee-nationale.fr/15/textes/0232.asp
  // * http://www.assemblee-nationale.fr/15/textes/0626.asp
  // * http://www.assemblee-nationale.fr/15/textes/0676.asp
  [/^(PROJET|PROPOSITION)_DE_LOI(_|$)/, NodeType.TEXTE],
  [/^TEXTE_(DE_LA_PROPOSITION|DU_PROJET)_DE_LOI(_|$)/, NodeType.TEXTE],
];

// Structural headings, tried in this order against the simplified line.
const STRUCTURE_HEADING_PATTERNS = [
  [/^ARTICLES?_/, NodeType.ARTICLE],
  [/^CHAPITRE_/, NodeType.CHAPITRE],
  [/^LIVRE_/, NodeType.LIVRE],
  [/^PARTIE_/, NodeType.PARTIE],
  [
    /^(PREMIERE|SECONDE|DEUXIEME|TROISIEME|QUATRIEME)_PARTIE(_|$)/,
    NodeType.PARTIE,
  ],
  [/^SECTION_/, NodeType.SECTION],
  [/^SOUSSECTION_/, NodeType.SOUS_SECTION],
  [/^SOUSTITRE_/, NodeType.SOUS_TITRE],
  [/^TITRE_/, NodeType.TITRE],
  [/^TOME_/, NodeType.TOME],
];

// Headings that are attached directly to the current "TEXTE" root node.
const TEXTE_CHILD_HEADING_PATTERNS = [
  [/^ETAT_/, NodeType.ETAT],
  [/^(RAPPORT_)?ANNEXE(_|$)/, NodeType.ANNEXE],
];

/**
 * Extract the header text from a raw Markdown line.
 *
 * @param {string} line - Raw line of Markdown.
 * @returns {string} The line, without the surrounding bold markers when the
 *   whole line is bold.
 */
function headerFromLine(line) {
  // Remove bold (generated by Turndown) from header.
  return line.replace(/^\*\*(.+)\*\*$/, "$1");
}

/**
 * Reduce a line to an upper-case, accent-free, underscore-separated form so
 * that headings can be detected with simple anchored regular expressions.
 *
 * @param {string} line - Raw line of Markdown.
 * @returns {string} Simplified line, e.g. "ARTICLE_1ER".
 */
function simplifyLine(line) {
  return (
    line
      // Decompose accented letters, then drop the combining marks.
      .normalize("NFD")
      .replace(/[\u0300-\u036f]/g, "")
      // Remove Markdown bold (added by Turndown).
      .replace(/\*\*/g, "")
      .replace(/\(nouveau\)/, "")
      .replace(/\(Pour coordination\)/, "")
      // Accents are already stripped at this point, so the pattern must be
      // written without "é" to be able to match.
      .replace(/\(Supprimes?\)/, "")
      // Treat no-break spaces (regular and narrow) as ordinary spaces. They
      // are written as escapes because they are invisible as literals.
      .replace(/[\u00a0\u202f]/g, " ")
      .replace(/[\-,.…]/g, "")
      .trim()
      .replace(/ {1,}/g, "_")
      .toUpperCase()
  );
}

/**
 * Append a line of body text to a node, creating its `alineas` array on
 * first use.
 *
 * @param {object} node - Node receiving the text.
 * @param {string} alinea - Text to append.
 */
function appendAlinea(node, alinea) {
  if (node.alineas === undefined) {
    node.alineas = [];
  }
  node.alineas.push(alinea);
}

/**
 * Parse the Markdown of a bill or law into a list of root nodes.
 *
 * Each node has the shape `{ headers, type, alineas?, children? }`. The first
 * root node is always an "AUTRE" node that collects the lines found before
 * any recognised heading.
 *
 * @param {string} markdown - Markdown text, split on "\n".
 * @param {string[]} [texteHeaders=[]] - Headers given to the "TEXTE" root
 *   node that is created implicitly when a structural heading (article,
 *   titre, ...) appears before any explicit "Projet/Proposition de loi"
 *   heading. The array is copied, so the returned tree never shares it with
 *   the caller.
 * @returns {object[]} The root nodes, in order of appearance.
 */
export function parseTexteMarkdown(markdown, texteHeaders = []) {
  const rootNodes = [
    {
      headers: ["Autre"],
      type: NodeType.AUTRE,
    },
  ];
  // Path from the current root node down to the node currently being filled.
  const breadcrumb = [rootNodes[0]];

  // Register a new root node and restart the breadcrumb from it.
  const startRootNode = (rootNode) => {
    rootNodes.push(rootNode);
    breadcrumb[0] = rootNode;
    breadcrumb.length = 1;
    return rootNode;
  };

  linesLoop: for (const line of markdown.split("\n")) {
    let currentRootNode = rootNodes[rootNodes.length - 1];
    const lineSimplified = simplifyLine(line);

    for (const [regExp, headingType] of ROOT_HEADING_PATTERNS) {
      if (regExp.test(lineSimplified)) {
        startRootNode({
          headers: [headerFromLine(line)],
          type: headingType,
        });
        continue linesLoop;
      }
    }

    if ([NodeType.AUTRE, NodeType.TEXTE].includes(currentRootNode.type)) {
      for (const [regExp, headingType] of STRUCTURE_HEADING_PATTERNS) {
        if (!regExp.test(lineSimplified)) {
          continue;
        }
        const node = {
          headers: [headerFromLine(line)],
          type: headingType,
        };
        if (currentRootNode.type !== NodeType.TEXTE) {
          // A structural heading outside of a "TEXTE": open an implicit one.
          currentRootNode = startRootNode({
            headers: [...texteHeaders],
            type: NodeType.TEXTE,
          });
        }
        // Try to retrieve a node of the same type in breadcrumb and
        // append new node to its parent, as a sibling.
        for (const [depth, breadcrumbNode] of breadcrumb.entries()) {
          if (breadcrumbNode.type === headingType) {
            breadcrumb[depth - 1].children.push(node);
            breadcrumb[depth] = node;
            breadcrumb.length = depth + 1;
            continue linesLoop;
          }
        }
        // Create a new level in breadcrumb for the new node.
        const parentNode = breadcrumb[breadcrumb.length - 1];
        console.assert(parentNode.children === undefined);
        parentNode.children = [node];
        breadcrumb.push(node);
        continue linesLoop;
      }

      if (currentRootNode.type === NodeType.TEXTE) {
        for (const [regExp, headingType] of TEXTE_CHILD_HEADING_PATTERNS) {
          if (!regExp.test(lineSimplified)) {
            continue;
          }
          const node = {
            headers: [headerFromLine(line)],
            type: headingType,
          };
          if (currentRootNode.children === undefined) {
            currentRootNode.children = [node];
            console.assert(breadcrumb.length === 1);
            breadcrumb.push(node);
          } else {
            currentRootNode.children.push(node);
            console.assert(breadcrumb.length >= 2);
            breadcrumb[1] = node;
            breadcrumb.length = 2;
          }
          continue linesLoop;
        }

        const node = breadcrumb[breadcrumb.length - 1];
        const isAwaitingSecondTitle =
          node.alineas === undefined &&
          node.headers.length === 1 &&
          ![NodeType.ARTICLE, NodeType.EXPOSE_DES_MOTIFS].includes(node.type);
        if (isAwaitingSecondTitle) {
          // Blank lines between the first and second title are skipped.
          if (line.trim()) {
            // Second title of node
            node.headers.push(headerFromLine(line));
          }
          continue linesLoop;
        }
        // Remove (optional) "pastille" (aka alinea number) from alinea.
        appendAlinea(node, line.replace(/^\(\d+\)\s*/, ""));
        continue linesLoop;
      }
    }

    // The currentRootNode is not of type TEXTE: keep the line verbatim.
    appendAlinea(breadcrumb[breadcrumb.length - 1], line);
  }
  return rootNodes;
}

/**
 * Parse a Markdown text and write its "TEXTE" root nodes back as Markdown
 * whose heading levels follow the detected structure.
 *
 * @param {string} markdown - Markdown text of the bill or law.
 * @param {string[]} texteHeaders - Headers of the implicit "TEXTE" root node
 *   (see parseTexteMarkdown).
 * @returns {string} Restructured Markdown.
 */
export function restructureTexteMarkdown(markdown, texteHeaders) {
  // Replace non-breaking hyphen with normal hyphen.
  const normalized = markdown.replace(/\u{2011}/gu, "-");
  return (
    parseTexteMarkdown(normalized, texteHeaders)
      .filter((node) => node.type === NodeType.TEXTE)
      .map((node) => stringifyTree(node))
      .join("\n\n")
      // Replace multiple spaces with a single space.
      .replace(/ {2,}/g, " ")
      // Replace sequences of more than 2 \n with exactly 2 \n.
      .replace(/\n{3,}/g, "\n\n")
      // Remove leading line breaks.
      .replace(/^\n+/, "")
      // Remove trailing line breaks (only a run of 2 or more is removed; a
      // single trailing line break is kept).
      .replace(/\n{2,}$/, "")
  );
}

/**
 * Append the Markdown lines of a node and of its descendants to `lines`.
 *
 * @param {object} node - Node to write.
 * @param {number} depth - Depth of the node; headers get `depth + 1` "#".
 * @param {string[]} lines - Output lines, mutated in place.
 */
function stringifyNodeToLines(node, depth, lines) {
  const prefix = "#".repeat(depth + 1);
  for (const header of node.headers) {
    // Keep exactly one blank line before each header.
    if (lines.length > 0 && lines[lines.length - 1] !== "") {
      lines.push("");
    }
    lines.push(`${prefix} ${header.trim()}`);
    lines.push("");
  }
  if (node.alineas !== undefined) {
    for (const alinea of node.alineas) {
      lines.push(alinea.trimEnd());
    }
  }
  if (node.children !== undefined) {
    for (const child of node.children) {
      stringifyNodeToLines(child, depth + 1, lines);
    }
  }
}

/**
 * Convert a node and its descendants to Markdown.
 *
 * @param {object} node - Root of the tree to write.
 * @returns {string} Markdown lines joined with "\n".
 */
export function stringifyTree(node) {
  const lines = [];
  stringifyNodeToLines(node, 0, lines);
  return lines.join("\n");
}