UNPKG

@tricoteuses/arbre-de-la-loi

Version:

Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.

187 lines (186 loc) 6.33 kB
import { TypeDocument } from "@tricoteuses/assemblee";
import {
  EnabledDatasets,
  loadAssembleeData,
  pathFromDocumentUid,
} from "@tricoteuses/assemblee/lib/loaders";
import assert from "assert";
import { execSync } from "child_process";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import hastFromParse5 from "hast-util-from-parse5";
import parse5 from "parse5";
import path from "path";
import unistRemovePosition from "unist-util-remove-position";
import vfile from "vfile";
import { documentsFromAssembleeHast } from "../assemblee_html_parser";

// Command-line options accepted by this script (parsed by command-line-args).
const optionsDefinitions = [
  {
    alias: "c",
    help: "commit documents",
    name: "commit",
    type: Boolean,
  },
  {
    alias: "l",
    defaultValue: "15",
    name: "legislature",
    type: String,
  },
  {
    alias: "o",
    help: "convert only one document",
    name: "one",
    type: Boolean,
  },
  {
    alias: "r",
    help: "push commit to given remote",
    multiple: true,
    name: "remote",
    type: String,
  },
  {
    alias: "s",
    help: "don't log anything",
    name: "silent",
    type: Boolean,
  },
  {
    alias: "u",
    help: `UID of first Assemblée's "texte de loi" to retrieve`,
    name: "uid",
    type: String,
  },
  {
    alias: "v",
    help: "verbose logs",
    name: "verbose",
    type: Boolean,
  },
  {
    defaultOption: true,
    help: "directory containing Assemblée open data files",
    name: "dataDir",
    type: String,
  },
];
const options = commandLineArgs(optionsDefinitions);

/**
 * Optionally commit every change found in a git working directory and push
 * the `master` branch to the given remotes.
 *
 * Nothing is executed when `commit` is falsy.
 *
 * @param {string} repositoryDir - Directory used as `cwd` for the git commands.
 * @param {boolean} [commit] - When truthy, run `git add .` then `git commit`.
 * @param {string[]} [remotes] - Remotes to push to; a nullish value means none.
 * @returns {boolean} `true` when at least one push failed, `false` otherwise.
 * @throws Rethrows the `git commit` failure unless its captured stdout
 *   contains "nothing to commit". A failure of `git add` also propagates.
 */
function commitAndPush(repositoryDir, commit, remotes) {
  let error = false;
  if (commit) {
    execSync("git add .", {
      cwd: repositoryDir,
      env: process.env,
      encoding: "utf-8",
      stdio: ["ignore", "ignore", "pipe"],
    });
    try {
      execSync('git commit -m "Nouvelle moisson"', {
        cwd: repositoryDir,
        env: process.env,
        encoding: "utf-8",
      });
    } catch (childProcess) {
      // An empty commit is not an error: only rethrow when the failure is
      // something other than git reporting "nothing to commit" on stdout.
      if (
        childProcess.stderr === null ||
        !/nothing to commit/.test(childProcess.stdout)
      ) {
        console.error(childProcess.output);
        throw childProcess;
      }
    }
    for (const remote of remotes || []) {
      try {
        execSync(`git push ${remote} master`, {
          cwd: repositoryDir,
          env: process.env,
          encoding: "utf-8",
          stdio: ["ignore", "ignore", "pipe"],
        });
      } catch (childProcess) {
        // Don't stop when push fails.
        console.error(childProcess.output);
        error = true;
      }
    }
  }
  return error;
}

/**
 * Convert the cleaned HTML of Assemblée "textes de lois" into JSON documents.
 *
 * Reads `<dataDir>/documents_html_nettoye`, writes one JSON file per converted
 * bill under `<dataDir>/textes_lois_json`, then hands that directory to
 * `commitAndPush`. The process exits with status 1 when a push failed.
 *
 * A full run (neither `--one` nor `--uid`) first empties the output directory,
 * keeping dot-files. Partial runs leave existing files in place.
 *
 * @returns {Promise<void>}
 * @throws {assert.AssertionError} When `--commit` is combined with `--one` or
 *   `--uid`, or when the cleaned HTML directory doesn't exist.
 */
async function convertTextesLois() {
  // Committing is refused for partial runs: the assertions must fail when
  // "commit" is set together with "one" or "uid", as their messages state.
  assert(
    !options.commit || !options.one,
    'Options "commit" & "one" are incompatible',
  );
  assert(
    !options.commit || !options.uid,
    'Options "commit" & "uid" are incompatible',
  );

  const dataDir = options.dataDir;
  const { documentByUid } = loadAssembleeData(
    dataDir,
    EnabledDatasets.DossiersLegislatifs,
    options.legislature,
  );

  const documentsCleanHtmlDir = path.join(dataDir, "documents_html_nettoye");
  assert(
    fs.existsSync(documentsCleanHtmlDir),
    `Directory "${documentsCleanHtmlDir}" doesn't exist`,
  );

  const documentsJsonDir = path.join(dataDir, "textes_lois_json");
  fs.ensureDirSync(documentsJsonDir);
  if (!options.one && !options.uid) {
    // Full run: remove previous output, but keep dot-files (names starting
    // with ".").
    for (const filename of fs.readdirSync(documentsJsonDir)) {
      if (filename[0] === ".") {
        continue;
      }
      fs.removeSync(path.join(documentsJsonDir, filename));
    }
  }

  const textesLois = Object.values(documentByUid)
    .filter((texteLoi) => texteLoi.xsiType === TypeDocument.TexteLoiType)
    // Ignore "textes de lois" from Sénat.
    .filter((texteLoi) => texteLoi.uid.substring(4, 6) !== "SN")
    // Keep only "lettres rectificatives", "projets de lois" and
    // "propositions de lois".
    .filter(
      (texteLoi) =>
        texteLoi.uid.startsWith("LETT") ||
        texteLoi.uid.startsWith("PION") ||
        texteLoi.uid.startsWith("PRJL"),
    )
    .sort((a, b) => a.uid.localeCompare(b.uid));

  // When a first UID is requested, skip every bill sorted before it.
  const firstUid = options.uid;
  let skip = !!firstUid;
  for (const texteLoi of textesLois) {
    if (skip) {
      if (texteLoi.uid === firstUid) {
        skip = false;
      } else {
        continue;
      }
    }

    const texteLoiCleanHtmlBasename = pathFromDocumentUid(
      documentsCleanHtmlDir,
      texteLoi.uid,
    );
    const texteLoiCleanHtmlFilePath = `${texteLoiCleanHtmlBasename}.html`;
    if (!fs.existsSync(texteLoiCleanHtmlFilePath)) {
      // No cleaned HTML for this bill: nothing to convert.
      continue;
    }
    if (!options.silent) {
      console.log(`Converting bill ${texteLoi.uid}…`);
    }

    const texteLoiCleanHtml = fs.readFileSync(texteLoiCleanHtmlFilePath, {
      encoding: "utf8",
    });
    const texteLoiParse5Ast = parse5.parse(texteLoiCleanHtml, {
      sourceCodeLocationInfo: true,
    });
    const texteLoiVfile = vfile({
      contents: texteLoiCleanHtml,
      path: texteLoiCleanHtmlFilePath,
    });
    const textLoiHast = hastFromParse5(texteLoiParse5Ast, {
      file: texteLoiVfile,
    });
    const documents = documentsFromAssembleeHast(textLoiHast, {
      silent: options.silent,
      verbose: options.verbose,
    });
    // Positions are stripped from the tree only after the parser has run and
    // before the result is serialized.
    unistRemovePosition(textLoiHast);

    const texteLoiJsonBasename = pathFromDocumentUid(
      documentsJsonDir,
      texteLoi.uid,
    );
    const texteLoiJsonHtmlFilePath = `${texteLoiJsonBasename}.json`;
    fs.writeJsonSync(texteLoiJsonHtmlFilePath, documents, {
      encoding: "utf8",
      spaces: 2,
    });

    if (options.one) {
      break;
    }
  }

  if (commitAndPush(documentsJsonDir, options.commit, options.remote)) {
    process.exit(1);
  }
}

convertTextesLois().catch((error) => {
  // Report fatal errors on stderr, like the git failures above.
  console.error(error);
  process.exit(1);
});