@tricoteuses/arbre-de-la-loi
Version:
Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.
187 lines (186 loc) • 6.33 kB
JavaScript
import { TypeDocument } from "@tricoteuses/assemblee";
import { EnabledDatasets, loadAssembleeData, pathFromDocumentUid, } from "@tricoteuses/assemblee/lib/loaders";
import assert from "assert";
import { execSync } from "child_process";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import hastFromParse5 from "hast-util-from-parse5";
import parse5 from "parse5";
import path from "path";
import unistRemovePosition from "unist-util-remove-position";
import vfile from "vfile";
import { documentsFromAssembleeHast } from "../assemblee_html_parser";
// Command-line option definitions handed to `commandLineArgs` below.
// NOTE(review): the `help` key is kept as a human-readable description of
// each option; nothing in this file reads it — confirm whether a usage
// generator consumes it elsewhere.
const optionsDefinitions = [
{
// -c / --commit: flag read by the conversion to decide whether to commit.
alias: "c",
help: "commit documents",
name: "commit",
type: Boolean,
},
{
// -l / --legislature: passed to `loadAssembleeData`; defaults to "15".
alias: "l",
defaultValue: "15",
name: "legislature",
type: String,
},
{
// -o / --one: stop after the first converted document.
alias: "o",
help: "convert only one document",
name: "one",
type: Boolean,
},
{
// -r / --remote: may be given several times; each value is a push target.
alias: "r",
help: "push commit to given remote",
multiple: true,
name: "remote",
type: String,
},
{
// -s / --silent: suppresses the per-document progress message.
alias: "s",
help: "don't log anything",
name: "silent",
type: Boolean,
},
{
// -u / --uid: documents sorted before this UID are skipped.
alias: "u",
help: `UID of first Assemblée's "texte de loi" to retrieve`,
name: "uid",
type: String,
},
{
// -v / --verbose: forwarded to the HTML-to-documents parser.
alias: "v",
help: "verbose logs",
name: "verbose",
type: Boolean,
},
{
// Default option: receives the value given without an option name.
defaultOption: true,
help: "directory containing Assemblée open data files",
name: "dataDir",
type: String,
},
];
// Parsed command-line options, shared by the functions of this script.
const options = commandLineArgs(optionsDefinitions);
/**
 * Optionally stage and commit every change of a Git working tree, then push
 * the `master` branch to each of the given remotes.
 *
 * @param {string} repositoryDir - Working directory of the Git commands.
 * @param {boolean} commit - When falsy, no command is executed at all.
 * @param {string[]} [remotes] - Remotes to push to; a nullish value means none.
 * @returns {boolean} `true` when at least one push failed, `false` otherwise.
 * @throws Rethrows the failure of `git add`, and any `git commit` failure
 *   that is not recognised as "nothing to commit".
 */
function commitAndPush(repositoryDir, commit, remotes) {
  if (!commit) {
    return false;
  }

  // Options shared by every Git invocation.
  const plainOptions = () => ({
    cwd: repositoryDir,
    env: process.env,
    encoding: "utf-8",
  });
  // Same options, with stdin and stdout ignored and only stderr piped.
  const quietOptions = () =>
    Object.assign(plainOptions(), { stdio: ["ignore", "ignore", "pipe"] });

  execSync("git add .", quietOptions());

  try {
    execSync('git commit -m "Nouvelle moisson"', plainOptions());
  } catch (failure) {
    // An empty commit is not an error: only that case is tolerated.
    const isNothingToCommit =
      failure.stderr !== null && /nothing to commit/.test(failure.stdout);
    if (!isNothingToCommit) {
      console.error(failure.output);
      throw failure;
    }
  }

  let hasPushFailed = false;
  for (const remoteName of remotes || []) {
    try {
      execSync(`git push ${remoteName} master`, quietOptions());
    } catch (failure) {
      // A failed push is reported but must not prevent the next ones.
      console.error(failure.output);
      hasPushFailed = true;
    }
  }
  return hasPushFailed;
}
/**
 * Convert the cleaned HTML of the Assemblée "textes de lois" to JSON files.
 *
 * Driven by the module-level `options`:
 * - reads the cleaned HTML from `<dataDir>/documents_html_nettoye` (which must
 *   exist) and writes the JSON to `<dataDir>/textes_lois_json`;
 * - on a full run (neither `one` nor `uid`), first removes every entry of the
 *   JSON directory whose name doesn't start with a dot;
 * - with `uid`, skips the documents sorted before that UID;
 * - with `one`, stops after the first converted document;
 * - finally calls `commitAndPush` and exits the process with status 1 when it
 *   reports a failed push.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} When "commit" is combined with "one" or "uid", or
 *   when the cleaned HTML directory is missing.
 */
async function convertTextesLois() {
  // The messages state that "commit" can't be combined with "one" or "uid",
  // so the assertions must fail exactly when both options are given.
  assert(
    !(options.commit && options.one),
    'Options "commit" & "one" are incompatible',
  );
  assert(
    !(options.commit && options.uid),
    'Options "commit" & "uid" are incompatible',
  );

  const dataDir = options.dataDir;
  const { documentByUid } = loadAssembleeData(
    dataDir,
    EnabledDatasets.DossiersLegislatifs,
    options.legislature,
  );

  const documentsCleanHtmlDir = path.join(dataDir, "documents_html_nettoye");
  assert(
    fs.existsSync(documentsCleanHtmlDir),
    `Directory "${documentsCleanHtmlDir}" doesn't exist`,
  );
  const documentsJsonDir = path.join(dataDir, "textes_lois_json");
  fs.ensureDirSync(documentsJsonDir);

  if (!options.one && !options.uid) {
    // Full run: start from an empty output directory, keeping dot entries.
    for (const filename of fs.readdirSync(documentsJsonDir)) {
      if (filename[0] === ".") {
        continue;
      }
      fs.removeSync(path.join(documentsJsonDir, filename));
    }
  }

  const textesLois = Object.values(documentByUid)
    .filter((texteLoi) => texteLoi.xsiType === TypeDocument.TexteLoiType)
    // Ignore "textes de lois" from Sénat.
    .filter((texteLoi) => texteLoi.uid.substring(4, 6) !== "SN")
    // Keep only "lettres rectificatives", "projets de lois" and "propositions de lois".
    .filter(
      (texteLoi) =>
        texteLoi.uid.startsWith("LETT") ||
        texteLoi.uid.startsWith("PION") ||
        texteLoi.uid.startsWith("PRJL"),
    )
    .sort((a, b) => a.uid.localeCompare(b.uid));

  const firstUid = options.uid;
  // While true, documents are ignored until `firstUid` is met.
  let skip = !!firstUid;
  for (const texteLoi of textesLois) {
    if (skip) {
      if (texteLoi.uid === firstUid) {
        skip = false;
      } else {
        continue;
      }
    }

    const texteLoiCleanHtmlBasename = pathFromDocumentUid(
      documentsCleanHtmlDir,
      texteLoi.uid,
    );
    const texteLoiCleanHtmlFilePath = `${texteLoiCleanHtmlBasename}.html`;
    if (!fs.existsSync(texteLoiCleanHtmlFilePath)) {
      // No cleaned HTML for this document: nothing to convert.
      continue;
    }
    if (!options.silent) {
      console.log(`Converting bill ${texteLoi.uid}…`);
    }

    const texteLoiCleanHtml = fs.readFileSync(texteLoiCleanHtmlFilePath, {
      encoding: "utf8",
    });
    // Source locations are requested from the parser and the file is given to
    // the hast conversion; they are stripped again before serialization.
    const texteLoiParse5Ast = parse5.parse(texteLoiCleanHtml, {
      sourceCodeLocationInfo: true,
    });
    const texteLoiVfile = vfile({
      contents: texteLoiCleanHtml,
      path: texteLoiCleanHtmlFilePath,
    });
    const texteLoiHast = hastFromParse5(texteLoiParse5Ast, {
      file: texteLoiVfile,
    });
    const documents = documentsFromAssembleeHast(texteLoiHast, {
      silent: options.silent,
      verbose: options.verbose,
    });
    // NOTE(review): presumably `documents` shares nodes with the tree, so this
    // keeps positional data out of the JSON — confirm against the parser.
    unistRemovePosition(texteLoiHast);

    const texteLoiJsonBasename = pathFromDocumentUid(
      documentsJsonDir,
      texteLoi.uid,
    );
    const texteLoiJsonFilePath = `${texteLoiJsonBasename}.json`;
    fs.writeJsonSync(texteLoiJsonFilePath, documents, {
      encoding: "utf8",
      spaces: 2,
    });

    if (options.one) {
      break;
    }
  }

  // `commitAndPush` returns true when a push failed.
  if (commitAndPush(documentsJsonDir, options.commit, options.remote)) {
    process.exit(1);
  }
}
// Script entry point: run the conversion and exit with a non-zero status when
// it fails.
convertTextesLois().catch((error) => {
  // Failures go to stderr, like the other error reports of this script.
  console.error(error);
  process.exit(1);
});