@tricoteuses/arbre-de-la-loi
Version:
Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.
177 lines (176 loc) • 5.71 kB
JavaScript
import { EnabledDatasets, loadAssembleeData, pathFromDocumentUid, } from "@tricoteuses/assemblee/lib/loaders";
import assert from "assert";
import { execSync } from "child_process";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import fetch from "node-fetch";
import path from "path";
// Command-line options understood by this script, declared in the definition
// format consumed by `commandLineArgs`. `dataDir` is the default (positional)
// option; `remote` may be given several times.
const optionsDefinitions = [
  { name: "commit", alias: "c", type: Boolean, help: "commit documents" },
  {
    name: "full",
    alias: "f",
    type: Boolean,
    help: "retrieve all documents, even already retrieved ones",
  },
  { name: "legislature", alias: "l", type: String, defaultValue: "15" },
  {
    name: "remote",
    alias: "r",
    type: String,
    multiple: true,
    help: "push commit to given remote",
  },
  { name: "silent", alias: "s", type: Boolean, help: "don't log anything" },
  {
    name: "uid",
    alias: "u",
    type: String,
    help: "UID of first Assemblée's document to retrieve",
  },
  { name: "verbose", alias: "v", type: Boolean, help: "verbose logs" },
  {
    name: "dataDir",
    type: String,
    defaultOption: true,
    help: "directory containing Assemblée open data files",
  },
];
// Parsed once at module load; the functions below read this object directly.
const options = commandLineArgs(optionsDefinitions);
/**
 * Stage and commit everything in a git working directory, then push the
 * `master` branch to each requested remote.
 *
 * A commit that fails only because there is nothing to commit is tolerated;
 * any other commit failure is logged and rethrown. A failed push is logged
 * but does not stop the remaining pushes.
 *
 * @param {string} repositoryDir - Directory in which the git commands run.
 * @param {boolean} commit - When falsy, nothing is executed at all.
 * @param {string[]} [remotes] - Names of the remotes to push to.
 * @returns {boolean} `true` when at least one push failed, `false` otherwise.
 */
function commitAndPush(repositoryDir, commit, remotes) {
  // Without a commit request there is nothing to stage, commit or push.
  if (!commit) {
    return false;
  }

  const baseExecOptions = {
    cwd: repositoryDir,
    env: process.env,
    encoding: "utf-8",
  };
  // Same options, but with stdin/stdout discarded and only stderr piped.
  const stderrOnlyExecOptions = {
    ...baseExecOptions,
    stdio: ["ignore", "ignore", "pipe"],
  };

  execSync("git add .", stderrOnlyExecOptions);

  try {
    execSync('git commit -m "Nouvelle moisson"', baseExecOptions);
  } catch (commitFailure) {
    // The captured stdout of the failed command tells an empty commit apart
    // from a real failure.
    const nothingToCommit =
      commitFailure.stderr !== null &&
      /nothing to commit/.test(commitFailure.stdout);
    if (!nothingToCommit) {
      console.error(commitFailure.output);
      throw commitFailure;
    }
  }

  let pushFailed = false;
  for (const remote of remotes || []) {
    try {
      execSync(`git push ${remote} master`, stderrOnlyExecOptions);
    } catch (pushFailure) {
      // Don't stop when push fails: report it and try the next remote.
      console.error(pushFailure.output);
      pushFailed = true;
    }
  }
  return pushFailed;
}
/**
 * Download the HTML version of every Assemblée document of the selected
 * legislature into `<dataDir>/documents_html`, then hand that directory to
 * `commitAndPush`.
 *
 * Behaviour is driven by the module-level `options`:
 * - `full`: re-download documents even when a `.html` or `.404` file already
 *   exists; when `uid` is not given, the directory is emptied first
 *   (entries whose name starts with "." are kept).
 * - `uid`: skip every document sorted before this UID.
 * - `commit` / `remote`: forwarded to `commitAndPush`.
 * - `silent`: suppress progress and "not found" messages.
 *
 * For each document, a successful response is stored as `<basename>.html`;
 * a 404 response is recorded as a `<basename>.404` marker file so that the
 * document is not requested again on the next non-full run.
 *
 * Exits the process with status 1 when `commitAndPush` reports a failed push.
 *
 * @returns {Promise<object[]>} The non-Sénat documents sorted by UID
 *   (including the ones that were skipped).
 * @throws {AssertionError} When both the `commit` and `uid` options are set.
 */
async function retrieveDocuments() {
  // The two options are mutually exclusive: fail only when both are set.
  assert(!options.commit || !options.uid, 'Options "commit" & "uid" are incompatible');
  const dataDir = options.dataDir;
  const { documentByUid } = loadAssembleeData(dataDir, EnabledDatasets.DossiersLegislatifs, options.legislature);
  const documentsDir = path.join(dataDir, "documents_html");
  fs.ensureDirSync(documentsDir);
  if (options.full && !options.uid) {
    // Full retrieval from the start: wipe previous results, keeping dot-entries.
    for (const filename of fs.readdirSync(documentsDir)) {
      if (filename[0] === ".") {
        continue;
      }
      fs.removeSync(path.join(documentsDir, filename));
    }
  }
  const documents = Object.values(documentByUid)
    // Ignore documents from Sénat.
    .filter((document) => document.uid.substring(4, 6) !== "SN")
    .sort((a, b) => a.uid.localeCompare(b.uid));
  const firstUid = options.uid;
  // When a first UID is requested, skip documents until it is reached.
  let skip = !!firstUid;
  for (const document of documents) {
    if (skip) {
      if (document.uid === firstUid) {
        skip = false;
      }
      else {
        continue;
      }
    }
    const basename = pathFromDocumentUid(documentsDir, document.uid);
    const fileNotFoundPath = `${basename}.404`;
    const filePath = `${basename}.html`;
    // Unless a full retrieval is requested, don't fetch again a document that
    // was already downloaded or already known to be missing.
    if (!options.full &&
      (fs.existsSync(filePath) || fs.existsSync(fileNotFoundPath))) {
      continue;
    }
    // Note: Both URLs should work.
    // const url = new URL(`https://www.assemblee-nationale.fr/dyn/docs/${document.uid}.raw`)
    const url = new URL(`https://www.assemblee-nationale.fr/dyn/opendata/${document.uid}.html`);
    if (!options.silent) {
      console.log(`Retrieving document ${document.uid} at ${url.toString()}…`);
    }
    const response = await fetch(url);
    const page = await response.text();
    if (response.ok) {
      fs.writeFileSync(filePath, page, {
        encoding: "utf8",
      });
      fs.removeSync(fileNotFoundPath);
    }
    else if (response.status === 404) {
      // Only the warning depends on `silent`; the marker file must always be
      // written, otherwise silent runs would re-request missing pages forever.
      if (!options.silent) {
        console.warn(`Page "${url}" not found.`);
      }
      fs.writeFileSync(fileNotFoundPath, `Page "${url}" not found.`, {
        encoding: "utf8",
      });
      fs.removeSync(filePath);
    }
    else {
      // Any other failure: report it and leave no file behind, so that the
      // document is tried again on the next run.
      console.error(`Error while getting page "${url}":\n\nError:\n${JSON.stringify({ code: response.status, message: response.statusText }, null, 2)}`);
      fs.removeSync(fileNotFoundPath);
      fs.removeSync(filePath);
    }
  }
  // commitAndPush returns true when a push failed.
  if (commitAndPush(documentsDir, options.commit, options.remote)) {
    process.exit(1);
  }
  return documents;
}
// Script entry point: run the retrieval and turn any rejection into a
// non-zero exit status.
retrieveDocuments().catch((error) => {
  // Report the failure on stderr so it is not mixed with the progress
  // messages written to stdout.
  console.error(error);
  process.exit(1);
});