UNPKG

@tricoteuses/arbre-de-la-loi

Version:

Generate ASTs from the French bills & laws; manipulate & export them to Markdown, etc.

177 lines (176 loc) 5.71 kB
import {
  EnabledDatasets,
  loadAssembleeData,
  pathFromDocumentUid,
} from "@tricoteuses/assemblee/lib/loaders";
import assert from "assert";
import { execSync } from "child_process";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import fetch from "node-fetch";
import path from "path";

// Command-line options accepted by this script (parsed with command-line-args).
const optionsDefinitions = [
  {
    alias: "c",
    help: "commit documents",
    name: "commit",
    type: Boolean,
  },
  {
    alias: "f",
    help: "retrieve all documents, even already retrieved ones",
    name: "full",
    type: Boolean,
  },
  {
    alias: "l",
    defaultValue: "15",
    name: "legislature",
    type: String,
  },
  {
    alias: "r",
    help: "push commit to given remote",
    multiple: true,
    name: "remote",
    type: String,
  },
  {
    alias: "s",
    help: "don't log anything",
    name: "silent",
    type: Boolean,
  },
  {
    alias: "u",
    help: "UID of first Assemblée's document to retrieve",
    name: "uid",
    type: String,
  },
  {
    alias: "v",
    help: "verbose logs",
    name: "verbose",
    type: Boolean,
  },
  {
    defaultOption: true,
    help: "directory containing Assemblée open data files",
    name: "dataDir",
    type: String,
  },
];
const options = commandLineArgs(optionsDefinitions);

/**
 * Stage and commit everything in a git working directory, then push the
 * `master` branch to each given remote.
 *
 * @param {string} repositoryDir - Directory in which the git commands are run.
 * @param {boolean} [commit] - When falsy, nothing is done and `false` is returned.
 * @param {string[]} [remotes] - Names of the remotes to push to (may be omitted).
 * @returns {boolean} `true` when at least one push failed, `false` otherwise.
 * @throws The error raised by `execSync` when `git add` fails, or when
 *   `git commit` fails for a reason other than "nothing to commit".
 */
function commitAndPush(repositoryDir, commit, remotes) {
  if (!commit) {
    return false;
  }

  execSync("git add .", {
    cwd: repositoryDir,
    env: process.env,
    encoding: "utf-8",
    stdio: ["ignore", "ignore", "pipe"],
  });

  try {
    execSync('git commit -m "Nouvelle moisson"', {
      cwd: repositoryDir,
      env: process.env,
      encoding: "utf-8",
    });
  } catch (childProcess) {
    // A failed commit is tolerated only when its standard output says that
    // there was nothing to commit; any other failure is fatal.
    if (
      childProcess.stderr === null ||
      !/nothing to commit/.test(childProcess.stdout)
    ) {
      console.error(childProcess.output);
      throw childProcess;
    }
  }

  let error = false;
  for (const remote of remotes || []) {
    try {
      execSync(`git push ${remote} master`, {
        cwd: repositoryDir,
        env: process.env,
        encoding: "utf-8",
        stdio: ["ignore", "ignore", "pipe"],
      });
    } catch (childProcess) {
      // Don't stop when push fails: report it and try the remaining remotes.
      console.error(childProcess.output);
      error = true;
    }
  }
  return error;
}

/**
 * Download the HTML version of the Assemblée documents into
 * `<dataDir>/documents_html`, then optionally commit and push that directory.
 *
 * For each document either a `.html` file (successful download) or a `.404`
 * marker file (page not found) is written; unless the "full" option is set,
 * documents that already have one of these files are skipped.
 *
 * Exits the process with status 1 when a push fails.
 *
 * @returns {Promise<Array>} The sorted list of documents that were considered
 *   (documents whose UID has "SN" at positions 4-5 are excluded).
 */
async function retrieveDocuments() {
  // The message states that both options cannot be used together, so the
  // assertion must fail only when both are set.
  assert(
    !(options.commit && options.uid),
    'Options "commit" & "uid" are incompatible',
  );

  const dataDir = options.dataDir;
  const { documentByUid } = loadAssembleeData(
    dataDir,
    EnabledDatasets.DossiersLegislatifs,
    options.legislature,
  );

  const documentsDir = path.join(dataDir, "documents_html");
  fs.ensureDirSync(documentsDir);
  if (options.full && !options.uid) {
    // Full harvest from scratch: empty the directory, keeping dot-entries
    // (presumably so that the `.git` directory survives — TODO confirm).
    for (const filename of fs.readdirSync(documentsDir)) {
      if (filename[0] === ".") {
        continue;
      }
      fs.removeSync(path.join(documentsDir, filename));
    }
  }

  const documents = Object.values(documentByUid)
    // Ignore documents from Sénat.
    .filter((document) => document.uid.substring(4, 6) !== "SN")
    .sort((a, b) => a.uid.localeCompare(b.uid));

  // When a first UID is given, skip every document sorted before it.
  const firstUid = options.uid;
  let skip = !!firstUid;
  for (const document of documents) {
    if (skip) {
      if (document.uid === firstUid) {
        skip = false;
      } else {
        continue;
      }
    }

    const basename = pathFromDocumentUid(documentsDir, document.uid);
    const fileNotFoundPath = `${basename}.404`;
    const filePath = `${basename}.html`;
    if (
      !options.full &&
      (fs.existsSync(filePath) || fs.existsSync(fileNotFoundPath))
    ) {
      continue;
    }

    // Note: Both URLs should work. The alternative one is
    // https://www.assemblee-nationale.fr/dyn/docs/<uid>.raw
    const url = new URL(
      `https://www.assemblee-nationale.fr/dyn/opendata/${document.uid}.html`,
    );
    if (!options.silent) {
      console.log(`Retrieving document ${document.uid} at ${url.toString()}…`);
    }

    const response = await fetch(url);
    const page = await response.text();
    if (response.ok) {
      fs.writeFileSync(filePath, page, {
        encoding: "utf8",
      });
      fs.removeSync(fileNotFoundPath);
    } else if (response.status === 404) {
      if (!options.silent) {
        console.warn(`Page "${url}" not found.`);
      }
      // The marker file must be written whatever the verbosity, otherwise
      // silent runs would fetch the missing page again every time.
      fs.writeFileSync(fileNotFoundPath, `Page "${url}" not found.`, {
        encoding: "utf8",
      });
      fs.removeSync(filePath);
    } else {
      console.error(
        `Error while getting page "${url}":\n\nError:\n${JSON.stringify(
          { code: response.status, message: response.statusText },
          null,
          2,
        )}`,
      );
      // Leave no file behind so that the document is fetched again next run.
      fs.removeSync(fileNotFoundPath);
      fs.removeSync(filePath);
    }
  }

  if (commitAndPush(documentsDir, options.commit, options.remote)) {
    process.exit(1);
  }

  return documents;
}

retrieveDocuments().catch((error) => {
  console.error(error);
  process.exit(1);
});