UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

236 lines (235 loc) 11.1 kB
import assert from "assert";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import path from "path";
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
import {
  DATA_ORIGINAL_FOLDER,
  DOCUMENT_METADATA_FILE,
  DOSLEG_DOSSIERS_FOLDER,
  RAPPORT_FOLDER,
  SENS_CIRCONSCRIPTIONS_FOLDER,
  SENS_ORGANISMES_FOLDER,
  SENS_SENATEURS_FOLDER,
  TEXTE_FOLDER,
} from "../loaders";
import {
  findAllAmendements,
  findAllCirconscriptions,
  findAllDebats,
  findAllLois,
  findAllOrganismes,
  findAllQuestions,
  findAllSens,
  findAuteur,
  findSenatRapportUrls,
  findSenatTexteUrls,
} from "../model";
import { UNDEFINED_SESSION } from "../types/sessions";
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
import { commonOptions } from "./shared/cli_helpers";
import { ensureAndClearDir } from "./shared/util";

const optionsDefinitions = [...commonOptions];
const options = commandLineArgs(optionsDefinitions);

const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";

/** Label shared by `console.time` / `console.timeEnd` in `convertData`. */
const TRANSFORMATION_TIMER_LABEL = "data transformation time";

/**
 * Announces the start of a dataset conversion unless the `silent` option is set.
 *
 * @param {{ database: string }} dataset - Dataset descriptor taken from `datasets`.
 */
function logDatasetStart(dataset) {
  if (!options["silent"]) {
    console.log(`Converting database ${dataset.database} data into files…`);
  }
}

/**
 * Logs the record currently being converted when the `verbose` option is set.
 *
 * @param {unknown} identifier - Value interpolated into the log message.
 */
function logRecord(identifier) {
  if (options["verbose"]) {
    console.log(`Converting ${identifier} file…`);
  }
}

/**
 * Serializes `data` as 2-space indented JSON into `dir/fileName`.
 * The directory must already exist.
 *
 * @param {string} dir - Destination directory.
 * @param {string} fileName - Name of the file to write inside `dir`.
 * @param {unknown} data - Value to serialize.
 */
function writeJsonFile(dir, fileName, data) {
  fs.writeJSONSync(path.join(dir, fileName), data, { spaces: 2 });
}

/**
 * Entry point: converts every enabled dataset into JSON files under the
 * directory given by the `dataDir` command-line option.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} When the `dataDir` option is missing.
 */
async function convertData() {
  const dataDir = options["dataDir"];
  assert(dataDir, "Missing argument: data directory");

  const enabledDatasets = getEnabledDatasets(options["categories"]);
  const isSilent = Boolean(options["silent"]);

  // Only start the timer when its result will actually be reported.
  if (!isSilent) {
    console.time(TRANSFORMATION_TIMER_LABEL);
  }

  // Converters run sequentially, in this order, when their flag is set in the
  // `enabledDatasets` bit field.
  const converters = [
    [EnabledDatasets.Ameli, convertDatasetAmeli],
    [EnabledDatasets.Debats, convertDatasetDebats],
    [EnabledDatasets.DosLeg, convertDatasetDosLeg],
    [EnabledDatasets.Questions, convertDatasetQuestions],
    [EnabledDatasets.Sens, convertDatasetSens],
  ];
  for (const [flag, convert] of converters) {
    if (enabledDatasets & flag) {
      await convert(dataDir);
    }
  }

  if (!isSilent) {
    console.timeEnd(TRANSFORMATION_TIMER_LABEL);
  }
}

/**
 * Writes one JSON file per amendement, laid out as
 * `<dataDir>/<database>/<session>/<signet dossier>/<numero>.json`.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertDatasetAmeli(dataDir) {
  const dataset = datasets.ameli;
  logDatasetStart(dataset);

  const ameliRootDir = path.join(dataDir, dataset.database);
  ensureAndClearDir(ameliRootDir);

  for await (const amendement of findAllAmendements()) {
    logRecord(amendement.numero);

    // Apply the fallback BEFORE stringifying: `String(null)` / `String(undefined)`
    // are the truthy strings "null" / "undefined", so a fallback placed after
    // the conversion can never trigger.
    const session = amendement.session || UNDEFINED_SESSION;
    const signetDossierLegislatif =
      amendement.signet_dossier_legislatif ||
      `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();

    const amendementDir = path.join(ameliRootDir, String(session), signetDossierLegislatif);
    fs.ensureDirSync(amendementDir);
    writeJsonFile(amendementDir, `${amendement.numero}.json`, amendement);
  }
}

/**
 * Writes one JSON file per debate, enriched with intervention authors, laid
 * out as `<dataDir>/<database>/<session>/<id>.json`.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertDatasetDebats(dataDir) {
  const dataset = datasets.debats;
  logDatasetStart(dataset);

  const debatsRootDir = path.join(dataDir, dataset.database);
  ensureAndClearDir(debatsRootDir);

  for await (const debat of findAllDebats()) {
    logRecord(debat.id);

    const enrichedDebat = await enrichDebat(debat);
    const session = getSessionFromDate(enrichedDebat.date_seance);

    const debatDir = path.join(debatsRootDir, String(session));
    fs.ensureDirSync(debatDir);
    writeJsonFile(debatDir, `${enrichedDebat.id}.json`, enrichedDebat);
  }
}

/**
 * Returns copies of `sections` in which every intervention carries an
 * `auteur` property resolved from its `auteur_code` via `findAuteur`.
 * Lookups are awaited one at a time, in document order.
 *
 * @param {Iterable<{ interventions: Iterable<object> }>} sections
 * @returns {Promise<object[]>} New section objects; the inputs are not modified.
 */
async function enrichSections(sections) {
  const enrichedSections = [];
  for (const section of sections) {
    const interventions = [];
    for (const intervention of section.interventions) {
      interventions.push({
        ...intervention,
        auteur: await findAuteur(intervention.auteur_code),
      });
    }
    enrichedSections.push({ ...section, interventions });
  }
  return enrichedSections;
}

/**
 * Builds a copy of `debat` whose `sections` and `sections_divers`
 * interventions each have their `auteur` resolved.
 *
 * The sections and interventions are copied rather than mutated in place, so
 * the object passed in is left untouched.
 *
 * @param {object} debat - Debate record with `sections` and `sections_divers`.
 * @returns {Promise<object>} The enriched copy.
 */
async function enrichDebat(debat) {
  const sections = await enrichSections(debat.sections);
  const sectionsDivers = await enrichSections(debat.sections_divers);
  return { ...debat, sections, sections_divers: sectionsDivers };
}

/**
 * Writes one JSON file per law dossier, laid out as
 * `<dataDir>/<database>/<DOSLEG_DOSSIERS_FOLDER>/<session>/<signet>.json`,
 * then writes the texte and rapport URL metadata files.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertDatasetDosLeg(dataDir) {
  const dataset = datasets.dosleg;
  logDatasetStart(dataset);

  const doslegRootDir = path.join(dataDir, dataset.database);
  const dossiersDir = path.join(doslegRootDir, DOSLEG_DOSSIERS_FOLDER);
  ensureAndClearDir(doslegRootDir);
  ensureAndClearDir(dossiersDir);

  for await (const loi of findAllLois()) {
    logRecord(loi.signet);

    const session = getSessionFromSignet(loi.signet) || UNDEFINED_SESSION;
    const loiDir = path.join(dossiersDir, String(session));
    fs.ensureDirSync(loiDir);
    writeJsonFile(loiDir, `${loi.signet}.json`, loi);
  }

  await convertTexteUrls(dataDir);
  await convertRapportUrls(dataDir);
}

/**
 * Writes one JSON file per question, laid out as
 * `<dataDir>/<database>/<legislature>/<reference>.json`.
 * Questions without a legislature go to the `0` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertDatasetQuestions(dataDir) {
  const dataset = datasets.questions;
  logDatasetStart(dataset);

  const questionsRootDir = path.join(dataDir, dataset.database);
  ensureAndClearDir(questionsRootDir);

  for await (const question of findAllQuestions()) {
    logRecord(question.reference);

    const legislature = question.legislature || 0;
    const questionDir = path.join(questionsRootDir, String(legislature));
    fs.ensureDirSync(questionDir);
    writeJsonFile(questionDir, `${question.reference}.json`, question);
  }
}

/**
 * Writes a metadata file (name, session and document URLs) for every texte
 * under `<dataDir>/<TEXTE_FOLDER>/<DATA_ORIGINAL_FOLDER>/<session>/<name>/`.
 * Existing content of the textes folder is not cleared.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertTexteUrls(dataDir) {
  const textesDir = path.join(dataDir, TEXTE_FOLDER);
  fs.ensureDirSync(textesDir);
  const originalTextesDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);

  for await (const texte of findSenatTexteUrls()) {
    // `texte.url` is a URL path, not a filesystem path: always parse it with
    // POSIX rules, whatever the host platform.
    const texteName = path.posix.parse(texte.url).name;
    const texteDir = path.join(originalTextesDir, `${texte.session ?? UNDEFINED_SESSION}`, texteName);
    fs.ensureDirSync(texteDir);

    // URL instances are serialized as their href string; an `undefined`
    // property is omitted from the JSON output.
    const metadata = {
      name: texteName,
      session: texte.session,
      url_expose_des_motifs: texte.hasExposeDesMotifs
        ? new URL(`${texteName}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL)
        : undefined,
      url_xml: new URL(`${texteName}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
      url_html: new URL(`${texteName}.html`, SENAT_TEXTE_BASE_URL),
      url_pdf: new URL(`${texteName}.pdf`, SENAT_TEXTE_BASE_URL),
    };
    writeJsonFile(texteDir, DOCUMENT_METADATA_FILE, metadata);
  }
}

/**
 * Writes a metadata file (name, session and document URLs) for every rapport
 * under `<dataDir>/<RAPPORT_FOLDER>/<DATA_ORIGINAL_FOLDER>/<session>/<name>/`.
 * Existing content of the rapports folder is not cleared.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertRapportUrls(dataDir) {
  const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
  fs.ensureDirSync(rapportsDir);
  const originalRapportsDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);

  for await (const rapport of findSenatRapportUrls()) {
    // `rapport.url` is a URL path: use POSIX rules so the rebuilt paths keep
    // forward slashes on every platform.
    const parsedRapportUrl = path.posix.parse(rapport.url);
    const rapportName = parsedRapportUrl.name;
    const rapportDir = path.join(originalRapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, rapportName);
    fs.ensureDirSync(rapportDir);

    // The HTML and PDF variants sit next to the original document and differ
    // only by their file name suffix.
    const rapportHtmlUrl = path.posix.format({
      dir: parsedRapportUrl.dir,
      base: `${rapportName}_mono.html`,
    });
    const rapportPdfUrl = path.posix.format({
      dir: parsedRapportUrl.dir,
      base: `${rapportName}1.pdf`,
    });

    const metadata = {
      name: rapportName,
      session: rapport.session,
      url_html: new URL(rapportHtmlUrl, SENAT_RAPPORT_BASE_URL),
      url_pdf: new URL(rapportPdfUrl, SENAT_RAPPORT_BASE_URL),
    };
    writeJsonFile(rapportDir, DOCUMENT_METADATA_FILE, metadata);
  }
}

/**
 * Writes one JSON file per senator, circonscription and organisme under
 * `<dataDir>/<database>/`, each kind in its own sub-folder. Organismes are
 * further grouped by `type_code`.
 *
 * @param {string} dataDir - Root data directory.
 * @returns {Promise<void>}
 */
async function convertDatasetSens(dataDir) {
  const dataset = datasets.sens;
  logDatasetStart(dataset);

  const sensRootDir = path.join(dataDir, dataset.database);
  const senateursDir = path.join(sensRootDir, SENS_SENATEURS_FOLDER);
  const circonscriptionsDir = path.join(sensRootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
  const organismesDir = path.join(sensRootDir, SENS_ORGANISMES_FOLDER);
  ensureAndClearDir(sensRootDir);
  ensureAndClearDir(senateursDir);
  ensureAndClearDir(circonscriptionsDir);
  ensureAndClearDir(organismesDir);

  for await (const sen of findAllSens()) {
    logRecord(sen.matricule);
    writeJsonFile(senateursDir, `${sen.matricule}.json`, sen);
  }

  for await (const circonscription of findAllCirconscriptions()) {
    logRecord(circonscription.identifiant);
    writeJsonFile(circonscriptionsDir, `${circonscription.identifiant}.json`, circonscription);
  }

  for await (const organisme of findAllOrganismes()) {
    logRecord(organisme.code);
    const organismeDir = path.join(organismesDir, organisme.type_code);
    fs.ensureDirSync(organismeDir);
    writeJsonFile(organismeDir, `${organisme.code}.json`, organisme);
  }
}

// The process is terminated explicitly on both outcomes.
// NOTE(review): presumably needed because open handles (e.g. database
// connections) would otherwise keep the event loop alive — confirm.
convertData()
  .then(() => process.exit(0))
  .catch((error) => {
    // Report failures on stderr so they are not mixed with regular output.
    console.error(error);
    process.exit(1);
  });