@tricoteuses/senat
Version:
Handle French Sénat's open data
236 lines (235 loc) • 11.1 kB
JavaScript
import assert from "assert";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import path from "path";
import { datasets, EnabledDatasets, getEnabledDatasets } from "../datasets";
import { DATA_ORIGINAL_FOLDER, DOCUMENT_METADATA_FILE, DOSLEG_DOSSIERS_FOLDER, RAPPORT_FOLDER, SENS_CIRCONSCRIPTIONS_FOLDER, SENS_ORGANISMES_FOLDER, SENS_SENATEURS_FOLDER, TEXTE_FOLDER, } from "../loaders";
import { findAllAmendements, findAllCirconscriptions, findAllDebats, findAllLois, findAllOrganismes, findAllQuestions, findAllSens, findAuteur, findSenatRapportUrls, findSenatTexteUrls, } from "../model";
import { UNDEFINED_SESSION } from "../types/sessions";
import { getSessionFromDate, getSessionFromSignet } from "./datautil";
import { commonOptions } from "./shared/cli_helpers";
import { ensureAndClearDir } from "./shared/util";
// Command-line option definitions for this script: a copy of the shared common options.
const optionsDefinitions = [...commonOptions];
// Parsed command-line options; the functions below read "dataDir", "categories",
// "silent" and "verbose" from it.
const options = commandLineArgs(optionsDefinitions);
// Base URLs against which document file names are resolved when building metadata files.
const SENAT_TEXTE_XML_BASE_URL = "https://www.senat.fr/akomantoso/";
const SENAT_TEXTE_BASE_URL = "https://www.senat.fr/leg/";
const SENAT_EXPOSE_DES_MOTIFS_BASE_URL = "https://www.senat.fr/leg/exposes-des-motifs/";
const SENAT_RAPPORT_BASE_URL = "https://www.senat.fr/rap/";
/**
 * Runs the conversion of every enabled dataset, one after the other.
 *
 * Reads the target directory from the "dataDir" option and the dataset
 * selection from the "categories" option. The elapsed time is printed
 * unless the "silent" option is set.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} When the "dataDir" option is missing.
 */
async function convertData() {
    const { dataDir } = options;
    assert(dataDir, "Missing argument: data directory");
    const enabledDatasets = getEnabledDatasets(options["categories"]);
    // Flag/converter pairs, listed in the order in which they must run.
    const converters = [
        [EnabledDatasets.Ameli, convertDatasetAmeli],
        [EnabledDatasets.Debats, convertDatasetDebats],
        [EnabledDatasets.DosLeg, convertDatasetDosLeg],
        [EnabledDatasets.Questions, convertDatasetQuestions],
        [EnabledDatasets.Sens, convertDatasetSens],
    ];
    console.time("data transformation time");
    for (const [flag, convert] of converters) {
        // enabledDatasets is a bit mask; each converter runs only when its bit is set.
        if (enabledDatasets & flag) {
            await convert(dataDir);
        }
    }
    if (!options["silent"]) {
        console.timeEnd("data transformation time");
    }
}
/**
 * Writes every amendment yielded by `findAllAmendements()` to its own JSON file at
 * `<dataDir>/<database>/<session>/<signet>/<numero>.json`.
 *
 * The dataset root directory is passed to `ensureAndClearDir` before any file is written.
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertDatasetAmeli(dataDir) {
    const dataset = datasets.ameli;
    if (!options["silent"]) {
        console.log(`Converting database ${dataset.database} data into files…`);
    }
    const ameliReorganizedRootDir = path.join(dataDir, dataset.database);
    ensureAndClearDir(ameliReorganizedRootDir);
    for await (const amendement of findAllAmendements()) {
        if (options["verbose"]) {
            console.log(`Converting ${amendement.numero} file…`);
        }
        // Test the raw value before stringifying: String(null) and String(undefined)
        // are the truthy strings "null" and "undefined", so coercing first would
        // never reach the UNDEFINED_SESSION fallback.
        const rawSession = amendement.session;
        const session = rawSession == null || rawSession === "" ? UNDEFINED_SESSION : rawSession;
        // Without a dossier signet, the folder name is built from the text nature and number.
        const signetDossierLegislatif = amendement.signet_dossier_legislatif ||
            `${amendement.nature_texte}-${amendement.numero_texte}`.toLowerCase();
        const ameliReorganizedDir = path.join(ameliReorganizedRootDir, String(session), signetDossierLegislatif);
        fs.ensureDirSync(ameliReorganizedDir);
        const amendementFileName = `${amendement.numero}.json`;
        fs.writeJSONSync(path.join(ameliReorganizedDir, amendementFileName), amendement, { spaces: 2 });
    }
}
/**
 * Writes every debate yielded by `findAllDebats()`, once enriched by `enrichDebat`,
 * to `<dataDir>/<database>/<session>/<id>.json`.
 *
 * The session folder is derived from the debate's `date_seance` through
 * `getSessionFromDate`. The dataset root directory is passed to
 * `ensureAndClearDir` before any file is written.
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertDatasetDebats(dataDir) {
    const { database } = datasets.debats;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const rootDir = path.join(dataDir, database);
    ensureAndClearDir(rootDir);
    for await (const rawDebat of findAllDebats()) {
        if (options["verbose"]) {
            console.log(`Converting ${rawDebat.id} file…`);
        }
        const debat = await enrichDebat(rawDebat);
        const sessionDir = path.join(rootDir, String(getSessionFromDate(debat.date_seance)));
        fs.ensureDirSync(sessionDir);
        fs.writeJSONSync(path.join(sessionDir, `${debat.id}.json`), debat, { spaces: 2 });
    }
}
/**
 * Returns a copy of `debat` in which every intervention of `sections` and
 * `sections_divers` carries an `auteur` property resolved with `findAuteur`
 * from the intervention's `auteur_code`.
 *
 * Sections and interventions are rebuilt as new objects. A shallow copy of
 * `debat` alone would still share them with the caller, and setting `auteur`
 * would then modify the caller's data.
 *
 * @param {object} debat - Debate whose `sections` and `sections_divers` are
 *   iterables of sections, each holding an iterable `interventions`.
 * @returns {Promise<object>} The enriched copy; `debat` itself is left untouched.
 */
async function enrichDebat(debat) {
    // Rebuilds a list of sections, copying each intervention and adding its `auteur`.
    const enrichSections = async (sections) => {
        const enrichedSections = [];
        for (const section of sections) {
            const interventions = [];
            for (const intervention of section.interventions) {
                // Lookups are awaited one at a time, in document order.
                interventions.push({
                    ...intervention,
                    auteur: await findAuteur(intervention.auteur_code),
                });
            }
            enrichedSections.push({ ...section, interventions });
        }
        return enrichedSections;
    };
    const sections = await enrichSections(debat.sections);
    const sections_divers = await enrichSections(debat.sections_divers);
    return { ...debat, sections, sections_divers };
}
/**
 * Writes every dossier yielded by `findAllLois()` to
 * `<dataDir>/<database>/<DOSLEG_DOSSIERS_FOLDER>/<session>/<signet>.json`,
 * then runs `convertTexteUrls` and `convertRapportUrls` on the same data directory.
 *
 * The session folder comes from `getSessionFromSignet`, falling back to
 * `UNDEFINED_SESSION` when that returns a falsy value. The dataset root and
 * the dossiers directory are passed to `ensureAndClearDir` before any file is written.
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertDatasetDosLeg(dataDir) {
    const dataset = datasets.dosleg;
    if (!options["silent"]) {
        console.log(`Converting database ${dataset.database} data into files…`);
    }
    const doslegReorganizedRootDir = path.join(dataDir, dataset.database);
    const dossiersReorganizedDir = path.join(doslegReorganizedRootDir, DOSLEG_DOSSIERS_FOLDER);
    ensureAndClearDir(doslegReorganizedRootDir);
    ensureAndClearDir(dossiersReorganizedDir);
    for await (const loi of findAllLois()) {
        if (options["verbose"]) {
            console.log(`Converting ${loi.signet} file…`);
        }
        const session = getSessionFromSignet(loi.signet) || UNDEFINED_SESSION;
        const loiReorganizedDir = path.join(dossiersReorganizedDir, String(session));
        fs.ensureDirSync(loiReorganizedDir);
        const loiFileName = `${loi.signet}.json`;
        fs.writeJSONSync(path.join(loiReorganizedDir, loiFileName), loi, {
            spaces: 2,
        });
    }
    await convertTexteUrls(dataDir);
    await convertRapportUrls(dataDir);
}
/**
 * Writes every question yielded by `findAllQuestions()` to
 * `<dataDir>/<database>/<legislature>/<reference>.json`.
 *
 * Questions whose `legislature` is falsy go to the `0` folder. The dataset
 * root directory is passed to `ensureAndClearDir` before any file is written.
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertDatasetQuestions(dataDir) {
    const { database } = datasets.questions;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const rootDir = path.join(dataDir, database);
    ensureAndClearDir(rootDir);
    for await (const question of findAllQuestions()) {
        const { reference } = question;
        if (options["verbose"]) {
            console.log(`Converting ${reference} file…`);
        }
        const legislatureDir = path.join(rootDir, String(question.legislature || 0));
        fs.ensureDirSync(legislatureDir);
        fs.writeJSONSync(path.join(legislatureDir, `${reference}.json`), question, { spaces: 2 });
    }
}
/**
 * Writes one metadata file per text yielded by `findSenatTexteUrls()` to
 * `<dataDir>/<TEXTE_FOLDER>/<DATA_ORIGINAL_FOLDER>/<session>/<name>/<DOCUMENT_METADATA_FILE>`.
 *
 * `<name>` is the file name of the text's `url` without its extension;
 * `<session>` falls back to `UNDEFINED_SESSION` when the text has none.
 * The metadata holds the XML, HTML and PDF URLs of the text and, when
 * `hasExposeDesMotifs` is truthy, the URL of its "exposé des motifs".
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertTexteUrls(dataDir) {
    const textesDir = path.join(dataDir, TEXTE_FOLDER);
    fs.ensureDirSync(textesDir);
    const originalsRootDir = path.join(textesDir, DATA_ORIGINAL_FOLDER);
    for await (const { url, session, hasExposeDesMotifs } of findSenatTexteUrls()) {
        const { name } = path.parse(url);
        const outputDir = path.join(originalsRootDir, `${session ?? UNDEFINED_SESSION}`, name);
        fs.ensureDirSync(outputDir);
        // Left undefined when there is no "exposé des motifs".
        let exposeDesMotifsUrl;
        if (hasExposeDesMotifs) {
            exposeDesMotifsUrl = new URL(`${name}-expose.html`, SENAT_EXPOSE_DES_MOTIFS_BASE_URL);
        }
        const metadata = {
            name,
            session,
            url_expose_des_motifs: exposeDesMotifsUrl,
            url_xml: new URL(`${name}.akn.xml`, SENAT_TEXTE_XML_BASE_URL),
            url_html: new URL(`${name}.html`, SENAT_TEXTE_BASE_URL),
            url_pdf: new URL(`${name}.pdf`, SENAT_TEXTE_BASE_URL),
        };
        fs.writeJSONSync(path.join(outputDir, DOCUMENT_METADATA_FILE), metadata, { spaces: 2 });
    }
}
/**
 * Writes one metadata file per report yielded by `findSenatRapportUrls()` to
 * `<dataDir>/<RAPPORT_FOLDER>/<DATA_ORIGINAL_FOLDER>/<session>/<name>/<DOCUMENT_METADATA_FILE>`.
 *
 * `<name>` is the file name of the report's `url` without its extension;
 * `<session>` falls back to `UNDEFINED_SESSION` when the report has none.
 * The HTML and PDF URLs are built next to the report's own location, as
 * `<name>_mono.html` and `<name>1.pdf`.
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertRapportUrls(dataDir) {
    const rapportsDir = path.join(dataDir, RAPPORT_FOLDER);
    fs.ensureDirSync(rapportsDir);
    const originalRapportsDir = path.join(rapportsDir, DATA_ORIGINAL_FOLDER);
    for await (const rapport of findSenatRapportUrls()) {
        const { dir, name } = path.parse(rapport.url);
        const outputDir = path.join(originalRapportsDir, `${rapport.session ?? UNDEFINED_SESSION}`, name);
        fs.ensureDirSync(outputDir);
        // Resolves a file sitting in the same directory as the report against the base URL.
        const siblingUrl = (base) => new URL(path.format({ dir, base }), SENAT_RAPPORT_BASE_URL);
        const metadata = {
            name,
            session: rapport.session,
            url_html: siblingUrl(`${name}_mono.html`),
            url_pdf: siblingUrl(`${name}1.pdf`),
        };
        fs.writeJSONSync(path.join(outputDir, DOCUMENT_METADATA_FILE), metadata, { spaces: 2 });
    }
}
/**
 * Writes the records of the "sens" dataset to JSON files under `<dataDir>/<database>`:
 *  - `findAllSens()` records to `<SENS_SENATEURS_FOLDER>/<matricule>.json`;
 *  - `findAllCirconscriptions()` records to `<SENS_CIRCONSCRIPTIONS_FOLDER>/<identifiant>.json`;
 *  - `findAllOrganismes()` records to `<SENS_ORGANISMES_FOLDER>/<type_code>/<code>.json`.
 *
 * The dataset root and the three folders are passed to `ensureAndClearDir`
 * before any file is written.
 *
 * @param {string} dataDir - Root directory that receives the converted data.
 * @returns {Promise<void>}
 */
async function convertDatasetSens(dataDir) {
    const { database } = datasets.sens;
    if (!options["silent"]) {
        console.log(`Converting database ${database} data into files…`);
    }
    const rootDir = path.join(dataDir, database);
    const senateursDir = path.join(rootDir, SENS_SENATEURS_FOLDER);
    const circonscriptionsDir = path.join(rootDir, SENS_CIRCONSCRIPTIONS_FOLDER);
    const organismesDir = path.join(rootDir, SENS_ORGANISMES_FOLDER);
    // The root is handled first, then each sub-folder.
    for (const dir of [rootDir, senateursDir, circonscriptionsDir, organismesDir]) {
        ensureAndClearDir(dir);
    }
    // Prints the per-record progress line when the "verbose" option is set.
    const announce = (identifier) => {
        if (options["verbose"]) {
            console.log(`Converting ${identifier} file…`);
        }
    };
    // Writes `record` as `<identifier>.json` inside `dir`.
    const writeRecord = (dir, identifier, record) => {
        fs.writeJSONSync(path.join(dir, `${identifier}.json`), record, { spaces: 2 });
    };
    for await (const sen of findAllSens()) {
        announce(sen.matricule);
        writeRecord(senateursDir, sen.matricule, sen);
    }
    for await (const circonscription of findAllCirconscriptions()) {
        announce(circonscription.identifiant);
        writeRecord(circonscriptionsDir, circonscription.identifiant, circonscription);
    }
    for await (const organisme of findAllOrganismes()) {
        announce(organisme.code);
        // Organismes are grouped in one sub-folder per type code.
        const typeDir = path.join(organismesDir, organisme.type_code);
        fs.ensureDirSync(typeDir);
        writeRecord(typeDir, organisme.code, organisme);
    }
}
// Script entry point: run the conversion, then end the process explicitly —
// exit status 0 on success, exit status 1 after printing the error when the
// conversion rejects.
// NOTE(review): the error is printed with console.log (stdout), not console.error — confirm this is intended.
convertData()
.then(() => process.exit(0))
.catch((error) => {
console.log(error);
process.exit(1);
});