UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

109 lines (108 loc) 4.37 kB
import assert from "assert";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import { DateTime } from "luxon";
import path from "path";
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
import { parseAgendaFromFile } from "../model/agenda";
import { getSessionsFromStart } from "../types/sessions";
import { ID_DATE_FORMAT } from "./datautil";
import { commonOptions } from "./shared/cli_helpers";
import { ensureAndClearDir } from "./shared/util";

// Command-line options: the shared ones plus the agenda-specific `parseAgenda` flag.
const optionsDefinitions = [
  ...commonOptions,
  {
    help: "parse and convert agenda events into JSON",
    name: "parseAgenda",
    type: Boolean,
  },
];
const options = commandLineArgs(optionsDefinitions);

const SENAT_GLOBAL_AGENDA_URL_ROOT = "https://www.senat.fr/aglae/Global";
// Date format used in the remote agenda file names (`agl<ddMMyyyy>.html`).
const EVENT_DATE_FORMAT = "ddMMyyyy";
const PROCESSING_TIMER_LABEL = "agenda processing time";

/**
 * Error raised when an agenda page cannot be retrieved.
 */
class AgendaError extends Error {
  /**
   * @param {string} message - Failure detail (the HTTP status code at the only call site).
   * @param {string} agendaName - Agenda identifier, formatted with EVENT_DATE_FORMAT.
   */
  constructor(message, agendaName) {
    super(`An error occurred while retrieving Agenda ${agendaName}: ${message}`);
    // Make the error type visible in logs instead of the generic "Error".
    this.name = "AgendaError";
  }
}

/**
 * Downloads one agenda page per day for each session and, when the
 * `parseAgenda` option is set, converts each downloaded page to JSON.
 *
 * Failures on a single day are logged and do not stop the run.
 *
 * @param {string} dataDir - Root data directory.
 * @param {Iterable<number>} sessions - Session start years.
 *   NOTE(review): assumed to be numbers, since `session + 1` is used as a year — confirm.
 * @returns {Promise<void>}
 */
async function retrieveAgendas(dataDir, sessions) {
  const agendaRootDir = path.join(dataDir, AGENDA_FOLDER);
  // Presumably creates the directory and empties it (project helper) — confirm.
  ensureAndClearDir(agendaRootDir);

  const originalAgendaDir = path.join(agendaRootDir, DATA_ORIGINAL_FOLDER);
  fs.ensureDirSync(originalAgendaDir);

  const transformedAgendaDir = path.join(agendaRootDir, DATA_TRANSFORMED_FOLDER);
  if (options["parseAgenda"]) {
    fs.ensureDirSync(transformedAgendaDir);
  }

  // Upper bound shared by every session: never request pages more than
  // 15 days ahead of today.
  const fifteenDaysFromNow = new Date();
  fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15);

  for (const session of sessions) {
    const originalAgendaSessionDir = path.join(originalAgendaDir, `${session}`);
    fs.ensureDirSync(originalAgendaSessionDir);

    const transformedAgendaSessionDir = path.join(transformedAgendaDir, `${session}`);
    if (options["parseAgenda"]) {
      fs.ensureDirSync(transformedAgendaSessionDir);
    }

    // Date months are 0-indexed: the range runs from October 1st of the
    // session year to September 30th of the following year.
    const sessionEnd = new Date(session + 1, 8, 30);
    for (
      const date = new Date(session, 9, 1);
      date <= sessionEnd && date <= fifteenDaysFromNow;
      date.setDate(date.getDate() + 1)
    ) {
      const day = DateTime.fromJSDate(date);
      const agendaName = day.toFormat(EVENT_DATE_FORMAT);
      const agendaFileName = day.toFormat(ID_DATE_FORMAT);
      const agendaPath = path.join(originalAgendaSessionDir, `${agendaFileName}.html`);

      try {
        const isDownloaded = await downloadAgenda(agendaName, agendaPath);
        // Only parse when a file was actually written; otherwise the parser
        // would be pointed at a path that does not exist (e.g. after a 404).
        if (isDownloaded && options["parseAgenda"]) {
          await parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath);
        }
      } catch (error) {
        // Best effort: report the failure and carry on with the next day.
        console.error(error);
      }
    }
  }
}

/**
 * Fetches one agenda page and writes it to disk.
 *
 * Relies on the global `fetch`.
 *
 * @param {string} agendaName - Day formatted with EVENT_DATE_FORMAT.
 * @param {string} agendaPath - Destination file path.
 * @returns {Promise<boolean>} `true` when the file was written, `false` when
 *   the page does not exist (404) or the response had no content.
 * @throws {AgendaError} On any non-OK HTTP status other than 404.
 */
async function downloadAgenda(agendaName, agendaPath) {
  const agendaUrl = `${SENAT_GLOBAL_AGENDA_URL_ROOT}/agl${agendaName}.html`;
  if (!options["silent"]) {
    console.log(`Downloading Agenda ${agendaUrl}…`);
  }

  const response = await fetch(agendaUrl);
  if (!response.ok) {
    if (response.status !== 404) {
      throw new AgendaError(String(response.status), agendaName);
    }
    // A missing page is expected for some days: warn and skip.
    console.warn(`Agenda ${agendaUrl} not found`);
    return false;
  }

  const agendaContent = await response.arrayBuffer();
  if (!agendaContent) {
    return false;
  }

  fs.writeFileSync(agendaPath, Buffer.from(agendaContent));
  return true;
}

/**
 * Parses a downloaded agenda page and writes its events as a JSON file.
 * Nothing is written when the parser returns no events.
 *
 * @param {string} transformedAgendaSessionDir - Output directory for the session.
 * @param {string} agendaFileName - Base name (no extension) of the output file.
 * @param {string} agendaPath - Path of the downloaded HTML file.
 * @returns {Promise<void>}
 */
async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath) {
  if (!options["silent"]) {
    console.log(`Parsing Agenda ${agendaPath}…`);
  }

  const parsedAgendaEvents = await parseAgendaFromFile(agendaPath);
  if (!parsedAgendaEvents || parsedAgendaEvents.length === 0) {
    return;
  }

  const outputPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
  fs.writeJSONSync(outputPath, parsedAgendaEvents, { spaces: 2 });
}

/**
 * Script entry point: validates the options, then retrieves the agendas of
 * every session returned by `getSessionsFromStart`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const dataDir = options["dataDir"];
  assert(dataDir, "Missing argument: data directory");

  const sessions = getSessionsFromStart(options["fromSession"]);
  const isSilent = Boolean(options["silent"]);

  // Start and stop the timer under the same condition so it is never left
  // running when its result would not be printed.
  if (!isSilent) {
    console.time(PROCESSING_TIMER_LABEL);
  }
  await retrieveAgendas(dataDir, sessions);
  if (!isSilent) {
    console.timeEnd(PROCESSING_TIMER_LABEL);
  }
}

main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Fatal errors go to stderr, like the per-day errors above.
    console.error(error);
    process.exit(1);
  });