@tricoteuses/senat
Version:
Handle French Sénat's open data
109 lines (108 loc) • 4.37 kB
JavaScript
import assert from "assert";
import commandLineArgs from "command-line-args";
import fs from "fs-extra";
import { DateTime } from "luxon";
import path from "path";
import { AGENDA_FOLDER, DATA_ORIGINAL_FOLDER, DATA_TRANSFORMED_FOLDER } from "../loaders";
import { parseAgendaFromFile } from "../model/agenda";
import { getSessionsFromStart } from "../types/sessions";
import { ID_DATE_FORMAT } from "./datautil";
import { commonOptions } from "./shared/cli_helpers";
import { ensureAndClearDir } from "./shared/util";
// Command-line flag specific to this script, added on top of the shared options.
const parseAgendaOption = {
    name: "parseAgenda",
    type: Boolean,
    help: "parse and convert agenda events into JSON",
};
const optionsDefinitions = [...commonOptions, parseAgendaOption];
// Parsed command-line options, read throughout this script via options["<name>"].
const options = commandLineArgs(optionsDefinitions);
// Base URL of the daily agenda pages; `/agl<agendaName>.html` is appended when downloading.
const SENAT_GLOBAL_AGENDA_URL_ROOT = "https://www.senat.fr/aglae/Global";
// Date pattern used to build the agenda name that is embedded in the download URL.
const EVENT_DATE_FORMAT = "ddMMyyyy";
/**
 * Error raised when an agenda page cannot be retrieved.
 *
 * The final message always has the form
 * `An error occurred while retrieving Agenda <agendaName>: <message>`.
 */
class AgendaError extends Error {
    /**
     * @param {string} message - Failure detail (the download code in this file passes the HTTP status).
     * @param {string} agendaName - Identifier of the agenda being retrieved.
     */
    constructor(message, agendaName) {
        super(`An error occurred while retrieving Agenda ${agendaName}: ${message}`);
        // Without an explicit name the error is reported as a generic "Error" in logs and stack traces.
        this.name = "AgendaError";
        // Keep the identifier available to handlers without having to parse the message.
        this.agendaName = agendaName;
    }
}
/**
 * Downloads the agenda page of every day of the given sessions and, when the
 * `parseAgenda` option is set, converts each downloaded page to JSON.
 *
 * For a session `S`, the days walked run from October 1st of year `S` to
 * September 30th of year `S + 1`, and never go past fifteen days after today.
 * The agenda root directory is handed to `ensureAndClearDir` before anything is
 * fetched. A failure on one day is logged and does not stop the remaining days.
 *
 * @param {string} dataDir - Root data directory; agendas are stored under `AGENDA_FOLDER` inside it.
 * @param {Iterable<number>} sessions - Session start years to process.
 * @returns {Promise<void>}
 */
async function retrieveAgendas(dataDir, sessions) {
    const isParsingEnabled = Boolean(options["parseAgenda"]);
    const agendaRootDir = path.join(dataDir, AGENDA_FOLDER);
    ensureAndClearDir(agendaRootDir);
    const originalAgendaDir = path.join(agendaRootDir, DATA_ORIGINAL_FOLDER);
    fs.ensureDirSync(originalAgendaDir);
    const transformedAgendaDir = path.join(agendaRootDir, DATA_TRANSFORMED_FOLDER);
    if (isParsingEnabled) {
        fs.ensureDirSync(transformedAgendaDir);
    }
    // The look-ahead limit does not depend on the session, so compute it once.
    const fifteenDaysFromNow = new Date();
    fifteenDaysFromNow.setDate(fifteenDaysFromNow.getDate() + 15);
    for (const session of sessions) {
        // Number() guards `session + 1` against string concatenation should a session arrive as a string.
        const sessionYear = Number(session);
        const originalAgendaSessionDir = path.join(originalAgendaDir, `${session}`);
        fs.ensureDirSync(originalAgendaSessionDir);
        const transformedAgendaSessionDir = path.join(transformedAgendaDir, `${session}`);
        if (isParsingEnabled) {
            fs.ensureDirSync(transformedAgendaSessionDir);
        }
        // Month indexes are zero-based: 9 is October, 8 is September.
        const sessionEnd = new Date(sessionYear + 1, 8, 30);
        const lastDate = sessionEnd < fifteenDaysFromNow ? sessionEnd : fifteenDaysFromNow;
        for (const date = new Date(sessionYear, 9, 1); date <= lastDate; date.setDate(date.getDate() + 1)) {
            const day = DateTime.fromJSDate(date);
            const agendaName = day.toFormat(EVENT_DATE_FORMAT);
            const agendaFileName = day.toFormat(ID_DATE_FORMAT);
            const agendaPath = path.join(originalAgendaSessionDir, `${agendaFileName}.html`);
            try {
                await downloadAgenda(agendaName, agendaPath);
                // downloadAgenda writes no file for a missing (404) agenda; skip parsing
                // in that case instead of failing on a file that does not exist.
                if (isParsingEnabled && fs.existsSync(agendaPath)) {
                    await parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath);
                }
            }
            catch (error) {
                // Best effort: report the failure and carry on with the next day.
                console.error(error);
            }
        }
    }
}
/**
 * Downloads one agenda page and stores it on disk.
 *
 * Nothing is written when the page does not exist (HTTP 404, reported as a
 * warning) or when the response body is empty.
 *
 * @param {string} agendaName - Agenda identifier inserted in the URL as `agl<agendaName>.html`.
 * @param {string} agendaPath - Destination file path for the downloaded content.
 * @returns {Promise<void>}
 * @throws {AgendaError} When the server answers with a non-success status other than 404.
 */
async function downloadAgenda(agendaName, agendaPath) {
    const agendaUrl = `${SENAT_GLOBAL_AGENDA_URL_ROOT}/agl${agendaName}.html`;
    if (!options["silent"]) {
        console.log(`Downloading Agenda ${agendaUrl}…`);
    }
    const response = await fetch(agendaUrl);
    if (response.status === 404) {
        // A missing agenda is expected for some days: warn, do not fail.
        console.warn(`Agenda ${agendaUrl} not found`);
        return;
    }
    if (!response.ok) {
        throw new AgendaError(String(response.status), agendaName);
    }
    const agendaContent = await response.arrayBuffer();
    // An ArrayBuffer is always truthy, so emptiness has to be tested through its
    // byte length; otherwise an empty body would produce an empty file.
    if (!agendaContent || agendaContent.byteLength === 0) {
        return;
    }
    fs.writeFileSync(agendaPath, Buffer.from(agendaContent));
}
/**
 * Parses a downloaded agenda page and stores its events as a JSON file.
 *
 * The output is written to `<transformedAgendaSessionDir>/<agendaFileName>.json`
 * with a two-space indentation. No file is written when parsing yields nothing
 * or an empty list.
 *
 * @param {string} transformedAgendaSessionDir - Directory receiving the JSON file.
 * @param {string} agendaFileName - Base name (without extension) of the JSON file.
 * @param {string} agendaPath - Path of the downloaded agenda page to parse.
 * @returns {Promise<void>}
 */
async function parseAgenda(transformedAgendaSessionDir, agendaFileName, agendaPath) {
    const isVerbose = !options["silent"];
    if (isVerbose) {
        console.log(`Parsing Agenda ${agendaPath}…`);
    }
    const events = await parseAgendaFromFile(agendaPath);
    const hasEvents = Boolean(events) && events.length !== 0;
    if (hasEvents) {
        const jsonPath = path.join(transformedAgendaSessionDir, `${agendaFileName}.json`);
        fs.writeJSONSync(jsonPath, events, { spaces: 2 });
    }
}
/**
 * Script body: validates the command-line options, resolves the sessions to
 * process and retrieves their agendas, timing the whole run.
 *
 * The elapsed time is printed unless the `silent` option is set.
 *
 * @returns {Promise<void>}
 * @throws {AssertionError} When the `dataDir` option is missing.
 */
async function main() {
    const timerLabel = "agenda processing time";
    const dataDir = options["dataDir"];
    assert(dataDir, "Missing argument: data directory");
    const sessions = getSessionsFromStart(options["fromSession"]);
    console.time(timerLabel);
    await retrieveAgendas(dataDir, sessions);
    if (options["silent"]) {
        return;
    }
    console.timeEnd(timerLabel);
}
// Script entry point: exit with status 0 on success and 1 on any failure escaping main().
main()
    .then(() => process.exit(0))
    .catch((error) => {
        // Report on stderr, like the per-agenda failures, so errors are not mixed into stdout.
        console.error(error);
        process.exit(1);
    });