UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

174 lines (173 loc) 7.66 kB
import fs from "fs";
import path from "path";
import { datasets } from "./datasets";
import { UNDEFINED_SESSION } from "./types/sessions";

export { EnabledDatasets } from "./datasets";

// Names of the folders and files used to lay out the data directory.
export const AGENDA_FOLDER = "agenda";
export const COMPTES_RENDUS_FOLDER = "seances";
export const DOSLEG_DOSSIERS_FOLDER = "dossiers";
export const RAPPORT_FOLDER = "rap";
export const SENS_CIRCONSCRIPTIONS_FOLDER = "circonscriptions";
export const SENS_ORGANISMES_FOLDER = "organismes";
export const SENS_SENATEURS_FOLDER = "senateurs";
export const TEXTE_FOLDER = "leg";
export const DATA_ORIGINAL_FOLDER = "original";
export const DATA_TRANSFORMED_FOLDER = "transformed";
export const DOCUMENT_METADATA_FILE = "metadata.json";

/**
 * Recursively yields the path of every regular file found below `dirPath`.
 *
 * Yields nothing when `dirPath` is falsy or does not exist.
 *
 * @param {string} dirPath - Directory to walk.
 * @yields {string} Path of a file, built from the entry's directory and name.
 */
export function* iterFilePaths(dirPath) {
  if (!dirPath || !fs.existsSync(dirPath)) {
    return;
  }
  const entries = fs.readdirSync(dirPath, {
    withFileTypes: true,
    recursive: true,
  });
  for (const entry of entries) {
    if (entry.isFile()) {
      // `parentPath` is missing on some Node releases that already support
      // recursive listing; those expose the same directory as `path`.
      yield path.join(entry.parentPath ?? entry.path, entry.name);
    }
  }
}

/**
 * Reads a UTF-8 file and parses its content as JSON.
 *
 * @param {string} filePath - Path of the JSON file.
 * @returns {*} The parsed JSON value.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
function readJsonFile(filePath) {
  return JSON.parse(fs.readFileSync(filePath, { encoding: "utf8" }));
}

/**
 * Loads every JSON file stored under
 * `dataDir/dataName[/subDir][/legislatureOrSession]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} dataName - Name of the dataset folder inside `dataDir`.
 * @param {number|string} [legislatureOrSession] - When truthy, restricts the
 *   walk to the sub-folder of that name.
 * @param {string} [subDir] - Optional folder between the dataset folder and
 *   the legislature/session folder.
 * @param {{ log?: boolean }} [options] - `log: true` prints each loaded path.
 * @yields {{ item: *, filePathFromDataset: string, legislature: * }} The
 *   parsed file, its path relative to the dataset folder (with a leading path
 *   separator) and the `legislatureOrSession` argument as received.
 */
function* iterLoadSenatItems(dataDir, dataName, legislatureOrSession, subDir, { log = false } = {}) {
  const datasetDir = path.join(dataDir, dataName);
  let itemsDir = datasetDir;
  if (subDir) {
    itemsDir = path.join(itemsDir, subDir);
  }
  if (legislatureOrSession) {
    itemsDir = path.join(itemsDir, String(legislatureOrSession));
  }
  for (const filePath of iterFilePaths(itemsDir)) {
    if (log) {
      console.log(`Loading file: ${filePath}…`);
    }
    const item = readJsonFile(filePath);
    // Compute the path relative to the dataset folder itself. Searching the
    // full path for the dataset name would match too early whenever
    // `dataDir` happens to contain that name.
    const filePathFromDataset = path.sep + path.relative(datasetDir, filePath);
    yield {
      item,
      filePathFromDataset,
      legislature: legislatureOrSession,
    };
  }
}

/**
 * Loads every `metadata.json` file found under
 * `dataDir/documentFolder/original[/session]`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} documentFolder - Folder of the document kind.
 * @param {number|string} [session] - When truthy, restricts the walk to the
 *   sub-folder of that name.
 * @yields {{ item: * }} The parsed content of each metadata file.
 */
function* iterLoadDocumentMetadata(dataDir, documentFolder, session) {
  let itemsDir = path.join(dataDir, documentFolder, DATA_ORIGINAL_FOLDER);
  if (session) {
    itemsDir = path.join(itemsDir, session.toString());
  }
  for (const filePath of iterFilePaths(itemsDir)) {
    if (path.basename(filePath) === DOCUMENT_METADATA_FILE) {
      yield { item: readJsonFile(filePath) };
    }
  }
}

/**
 * Iterates over the items of the `datasets.ameli.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatAmendements(dataDir, session, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.ameli.database, session, undefined, options);
}

/**
 * Iterates over the items of the `datasets.debats.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatDebats(dataDir, session, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.debats.database, session, undefined, options);
}

/**
 * Iterates over the items of the `dossiers` sub-folder of the
 * `datasets.dosleg.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatDossiersLegislatifs(dataDir, session, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.dosleg.database, session, DOSLEG_DOSSIERS_FOLDER, options);
}

/**
 * Iterates over the `metadata.json` files stored under `rap/original`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @yields {{ item: * }} The parsed metadata.
 */
export function* iterLoadSenatDossiersLegislatifsRapportUrls(dataDir, session) {
  yield* iterLoadDocumentMetadata(dataDir, RAPPORT_FOLDER, session);
}

/**
 * Iterates over the `metadata.json` files stored under `leg/original`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @yields {{ item: * }} The parsed metadata.
 */
export function* iterLoadSenatDossiersLegislatifsTexteUrls(dataDir, session) {
  yield* iterLoadDocumentMetadata(dataDir, TEXTE_FOLDER, session);
}

/**
 * Iterates over the documents listed under `documentType` in the "Sénat"
 * readings of every loaded dossier, enriching each one with fields copied
 * from its dossier and reading.
 *
 * A missing `lectures`, `lectures_assemblee` or document list is treated as
 * empty instead of raising a "not iterable" error.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {string} documentType - Property of a reading holding the documents
 *   (this file uses "rapports" and "textes").
 * @param {{ log?: boolean }} [options] - Loading options.
 * @yields {{ item: object, filePathFromDataset?: string }} The enriched
 *   document; `filePathFromDataset` is only set when the document has a `url`.
 */
export function* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, documentType, options = {}) {
  for (const { item: dossierLegislatif } of iterLoadSenatDossiersLegislatifs(dataDir, session, options)) {
    for (const lecture of dossierLegislatif.lectures ?? []) {
      for (const lectureSenat of lecture.lectures_assemblee ?? []) {
        if (lectureSenat.assemblee !== "Sénat") {
          continue;
        }
        for (const document of lectureSenat[documentType] ?? []) {
          // The document's own fields are spread last, so they win over the
          // dossier and reading fields on a key clash.
          const enrichedDocument = {
            signet_dossier: dossierLegislatif.signet,
            url_dossier_senat: dossierLegislatif.url,
            url_dossier_assemblee_nationale: dossierLegislatif.url_dossier_assemblee_nationale,
            type_lecture: lecture.type_lecture,
            libelle_lecture: lecture.libelle,
            libelle_organisme: lectureSenat.libelle_organisme,
            ...document,
          };
          const documentItem = {
            item: enrichedDocument,
          };
          if (document.url) {
            const documentName = path.parse(document.url).name;
            documentItem.filePathFromDataset = path.join(
              `${document.session ?? UNDEFINED_SESSION}`,
              documentName,
              `${documentName}.pdf`,
            );
          }
          yield documentItem;
        }
      }
    }
  }
}

/**
 * Iterates over the enriched "rapports" documents of the loaded dossiers.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatDossiersLegislatifsRapports(dataDir, session, options = {}) {
  yield* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, "rapports", options);
}

/**
 * Iterates over the enriched "textes" documents of the loaded dossiers.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatDossiersLegislatifsTextes(dataDir, session, options = {}) {
  yield* iterLoadSenatDossiersLegislatifsDocuments(dataDir, session, "textes", options);
}

/**
 * Loads the JSON file that sits under `leg/transformed`, in the same relative
 * directory and with the same base name as `textePathFromDataset`.
 *
 * @param {string} dataDir - Root data directory.
 * @param {string} textePathFromDataset - Path of the text inside the dataset.
 * @returns {{ item: * }} The parsed JSON, or `{ item: null }` when the file
 *   does not exist.
 */
export function loadSenatTexteContent(dataDir, textePathFromDataset) {
  const { dir, name } = path.parse(textePathFromDataset);
  const fullTextePath = path.join(dataDir, TEXTE_FOLDER, DATA_TRANSFORMED_FOLDER, dir, `${name}.json`);
  if (!fs.existsSync(fullTextePath)) {
    return { item: null };
  }
  return { item: readJsonFile(fullTextePath) };
}

/**
 * Iterates over the individual events stored in the JSON files under
 * `agenda/transformed`; each file's content is iterated element by element.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [session] - Optional session sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 * @yields {{ item: * }} One event.
 */
export function* iterLoadSenatEvenements(dataDir, session, options = {}) {
  for (const { item: evenements } of iterLoadSenatItems(dataDir, AGENDA_FOLDER, session, DATA_TRANSFORMED_FOLDER, options)) {
    for (const evenement of evenements) {
      yield { item: evenement };
    }
  }
}

/**
 * Iterates over the items of the `circonscriptions` sub-folder of the
 * `datasets.sens.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatCirconscriptions(dataDir, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_CIRCONSCRIPTIONS_FOLDER, options);
}

/**
 * Iterates over the items of the `organismes` sub-folder of the
 * `datasets.sens.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatOrganismes(dataDir, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_ORGANISMES_FOLDER, options);
}

/**
 * Iterates over the items of the `senateurs` sub-folder of the
 * `datasets.sens.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatSenateurs(dataDir, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.sens.database, undefined, SENS_SENATEURS_FOLDER, options);
}

/**
 * Iterates over the items of the `datasets.questions.database` folder.
 *
 * @param {string} dataDir - Root data directory.
 * @param {number|string} [legislature] - Optional legislature sub-folder.
 * @param {{ log?: boolean }} [options] - Loading options.
 */
export function* iterLoadSenatQuestions(dataDir, legislature, options = {}) {
  yield* iterLoadSenatItems(dataDir, datasets.questions.database, legislature, undefined, options);
}