UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

78 lines (77 loc) 2.52 kB
import assert from "assert";
import commandLineArgs from "command-line-args";
import fetch from "cross-fetch";
import fs from "fs-extra";
import path from "path";
import { texteFieldsToParseInt, texteFieldsToTrim } from "../types/dosleg";
import { checkDatabase } from "../databases";
import { parseIntFields, trimFieldsRight } from "../fields";

// Command-line options accepted by this script. The data directory is the
// default (positional) option.
const optionsDefinitions = [
  {
    alias: "s",
    help: "don't log anything",
    name: "silent",
    type: Boolean,
  },
  {
    alias: "v",
    help: "verbose logs",
    name: "verbose",
    type: Boolean,
  },
  {
    defaultOption: true,
    help: "directory containing Sénat open data files",
    name: "dataDir",
    type: String,
  },
];
const options = commandLineArgs(optionsDefinitions);

/**
 * Retrieves the page of every row of the `texte` table of the "dosleg"
 * database and stores it below `<dataDir>/leg`.
 *
 * A row is skipped when its `texurl` is null, when its `typurl` is not "I",
 * or when its `texurl` ends with ".pdf". Every remaining `texurl` is resolved
 * against `http://www.senat.fr/leg/`, fetched (one request at a time), and
 * written to `<dataDir>/leg/<texurl without extension>/index<extension>`;
 * e.g. a `texurl` of `foo/bar.html` is written to
 * `<dataDir>/leg/foo/bar/index.html`.
 *
 * Pages answering with a non-OK HTTP status are reported and skipped; they do
 * not abort the run.
 *
 * @returns {Promise<void>} Resolves once every row has been processed.
 * @throws {AssertionError} When no data directory was given on the command line.
 */
async function main() {
  const dataDir = options.dataDir;
  assert(dataDir, "Missing argument: data directory");

  const db = await checkDatabase("dosleg");
  const legDir = path.join(dataDir, "leg");
  fs.ensureDirSync(legDir);

  const rows = await db.any(` SELECT * FROM texte `);
  const textes = rows.map((texte) =>
    parseIntFields(
      texteFieldsToParseInt,
      trimFieldsRight(texteFieldsToTrim, texte),
    ),
  );

  for (const texte of textes) {
    if (texte.texurl === null) {
      continue;
    }
    if (texte.typurl !== "I") {
      // This is not an "I"nternal URL.
      continue;
    }
    if (texte.texurl.endsWith(".pdf")) {
      continue;
    }

    const url = new URL(texte.texurl, "http://www.senat.fr/leg/").toString();
    if (!options.silent) {
      console.log(`Retrieving page ${url}…`);
    }

    const response = await fetch(url);
    // The body is read even on failure because it is included in the error
    // report below.
    const page = await response.text();
    if (!response.ok) {
      if (response.status === 404) {
        // A missing page is informational only: honour --silent, like the
        // "Retrieving page" message above.
        if (!options.silent) {
          console.log(`Page ${url} not found`);
        }
      } else {
        console.error(
          `An error occurred while retrieving page ${url}: ${response.status} ${response.statusText}\n${page}`,
        );
      }
      continue;
    }

    // Mirror the URL path below legDir: the file name (without extension)
    // becomes a directory that holds "index<ext>".
    // NOTE(review): ".." segments in texurl are not filtered here and could
    // place the output outside legDir — confirm that the data never has them.
    const pathParsed = path.parse(
      path.join(legDir, ...texte.texurl.split("/").filter(Boolean)),
    );
    const pageDir = path.join(pathParsed.dir, pathParsed.name);
    fs.ensureDirSync(pageDir);
    fs.writeFileSync(path.join(pageDir, `index${pathParsed.ext}`), page);
  }
}

main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Fatal errors belong on stderr, next to the non-zero exit status.
    console.error(error);
    process.exit(1);
  });