UNPKG

@tricoteuses/senat

Version:

Handle French Sénat's open data

96 lines (89 loc) 2.37 kB
import assert from "assert"
import commandLineArgs from "command-line-args"
import fetch from "cross-fetch"
import fs from "fs-extra"
import path from "path"

import { Texte, texteFieldsToParseInt, texteFieldsToTrim } from "../types/dosleg"
import { checkDatabase } from "../databases"
import { parseIntFields, trimFieldsRight } from "../fields"

// Command-line options understood by this script. The positional (default)
// argument is the directory where the retrieved pages are stored.
const optionsDefinitions = [
  {
    alias: "s",
    help: "don't log anything",
    name: "silent",
    type: Boolean,
  },
  {
    alias: "v",
    help: "verbose logs",
    name: "verbose",
    type: Boolean,
  },
  {
    defaultOption: true,
    help: "directory containing Sénat open data files",
    name: "dataDir",
    type: String,
  },
]
const options = commandLineArgs(optionsDefinitions)

/**
 * Retrieves the pages referenced by the `texte` table of the `dosleg`
 * database and stores them below `<dataDir>/leg`.
 *
 * Rows are skipped when their `texurl` is null, when their `typurl` is not
 * "I" (internal URL) or when the URL ends with ".pdf". Every remaining URL is
 * resolved against `http://www.senat.fr/leg/` and fetched sequentially. A page
 * whose URL path is `a/b/c.html` is written to `<dataDir>/leg/a/b/c/index.html`.
 *
 * Pages that cannot be retrieved (non-2xx response) are reported and skipped;
 * they do not abort the run.
 *
 * @throws AssertionError when the data directory argument is missing.
 */
async function main() {
  const dataDir = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  const db = await checkDatabase("dosleg")

  const legDir = path.join(dataDir, "leg")
  fs.ensureDirSync(legDir)

  // Load every row, right-trimming the string fields and converting the
  // numeric fields to integers.
  const textes: Texte[] = (
    await db.any(
      ` SELECT * FROM texte `,
    )
  ).map((texte: Texte) =>
    parseIntFields(
      texteFieldsToParseInt,
      trimFieldsRight(texteFieldsToTrim, texte),
    ),
  )

  for (const texte of textes) {
    if (texte.texurl === null) {
      continue
    }
    if (texte.typurl !== "I") {
      // This is not an "I"nternal URL.
      continue
    }
    if (texte.texurl.endsWith(".pdf")) {
      continue
    }

    const url = new URL(texte.texurl, "http://www.senat.fr/leg/").toString()
    if (!options.silent) {
      console.log(`Retrieving page ${url}…`)
    }
    const response = await fetch(url)
    // The body is read even on failure because it is included in the error
    // report below.
    const page = await response.text()
    if (!response.ok) {
      if (response.status === 404) {
        // A missing page is only informational: honour --silent like the
        // "Retrieving page" message does.
        if (!options.silent) {
          console.log(`Page ${url} not found`)
        }
      } else {
        console.error(
          `An error occurred while retrieving page ${url}: ${response.status} ${response.statusText}\n${page}`,
        )
      }
      continue
    }

    // Mirror the URL path below legDir: the file name (without extension)
    // becomes a directory that contains "index" + the original extension.
    const pathParsed = path.parse(
      path.join(legDir, ...texte.texurl.split("/").filter(Boolean)),
    )
    const pageDir = path.join(pathParsed.dir, pathParsed.name)
    fs.ensureDirSync(pageDir)
    fs.writeFileSync(path.join(pageDir, "index" + pathParsed.ext), page)
  }
}

main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Fatal errors go to stderr so they are not mixed with progress logs.
    console.error(error)
    process.exit(1)
  })