@tricoteuses/senat
Version:
Handle French Sénat's open data
78 lines (77 loc) • 2.52 kB
JavaScript
import assert from "assert";
import commandLineArgs from "command-line-args";
import fetch from "cross-fetch";
import fs from "fs-extra";
import path from "path";
import { texteFieldsToParseInt, texteFieldsToTrim } from "../types/dosleg";
import { checkDatabase } from "../databases";
import { parseIntFields, trimFieldsRight } from "../fields";
// Command-line interface of the script: two logging flags and one positional
// argument (the default option) that names the data directory.
const optionsDefinitions = [
  // -s / --silent
  {
    name: "silent",
    alias: "s",
    type: Boolean,
    help: "don't log anything",
  },
  // -v / --verbose
  {
    name: "verbose",
    alias: "v",
    type: Boolean,
    help: "verbose logs",
  },
  // Bare argument (no flag needed) thanks to `defaultOption`.
  {
    name: "dataDir",
    type: String,
    defaultOption: true,
    help: "directory containing Sénat open data files",
  },
];

// Parsed once at module load; read by main().
const options = commandLineArgs(optionsDefinitions);
/**
 * Downloads the page of every row of the `texte` table that has an internal,
 * non-PDF URL and stores it below `<dataDir>/leg`.
 *
 * Reads `dataDir` and `silent` from the module-level `options`. Each page is
 * written as `index<ext>` inside a directory named after the URL path
 * stripped of its extension.
 *
 * Pages that cannot be retrieved (non-2xx response) are reported and skipped;
 * they do not abort the run.
 *
 * @returns {Promise<void>} Resolves once every row has been processed.
 * @throws {AssertionError} When no data directory was given on the command line.
 */
async function main() {
  const { dataDir, silent } = options;
  assert(dataDir, "Missing argument: data directory");

  const db = await checkDatabase("dosleg");
  const legDir = path.join(dataDir, "leg");
  fs.ensureDirSync(legDir);

  // Rows are normalised with the field helpers before use (right-trim first,
  // then integer parsing, as applied by the helpers from "../fields").
  const textes = (
    await db.any(`
SELECT *
FROM texte
`)
  ).map((texte) =>
    parseIntFields(
      texteFieldsToParseInt,
      trimFieldsRight(texteFieldsToTrim, texte),
    ),
  );

  for (const texte of textes) {
    if (texte.texurl === null) {
      continue;
    }
    if (texte.typurl !== "I") {
      // This is not an "I"nternal URL.
      continue;
    }
    if (texte.texurl.endsWith(".pdf")) {
      continue;
    }

    const url = new URL(texte.texurl, "http://www.senat.fr/leg/").toString();
    if (!silent) {
      console.log(`Retrieving page ${url}…`);
    }

    const response = await fetch(url);
    // The body is read even on failure so that it can be shown in the error report.
    const page = await response.text();
    if (!response.ok) {
      if (response.status === 404) {
        // Informational message: honour --silent like the "Retrieving" message.
        if (!silent) {
          console.log(`Page ${url} not found`);
        }
      } else {
        console.error(
          `An error occurred while retrieving page ${url}: ${response.status} ${response.statusText}\n${page}`,
        );
      }
      continue;
    }

    // Mirror the URL path below legDir: ".../foo.html" becomes ".../foo/index.html".
    const pathParsed = path.parse(
      path.join(legDir, ...texte.texurl.split("/").filter(Boolean)),
    );
    const pageDir = path.join(pathParsed.dir, pathParsed.name);
    fs.ensureDirSync(pageDir);
    fs.writeFileSync(path.join(pageDir, `index${pathParsed.ext}`), page);
  }
}
// Run the script and exit with status 0 on success, 1 on failure.
main()
  .then(() => process.exit(0))
  .catch((error) => {
    // Fatal errors go to stderr so they are not mixed with regular output.
    console.error(error);
    process.exit(1);
  });