/*
 * @tricoteuses/senat
 * Handle French Sénat's open data
 */
import assert from "assert"
import commandLineArgs from "command-line-args"
import fetch from "cross-fetch"
import fs from "fs-extra"
import path from "path"
import { Texte, texteFieldsToParseInt, texteFieldsToTrim } from "../types/dosleg"
import { checkDatabase } from "../databases"
import { parseIntFields, trimFieldsRight } from "../fields"
// Command-line option definitions for this script: two boolean flags and one
// positional (default) argument.
const optionsDefinitions = [
  // -s / --silent
  {
    name: "silent",
    alias: "s",
    type: Boolean,
    help: "don't log anything",
  },
  // -v / --verbose
  {
    name: "verbose",
    alias: "v",
    type: Boolean,
    help: "verbose logs",
  },
  // Default option: a bare argument is taken as the data directory.
  {
    name: "dataDir",
    defaultOption: true,
    type: String,
    help: "directory containing Sénat open data files",
  },
]
// Parsed command-line options, built from `optionsDefinitions`; `main` reads
// `options.dataDir` and `options.silent` from it.
const options = commandLineArgs(optionsDefinitions)
/**
 * Downloads the page of every text of the "dosleg" database that has an
 * internal, non-PDF URL and stores it below `<dataDir>/leg`.
 *
 * Rows of the `texte` table are read in one query, normalized with
 * `trimFieldsRight` and `parseIntFields`, then processed one after the other:
 * the URL is resolved against `http://www.senat.fr/leg/`, fetched, and the
 * body is written to `<page directory>/index<ext>`.
 *
 * Pages answering with a non-2xx status are reported and skipped; they do not
 * abort the run. Errors thrown by the database, the network layer or the file
 * system propagate to the caller.
 *
 * @throws AssertionError when no data directory was given on the command line.
 */
async function main(): Promise<void> {
  const dataDir = options.dataDir
  assert(dataDir, "Missing argument: data directory")

  const db = await checkDatabase("dosleg")

  const legDir = path.join(dataDir, "leg")
  fs.ensureDirSync(legDir)

  const textes: Texte[] = (
    await db.any(
      `
SELECT *
FROM texte
`,
    )
  ).map((texte: Texte) =>
    parseIntFields(
      texteFieldsToParseInt,
      trimFieldsRight(texteFieldsToTrim, texte),
    ),
  )

  for (const texte of textes) {
    const { texurl } = texte

    // Skip rows without a usable URL: null, undefined or an empty string.
    // An empty URL would otherwise resolve to the base directory listing.
    if (!texurl) {
      continue
    }
    if (texte.typurl !== "I") {
      // This is not an "I"nternal URL.
      continue
    }
    if (texurl.endsWith(".pdf")) {
      continue
    }

    const url = new URL(texurl, "http://www.senat.fr/leg/").toString()
    if (!options.silent) {
      console.log(`Retrieving page ${url}…`)
    }

    const response = await fetch(url)
    // The body is always read: it is part of the error report below.
    const page = await response.text()
    if (!response.ok) {
      if (response.status !== 404) {
        console.error(
          `An error occurred while retrieving page ${url}: ${response.status} ${response.statusText}\n${page}`,
        )
      } else if (!options.silent) {
        // A missing page is informational only, so it honours --silent like
        // the "Retrieving page" message does.
        console.log(`Page ${url} not found`)
      }
      continue
    }

    // Each page gets its own directory named after the file, e.g. the URL
    // "a/b.html" is stored as "<legDir>/a/b/index.html".
    // NOTE(review): ".." segments in texurl are kept as-is, so the target may
    // resolve outside legDir — confirm this is acceptable for the data set.
    const pathParsed = path.parse(
      path.join(legDir, ...texurl.split("/").filter(Boolean)),
    )
    const pageDir = path.join(pathParsed.dir, pathParsed.name)
    fs.ensureDirSync(pageDir)
    fs.writeFileSync(path.join(pageDir, "index" + pathParsed.ext), page)
  }
}
// Script entry point: exit with status 0 on success and 1 on any failure.
main()
  .then(() => process.exit(0))
  .catch((error: unknown) => {
    // Fatal errors go to stderr so they are not mixed with the progress
    // messages written to stdout.
    console.error(error)
    process.exit(1)
  })