/*
 * @rr0/cms
 * Version:
 * RR0 Content Management System (CMS)
 * 144 lines (143 loc) • 5.86 kB
 * JavaScript
 */
import { JSDOM } from "jsdom";
import { HttpSource } from "../HttpSource.js";
import { ObjectUtil, UrlUtil } from "../../../util/index.js";
import { TimeTextBuilder } from "../../text/TimeTextBuilder.js";
import { MessageUtils } from "../../../lang/index.js";
import { UrecatDatasource } from "./UrecatDatasource.js";
import { Level2Date as EdtfDate } from "@rr0/time";
/**
 * URECAT datasource that reads its cases from HTML index pages fetched over HTTP.
 *
 * A query fetches one index page (see `queryUrl`) and turns every table row after the
 * header row into a case summary (see `getFromRow`).
 */
export class UrecatHttpDatasource extends UrecatDatasource {
  /**
   * @param {string|URL} baseUrl Base against which index and case URLs are resolved.
   * @param {string} [searchPath="ce3"] Path segment (relative to `baseUrl`) holding the index pages.
   */
  constructor(baseUrl, searchPath = "ce3") {
    super(["Gross, Patrick"], "URECAT");
    this.baseUrl = baseUrl;
    this.searchPath = searchPath;
    this.http = new HttpSource();
    // Options handed to the TimeTextBuilder used to render case titles.
    this.intlOptions = {
      year: "numeric",
      month: "long",
      day: "numeric",
      weekday: "long",
      hour: "2-digit",
      minute: "2-digit",
      timeZoneName: "short"
    };
  }

  /**
   * Builds the URL of the index page to fetch for a context.
   *
   * The page name is `_<year><lang>.htm`, where `<year>` is the context year (or `full` when the
   * context has no year) and `<lang>` is `f` for the `fr` locale and empty otherwise.
   *
   * @param {object} context Query context exposing `time.getYear()` and `locale`.
   * @returns {URL} The index page URL, resolved against `baseUrl`.
   */
  queryUrl(context) {
    const year = context.time.getYear();
    const yearSegment = year !== null && year !== undefined ? year : "full";
    const langSuffix = context.locale === "fr" ? "f" : "";
    const requestUrl = UrlUtil.join(this.searchPath, `_${yearSegment}${langSuffix}.htm`);
    return new URL(requestUrl, this.baseUrl);
  }

  /**
   * Splits a witnesses description into individual witnesses.
   *
   * The text is split on " et " and on commas; a leading "ses " is dropped from each part.
   * A part starting with a count word greater than one (see `wordToCount`) is expanded into that
   * many numbered names. Single-word names (containing neither a space nor an apostrophe) get the
   * last word of the last multi-word name appended, so "Jean et Marie Dupont" yields
   * "Jean Dupont" and "Marie Dupont".
   *
   * @param {string} witnessesStr Raw witnesses text.
   * @returns {{name: string}[]} One entry per witness.
   */
  getWitnesses(witnessesStr) {
    const countWords = Object.entries(UrecatHttpDatasource.wordToCount);
    // Last word of the most recent multi-word name; only final once the whole list was scanned.
    let lastName = "";
    const names = witnessesStr
      .split(" et ")
      .flatMap(part => part.split(","))
      .map(part => part.trim().replace(/^ses /, ""))
      .flatMap(name => {
        const lowName = name.toLowerCase();
        const countEntry = countWords.find(([word]) => lowName.startsWith(`${word} `));
        if (countEntry && countEntry[1] > 1) {
          const [word, count] = countEntry;
          // Drops the count word, then the final character (presumably the plural "s" — TODO confirm
          // this is wanted for plurals that do not end in "s").
          const oneName = lowName.substring(word.length + 1).slice(0, -1);
          return Array.from({ length: count }, (_, index) => `${oneName} ${index + 1}`);
        }
        const words = name.split(" ");
        if (words.length > 1) {
          lastName = words[words.length - 1];
        }
        return name;
      });
    return names.map(name => {
      const isSingleWord = !name.includes(" ") && !name.includes("'");
      return { name: isSingleWord && lastName ? `${name} ${lastName}` : name };
    });
  }

  /**
   * Fetches the index page for a context and converts its rows to cases.
   *
   * The table is located through the first `<th>` of the page; the first row of that table is
   * skipped as the header row.
   *
   * @param {object} context Query context (see `queryUrl`).
   * @returns {Promise<object[]>} The cases built by `getFromRow`, in page order.
   * @throws {Error} If the page contains no header cell, hence no cases table.
   */
  async readCases(context) {
    const searchUrl = this.queryUrl(context);
    const page = await this.http.fetch(searchUrl, { headers: { accept: "text/html;charset=iso-8859-1" } });
    const doc = new JSDOM(page).window.document.documentElement;
    const headerCell = doc.querySelector("th");
    const headerRow = headerCell && headerCell.parentElement;
    const tableBody = headerRow && headerRow.parentElement;
    if (!tableBody) {
      throw new Error(`No cases table found in ${searchUrl}`);
    }
    const caseRows = Array.from(tableBody.querySelectorAll("tr")).slice(1);
    return caseRows.map(row => this.getFromRow(context, row));
  }

  /**
   * Reads a location from a table cell whose text is "place, department-or-state, country"
   * or "place, country".
   *
   * @param {{textContent: string}} column The cell holding the location text.
   * @returns {{placeName: string, country: (string|undefined), departmentOrState: (string|undefined)}}
   */
  getLocation(column) {
    const [placeName, second, third] = column.textContent.split(",").map(part => part.trim());
    // With only two parts, the second one is the country and there is no department/state.
    return third
      ? { placeName, country: third, departmentOrState: second }
      : { placeName, country: second, departmentOrState: undefined };
  }

  /**
   * Derives a case date from the file name of its URL (`YYYY[-MM[-DD]]…`).
   *
   * A month or day written as `00` is treated as unknown and left undefined.
   *
   * @param {object} context Context to clone; the clone receives the date.
   * @param {{pathname: string}} caseLink URL of the case page.
   * @param {*} [row] Unused; kept for interface compatibility.
   * @returns {object} A clone of `context` whose `time.date` is the parsed date.
   * @throws {Error} If the file name contains no four-digit year.
   */
  getDate(context, caseLink, row) {
    // +2 skips the leading "/" and the "/" that follows the search path.
    const timeStr = caseLink.pathname.substring(this.searchPath.length + 2);
    const dateFields = UrecatHttpDatasource.urlDateFormat.exec(timeStr);
    if (!dateFields) {
      throw new Error(`Cannot read a date from case URL path "${caseLink.pathname}"`);
    }
    const [, yearField, monthField, dayField] = dateFields;
    const toKnownNumber = field => (field && field !== "00" ? parseInt(field, 10) : undefined);
    const itemContext = context.clone();
    itemContext.time.date = new EdtfDate({
      year: parseInt(yearField, 10),
      month: toKnownNumber(monthField),
      day: toKnownNumber(dayField)
    });
    return itemContext;
  }

  /**
   * Resolves the case page URL from the first child element of a cell.
   *
   * @param {{firstElementChild: {href: string}}} linkCol Cell whose first child is the case link.
   * @returns {URL} The link target under `searchPath`, resolved against `baseUrl`.
   */
  getLink(linkCol) {
    const caseLink = linkCol.firstElementChild;
    return new URL(UrlUtil.join(this.searchPath, caseLink.href), this.baseUrl);
  }

  /**
   * Converts one table row to a case summary.
   *
   * The second cell provides both the case link and the location text; the third cell provides
   * the witnesses. The title is upper-cased and lists time, location parts and witness count;
   * location parts that are missing are omitted, and the count is written in digits when
   * `wordToCount` has no word for it.
   *
   * @param {object} context Query context (cloned for the case date).
   * @param {{querySelectorAll: function(string): ArrayLike<object>}} row The `<tr>` to convert.
   * @returns {{id: string, time: object, url: string, title: string, basicInfo: object}}
   */
  getFromRow(context, row) {
    const columns = row.querySelectorAll("td");
    const url = this.getLink(columns[1]);
    const caseContext = this.getDate(context, url, row);
    const { placeName, departmentOrState, country } = this.getLocation(columns[1]);
    const witnesses = this.getWitnesses(columns[2].textContent);
    const timeStr = new TimeTextBuilder(this.intlOptions).build(caseContext, {
      year: "numeric",
      month: "long",
      day: "numeric",
      hour: "2-digit",
      minute: "2-digit"
    });
    const sightingDate = caseContext.time.date;
    const countWord = ObjectUtil.keyFromValue(UrecatHttpDatasource.wordToCount, witnesses.length);
    const countStr = countWord !== null && countWord !== undefined ? countWord : String(witnesses.length);
    const witnessesPart = `${countStr} ${MessageUtils.pluralWord(witnesses.length, "personne")}`;
    const title = [timeStr, placeName, departmentOrState, country, witnessesPart]
      .filter(part => Boolean(part))
      .join(", ")
      .toUpperCase();
    const id = url.pathname.substring(this.searchPath.length + 2);
    return {
      id,
      time: sightingDate,
      url: url.href,
      title,
      basicInfo: { base: { sightingDate, location: { placeName, country, departmentOrState }, witnesses } }
    };
  }
}

/** Date prefix of a case file name: a year, optionally followed by `-MM` and `-DD`. */
UrecatHttpDatasource.urlDateFormat = /(\d\d\d\d)(?:-(\d\d)(?:-(\d\d))?)?/;

/** French count words recognized in witnesses text and used in titles, mapped to their value. */
UrecatHttpDatasource.wordToCount = {
  "une": 1,
  "un": 1,
  "deux": 2,
  "trois": 3,
  "quatre": 4,
  "cinq": 5,
  "six": 6,
  "sept": 7
};