@rr0/cms
Version:
RR0 Content Management System (CMS)
147 lines (146 loc) • 5.62 kB
JavaScript
import { HttpSource } from "../HttpSource.js";
import { UrlUtil } from "../../../util/index.js";
import { RR0Datasource } from "./RR0Datasource.js";
import { Level2Date as EdtfDate } from "@rr0/time";
import { NamedPlace } from "@rr0/place";
import { OrganizationPlace } from "../../../place/OrganizationPlace.js";
/**
 * Datasource that reads sighting cases from RR0 HTML pages fetched over HTTP.
 *
 * The page matching the context's time is requested, its `ul.indexed li` rows are
 * collected, and every row that has child nodes is converted into an event record.
 */
export class RR0HttpDatasource extends RR0Datasource {
  /**
   * @param baseUrl Base URL against which the search path and case links are resolved.
   * @param searchPath Path, relative to `baseUrl`, under which the time-based pages live.
   * @param cityService Service exposing `find(context, name, parent)`, used to resolve place names.
   */
  constructor(baseUrl, searchPath, cityService) {
    super();
    this.baseUrl = baseUrl;
    this.searchPath = searchPath;
    this.cityService = cityService;
    this.http = new HttpSource();
  }

  /**
   * Converts rows into cases, ignoring rows that have no child nodes.
   *
   * @param context Context forwarded to {@link getFromRow}.
   * @param rows Iterable of row elements.
   * @returns {Array} One case per non-empty row, in row order.
   */
  getFromRows(context, rows) {
    return Array.from(rows)
      .filter(row => row.hasChildNodes())
      .map(row => this.getFromRow(context, row));
  }

  /**
   * @param doc Document (or element) to search.
   * @returns {Array} The `ul.indexed li` elements of `doc`, as a plain array.
   */
  findRows(doc) {
    return Array.from(doc.querySelectorAll("ul.indexed li"));
  }

  /**
   * Builds a sighting event from one row.
   *
   * The row is cloned first, so the time, place and source elements that are
   * stripped while parsing are never removed from the caller's DOM.
   *
   * @param context Context providing `file.name`, `clone()` and, on the clone, `time`.
   * @param r Row element to read.
   * @returns {object} Event record with `url`, `place`, `time`, `description`, `sources` and `id`.
   */
  getFromRow(context, r) {
    const row = r.cloneNode(true);
    const url = new URL(context.file.name, this.baseUrl);
    const itemContext = context.clone();
    const itemTime = itemContext.time;

    const timeEl = row.querySelector("time");
    if (timeEl) {
      // The <time> value both anchors the case URL and sets the case time.
      url.hash = timeEl.dateTime;
      itemTime.updateFromStr(timeEl.dateTime);
      timeEl.remove();
    }

    let place;
    const placeEl = row.querySelector(".plac");
    if (placeEl) {
      place = this.getPlace(itemContext, placeEl);
      // Once the place element is gone, drop the connective text nodes that
      // surrounded it so they do not leak into the description.
      const toRemove = ["", " À ", " A ", ", "];
      for (const childNode of Array.from(row.childNodes)) {
        // nodeType 3 is a text node.
        if (childNode.nodeType === 3 && toRemove.includes(childNode.nodeValue)) {
          childNode.remove();
        }
      }
    }

    // Sources must be extracted before the description: getSources() removes the
    // source elements from the row, so their text is not part of the description.
    const sources = this.getSources(row, itemContext);
    const description = this.getDescription(row);
    const id = RR0HttpDatasource.id(itemTime.date, place);
    return {
      type: "event",
      eventType: "sighting",
      events: [],
      url: url.href,
      place,
      time: itemTime.date,
      description,
      sources,
      id
    };
  }

  /**
   * Fetches the page for the context's time and parses its cases.
   *
   * @param context Context providing the time to query.
   * @returns {Promise<Array>} The cases found in the fetched document.
   */
  async readCases(context) {
    const queryUrl = this.queryUrl(context);
    const doc = await this.http.get(queryUrl);
    return this.getFromRows(context, this.findRows(doc));
  }

  /**
   * Extracts the sources referenced by a row and removes their elements from it.
   *
   * The text of each `.source-contents` is read as
   * `authors ":" title "," publisher... ["," time]`, with authors separated by "&".
   * Source elements without child nodes are removed and skipped; a missing
   * `.source-contents` or a missing ":" yields empty text / no authors instead of
   * throwing or producing an empty author name.
   *
   * @param row Row element; its `.source-id` descendants are removed.
   * @param itemContext Context of the row (currently unused).
   * @returns {Array} One source record per non-empty `.source-id` element.
   */
  getSources(row, itemContext) {
    const sources = [];
    for (const sourceEl of row.querySelectorAll(".source-id")) {
      if (!sourceEl.hasChildNodes()) {
        // Nothing to read an id or contents from.
        sourceEl.remove();
        continue;
      }
      const id = sourceEl.childNodes[0].textContent;
      const sourceContent = sourceEl.querySelector(".source-contents");
      const sourceText = sourceContent ? this.getDescription(sourceContent) : "";
      sourceEl.remove();

      const authorEnd = sourceText.indexOf(":");
      const authors = authorEnd < 0
        ? []
        : sourceText.substring(0, authorEnd).split("&").map(s => s.trim()).filter(s => s.length > 0);
      // When there is no ":", authorEnd + 1 is 0 and the whole text is the reference.
      const pubItems = sourceText.substring(authorEnd + 1).trim().split(",");

      const timeStr = pubItems[pubItems.length - 1].trim();
      let time;
      try {
        time = EdtfDate.fromString(timeStr);
      } catch {
        // Best effort: the last item is often not a date, in which case it is
        // kept as part of the publisher and the source has no time.
      }
      if (time) {
        pubItems.pop();
      }

      // pubItems is empty when the only item was consumed as the time.
      const title = (pubItems[0] ?? "").trim();
      const publisher = pubItems.slice(1).map(item => item.trim()).join(", ").trim();
      const publication = { publisher, time };
      sources.push({ events: [], title, id, authors, publication, previousSourceRefs: [] });
    }
    return sources;
  }

  /**
   * Resolves the place described by an element, and removes that element from its parent.
   *
   * @param context Context passed to the city service; its `debug()` is called on lookup failure.
   * @param placeEl Element whose text content names the place.
   * @returns An `OrganizationPlace` when the city service finds the parsed name,
   *   otherwise a `NamedPlace` built from the raw text.
   */
  getPlace(context, placeEl) {
    const placeStr = placeEl.textContent;
    const placeParsed = RR0HttpDatasource.placeRegex.exec(placeStr);
    placeEl.remove();
    if (placeParsed) {
      const parent = undefined; // TODO: Find region from placeParsed[2]
      const org = this.cityService.find(context, placeParsed[1], parent);
      if (org) {
        return new OrganizationPlace(org);
      }
      context.debug(`Could not find place named "${placeParsed[1]}"`);
    }
    return new NamedPlace(placeStr);
  }

  /**
   * Returns the normalized text of an element, with notes inlined in parentheses.
   *
   * Mutates `el`: each `.note-id` descendant that has a `.note-contents` child is
   * replaced by the text ` (<note contents>)`. Notes without contents are left untouched.
   *
   * @param el Element to read.
   * @returns {string} Trimmed text with newlines removed, whitespace runs collapsed
   *   to one space, and " ." rewritten to ".".
   */
  getDescription(el) {
    for (const note of el.querySelectorAll(".note-id")) {
      const noteContents = note.querySelector(".note-contents");
      if (noteContents) {
        note.replaceWith(` (${noteContents.textContent})`);
      }
    }
    return el.textContent.trim().replaceAll("\n", "").replace(/\s{2,}/g, " ").replaceAll(" .", ".");
  }

  /**
   * Builds the URL of the page to fetch for the context's time.
   *
   * The year is zero-padded to four digits and split into one path segment per
   * digit (1954 gives "1/9/5/4", prefixed with "-" for negative years). The
   * two-digit month is appended when set, and the two-digit day when both are set.
   *
   * @param context Context whose `time` provides `getYear()`, `getMonth()` and `getDayOfMonth()`.
   * @returns {URL} Search URL with the time path appended to its pathname.
   */
  queryUrl(context) {
    const time = context.time;
    const day = time.getDayOfMonth();
    const month = time.getMonth();
    const year = time.getYear();
    const searchUrl = new URL(this.searchPath, this.baseUrl);
    const sign = year < 0 ? "-" : "";
    let timeStr = sign + String(Math.abs(year)).padStart(4, "0").split("").join("/");
    if (month) {
      timeStr = UrlUtil.join(timeStr, String(month).padStart(2, "0"));
      if (day) {
        timeStr = UrlUtil.join(timeStr, String(day).padStart(2, "0"));
      }
    }
    searchUrl.pathname = UrlUtil.join(searchUrl.pathname, timeStr);
    return searchUrl;
  }
}