UNPKG

@rr0/cms

Version:

RR0 Content Management System (CMS)

147 lines (146 loc) 5.62 kB
import { HttpSource } from "../HttpSource.js";
import { UrlUtil } from "../../../util/index.js";
import { RR0Datasource } from "./RR0Datasource.js";
import { Level2Date as EdtfDate } from "@rr0/time";
import { NamedPlace } from "@rr0/place";
import { OrganizationPlace } from "../../../place/OrganizationPlace.js";

/**
 * Datasource that fetches a time page over HTTP and converts the items of its
 * "ul.indexed" list into sighting events.
 *
 * NOTE(review): `RR0HttpDatasource.placeRegex` and `RR0HttpDatasource.id` are
 * not defined in this class body; presumably they are static members inherited
 * from RR0Datasource (or assigned elsewhere) - confirm.
 */
export class RR0HttpDatasource extends RR0Datasource {
    /**
     * @param baseUrl Base URL against which the search path and case links are resolved.
     * @param searchPath Path (relative to baseUrl) under which time pages are queried.
     * @param cityService Service whose `find(context, name, parent)` resolves a place name.
     */
    constructor(baseUrl, searchPath, cityService) {
        super();
        this.baseUrl = baseUrl;
        this.searchPath = searchPath;
        this.cityService = cityService;
        this.http = new HttpSource();
    }

    /**
     * Converts every non-empty row into a case.
     *
     * @param context Context passed through to getFromRow().
     * @param rows Iterable of row elements.
     * @returns {Array} One case per row that has child nodes.
     */
    getFromRows(context, rows) {
        const cases = [];
        for (const row of rows) {
            if (row.hasChildNodes()) {
                cases.push(this.getFromRow(context, row));
            }
        }
        return cases;
    }

    /**
     * @param doc Document (or element) to search.
     * @returns {Array} The "ul.indexed li" elements of doc, as an array.
     */
    findRows(doc) {
        return Array.from(doc.querySelectorAll("ul.indexed li"));
    }

    /**
     * Builds a sighting event from one list item.
     *
     * The row is cloned first, so the time, place and source elements that are
     * removed while parsing do not alter the caller's DOM. Whatever text is
     * left after those removals becomes the description.
     *
     * @param context Context providing `file.name` and `clone()`.
     * @param r The row element to parse.
     * @returns The event: type, eventType, events, url, place, time, description, sources, id.
     */
    getFromRow(context, r) {
        const row = r.cloneNode(true);
        const url = new URL(context.file.name, this.baseUrl);
        const itemContext = context.clone();
        const itemTime = itemContext.time;

        const timeEl = row.querySelector("time");
        if (timeEl) {
            // The event time doubles as the anchor of the case URL.
            url.hash = timeEl.dateTime;
            itemTime.updateFromStr(timeEl.dateTime);
            timeEl.remove();
        }

        let place;
        const placeEl = row.querySelector(".plac");
        if (placeEl) {
            place = this.getPlace(itemContext, placeEl);
            // Drop the text nodes that only linked the (now removed) time and place elements.
            const linkTexts = ["", " À ", " A ", ", "];
            const textNodeType = 3;
            for (const childNode of Array.from(row.childNodes)) {
                if (childNode.nodeType === textNodeType && linkTexts.includes(childNode.nodeValue)) {
                    childNode.remove();
                }
            }
        }

        // Sources must be extracted (and removed) before the description is read.
        const sources = this.getSources(row, itemContext);
        const description = this.getDescription(row);
        const id = RR0HttpDatasource.id(itemTime.date, place);
        return {
            type: "event",
            eventType: "sighting",
            events: [],
            url: url.href,
            place,
            time: itemTime.date,
            description,
            sources,
            id
        };
    }

    /**
     * Fetches the page matching the context time and parses its rows.
     *
     * @param context Context whose time selects the page to query.
     * @returns {Promise<Array>} The cases found in the fetched document.
     */
    async readCases(context) {
        const queryUrl = this.queryUrl(context);
        const doc = await this.http.get(queryUrl);
        const rows = this.findRows(doc);
        return this.getFromRows(context, rows);
    }

    /**
     * Extracts the ".source-id" elements of a row and removes them from it.
     *
     * Each source text is read as "authors: title, publisher..., time", where
     * authors are separated by "&" and the trailing time is optional.
     *
     * @param row Row element to read sources from (mutated: source elements are removed).
     * @param itemContext Context of the item (currently unused).
     * @returns {Array} The parsed sources.
     */
    getSources(row, itemContext) {
        const sources = [];
        const sourceEls = row.querySelectorAll(".source-id");
        for (const sourceEl of sourceEls) {
            const id = sourceEl.childNodes[0].textContent;
            const sourceContent = sourceEl.querySelector(".source-contents");
            const text = this.getDescription(sourceContent);

            // Without a colon there is no authors part: indexOf() returns -1 and
            // substring(0, -1) would produce a single bogus empty author.
            const authorEnd = text.indexOf(":");
            const authors = authorEnd < 0
                ? []
                : text.substring(0, authorEnd).split("&").map(s => s.trim()).filter(s => s.length > 0);
            const publicationText = text.substring(authorEnd + 1).trim();
            sourceEl.remove();

            const pubItems = publicationText.split(",");
            const timeStr = pubItems[pubItems.length - 1].trim();
            let time;
            try {
                time = EdtfDate.fromString(timeStr);
            } catch (e) {
                // Best effort: the last item is not a date, so it stays part of the publisher.
                // console.warn("Could not parse source time", e)
            }
            if (time) {
                pubItems.pop();
            }
            const publisher = pubItems.slice(1).map(item => item.trim()).join(", ").trim();
            const publication = { publisher, time };
            // pubItems is empty when the whole text was a date: fall back to an empty title.
            const title = (pubItems[0] ?? "").trim();
            sources.push({ events: [], title, id, authors, publication, previousSourceRefs: [] });
        }
        return sources;
    }

    /**
     * Resolves the place described by an element and removes that element.
     *
     * @param context Context passed to the city service; its `debug()` is called on lookup failure.
     * @param placeEl Element whose text names the place (removed from its parent).
     * @returns An OrganizationPlace when the city service finds the name, a NamedPlace otherwise.
     */
    getPlace(context, placeEl) {
        const placeStr = placeEl.textContent;
        const placeParsed = RR0HttpDatasource.placeRegex.exec(placeStr);
        placeEl.remove();
        if (placeParsed) {
            // TODO: Find region from placeParsed[2]
            const parent = undefined;
            const org = this.cityService.find(context, placeParsed[1], parent);
            if (org) {
                return new OrganizationPlace(org);
            }
            context.debug(`Could not find place named "${placeParsed[1]}"`);
        }
        return new NamedPlace(placeStr);
    }

    /**
     * Returns the normalized text of an element, with notes inlined.
     *
     * Each ".note-id" element is replaced (in el itself) by its note contents
     * in parentheses. Newlines are then removed, runs of whitespace collapsed
     * to one space, and the space before a period dropped.
     *
     * @param el Element to read (mutated: note elements are replaced by text).
     * @returns {string} The cleaned-up text.
     */
    getDescription(el) {
        const notes = el.querySelectorAll(".note-id");
        for (const note of notes) {
            const noteContents = note.querySelector(".note-contents");
            note.replaceWith(` (${noteContents.textContent})`);
        }
        return el.textContent.trim()
            .replaceAll("\n", "")
            .replace(/\s{2,}/g, " ")
            .replaceAll(" .", ".");
    }

    /**
     * Builds the URL of the page matching the context time.
     *
     * The year is zero-padded to 4 digits and split into one path segment per
     * digit (e.g. 1947 gives "1/9/4/7"), prefixed with "-" for negative years.
     * The 2-digit month is appended when set, and the 2-digit day when both
     * month and day are set.
     *
     * @param context Context whose time provides getYear(), getMonth() and getDayOfMonth().
     * @returns {URL} The search URL for that time.
     */
    queryUrl(context) {
        const time = context.time;
        const day = time.getDayOfMonth();
        const month = time.getMonth();
        const year = time.getYear();
        const searchUrl = new URL(this.searchPath, this.baseUrl);
        const sign = year < 0 ? "-" : "";
        let timeStr = sign + String(Math.abs(year)).padStart(4, "0").split("").join("/");
        if (month) {
            timeStr = UrlUtil.join(timeStr, String(month).padStart(2, "0"));
            if (day) {
                timeStr = UrlUtil.join(timeStr, String(day).padStart(2, "0"));
            }
        }
        searchUrl.pathname = UrlUtil.join(searchUrl.pathname, timeStr);
        return searchUrl;
    }
}