UNPKG

@rr0/cms

Version:

RR0 Content Management System (CMS)

134 lines (133 loc) 5.27 kB
import { UfoSearchDatasource } from "./UfoSearchDatasource.js";
import { UrlUtil } from "../../../util/index.js";
import { JSDOM } from "jsdom";
import { HttpSource } from "../HttpSource.js";
import { By } from "selenium-webdriver";
import assert from "assert";
import { UfoSearchCaseType } from "./UfoSearchCase.js";

/**
 * Datasource that loads a search results page through a web driver and
 * converts each result row of the returned HTML into a case object.
 */
export class UfoSearchHttpDatasource extends UfoSearchDatasource {
  /**
   * @param baseUrl Base URL against which the search path and the case links are resolved.
   * @param searchPath Path of the search page, resolved relative to baseUrl.
   */
  constructor(baseUrl, searchPath) {
    super();
    this.baseUrl = baseUrl;
    this.searchPath = searchPath;
    this.http = new HttpSource();
  }

  /**
   * Builds the search URL for a date. Each provided component is sent as both
   * the start (s*) and end (e*) bound, so the search covers exactly that date.
   * Falsy components are left out of the query.
   *
   * @param year Year to search, if any.
   * @param month Month to search, if any.
   * @param day Day of month to search, if any.
   * @returns {string} The absolute URL of the search.
   */
  queryUrl(year, month, day) {
    const queryParams = {};
    // The chained assignments are kept as-is: they determine the order in which
    // keys are inserted, and therefore the order of the parameters in the URL.
    if (year) {
      queryParams.ey = queryParams.sy = year;
    }
    if (month) {
      queryParams.em = queryParams.sm = month;
    }
    if (day) {
      queryParams.sd = queryParams.ed = day;
    }
    const queryParamsStr = UrlUtil.objToQueryParams(queryParams);
    const searchUrl = new URL(this.searchPath, this.baseUrl);
    searchUrl.search = queryParamsStr;
    return searchUrl.href;
  }

  /**
   * Fetches the results page for the date held by context.time and parses
   * every result row into a case.
   *
   * @param context Context whose time provides the year, month and day to search.
   * @returns {Promise<object[]>} The cases found, in page order.
   */
  async readCases(context) {
    const day = context.time.getDayOfMonth();
    const month = context.time.getMonth();
    const year = context.time.getYear();
    const queryUrl = this.queryUrl(year, month, day);
    const driver = await this.http.getDriver();
    try {
      await driver.get(queryUrl);
      const resultSelector = "#output hr + p";
      // The result of this lookup is not used; the rows are re-queried below
      // from the page source parsed by JSDOM.
      await driver.findElements(By.css(resultSelector));
      const page = await driver.getPageSource();
      const doc = new JSDOM(page).window.document.documentElement;
      const rows = Array.from(doc.querySelectorAll(resultSelector));
      const cases = [];
      for (const row of rows) {
        cases.push(await this.getFromRow(context, row));
      }
      return cases;
    } finally {
      // Always release the HTTP source, even when loading or parsing failed.
      await this.http.close();
    }
  }

  /**
   * Parses the text of a case date link (for instance "7/8/1947 #123" or
   * "Late 1950's (approximate) #42") and stores its components into time.
   * The first optional "n/" prefix is stored as the month and the second as
   * the day of month; a missing component is stored as undefined.
   *
   * @param time Time context to update.
   * @param {string} dateStr Text to parse.
   * @throws AssertionError if dateStr does not match the expected format.
   */
  setDate(time, dateStr) {
    const dateFields = /(Late\s+)?(?:(\d{1,2})\/)?(?:(\d{1,2})\/)?(\d+)('s)?(\s+\(approximate\))?\s+#(\d+)/.exec(dateStr);
    assert.ok(dateFields, `Could not parse date "${dateStr}"`);
    const yearStr = dateFields[4];
    time.setYear(yearStr ? parseInt(yearStr, 10) : undefined);
    const monthStr = dateFields[2];
    time.setMonth(monthStr ? parseInt(monthStr, 10) : undefined);
    const dayOfMonthStr = dateFields[3];
    time.setDayOfMonth(dayOfMonthStr ? parseInt(dayOfMonthStr, 10) : undefined);
    const approximate = Boolean(dateFields[6]);
    time.approximate = approximate;
  }

  /**
   * Converts one result row into a case object. The row's fields are located
   * through their <strong> labels ("Date", "Time", "Location", "Description",
   * "Type"); any of them may be missing.
   *
   * @param context Context to clone for this case, so that the caller's time is left untouched.
   * @param row The DOM element of the result row.
   * @returns {Promise<object>} The parsed case.
   */
  async getFromRow(context, row) {
    const fieldsHeadings = Array.from(row.querySelectorAll("strong"));
    const findLabel = (name) => fieldsHeadings.find(heading => heading.textContent.indexOf(name) >= 0);
    const valueAfter = (label) => (label ? label.nextSibling.textContent.trim() : undefined);

    const itemContext = context.clone();
    const time = itemContext.time.reset();

    let url;
    let caseNumber;
    const dateLabel = findLabel("Date");
    if (dateLabel) {
      const dateLink = dateLabel.nextElementSibling;
      url = new URL(dateLink.href, this.baseUrl);
      // The case number is the fragment of the link, without its leading "#".
      caseNumber = url.hash.substring(1);
      this.setDate(time, dateLink.textContent);
    }

    const timeLabel = findLabel("Time");
    if (timeLabel) {
      this.setTime(time, timeLabel);
    }

    const location = valueAfter(findLabel("Location"));
    const description = valueAfter(findLabel("Description"));
    const type = valueAfter(findLabel("Type"));

    let attributes;
    let extraData;
    // A row without a "Date" label has no link: leave the URL undefined
    // instead of throwing on url.href.
    const href = url ? url.href : undefined;
    return {
      id: caseNumber,
      time: time.date,
      url: href,
      location,
      // The description text used to be extracted then dropped in favor of the
      // type text; use it when present and keep the type as a fallback.
      desc: description !== undefined ? description : type,
      key_vals: { url: href },
      attributes,
      ref: "",
      search: "",
      source: "",
      source_id: "",
      type: UfoSearchCaseType.ufoSightings,
      extraData
    };
  }

  /**
   * Parses the text following the "Time" label (for instance "~21:30" or
   * "9:05?") and stores the hour and minutes into dateTime. A leading "~" or
   * a trailing "?" marks the hour as approximate.
   *
   * @param dateTime Time context to update.
   * @param timeLabel The "Time" label element; its next sibling node holds the value.
   * @throws AssertionError if the value does not contain a "h:mm" time.
   */
  setTime(dateTime, timeLabel) {
    const timeValue = timeLabel.nextSibling.textContent;
    const timeFields = /(~)?(\d{1,2}):(\d{1,2})(\?)?/.exec(timeValue);
    assert.ok(timeFields, `Could not parse time "${timeValue}"`);
    // Index 0 is the whole match (always truthy), so the capture groups start at 1.
    const [, approxPrefix, hourStr, minutesStr, doubtSuffix] = timeFields;
    dateTime.setHour(parseInt(hourStr, 10));
    // NOTE(review): assumes the time context exposes setMinutes() alongside
    // setHour() - confirm. Minutes were previously written through setHour().
    dateTime.setMinutes(parseInt(minutesStr, 10));
    // Flag the hour only once it has been set.
    // NOTE(review): presumably date.hour may be unset on a freshly reset context - confirm.
    dateTime.date.hour.approximate = Boolean(approxPrefix || doubtSuffix);
  }
}