/*
 * @rr0/cms
 * Version:
 * RR0 Content Management System (CMS)
 * 134 lines (133 loc) • 5.27 kB
 * JavaScript
 */
import { UfoSearchDatasource } from "./UfoSearchDatasource.js";
import { UrlUtil } from "../../../util/index.js";
import { JSDOM } from "jsdom";
import { HttpSource } from "../HttpSource.js";
import { By } from "selenium-webdriver";
import assert from "assert";
import { UfoSearchCaseType } from "./UfoSearchCase.js";
/**
 * UFO-search datasource that reads cases by scraping the HTML result page of
 * the site's search form, loaded through the browser driver provided by
 * {@link HttpSource}.
 */
export class UfoSearchHttpDatasource extends UfoSearchDatasource {
  /**
   * @param {string|URL} baseUrl Base URL against which the search path and case links are resolved.
   * @param {string} searchPath Path of the search page, relative to `baseUrl`.
   */
  constructor(baseUrl, searchPath) {
    super();
    this.baseUrl = baseUrl;
    this.searchPath = searchPath;
    this.http = new HttpSource();
  }

  /**
   * Builds the search URL for a given date. Each provided component is sent
   * twice (`sy`/`ey`, `sm`/`em`, `sd`/`ed`) with the same value — presumably
   * the start and end bounds of the searched range (TODO confirm against the
   * site's search form).
   *
   * @param {number} [year] Omitted from the query when falsy.
   * @param {number} [month] Omitted from the query when falsy.
   * @param {number} [day] Omitted from the query when falsy.
   * @returns {string} The absolute search URL.
   */
  queryUrl(year, month, day) {
    const queryParams = {};
    if (year) {
      queryParams.ey = queryParams.sy = year;
    }
    if (month) {
      queryParams.em = queryParams.sm = month;
    }
    if (day) {
      queryParams.sd = queryParams.ed = day;
    }
    const searchUrl = new URL(this.searchPath, this.baseUrl);
    searchUrl.search = UrlUtil.objToQueryParams(queryParams);
    return searchUrl.href;
  }

  /**
   * Fetches the search page for the date held by `context.time` and converts
   * every result row into a case object.
   *
   * @param context Context whose `time` supplies the year, month and day of month to search.
   * @returns {Promise<object[]>} The cases found, in page order.
   */
  async readCases(context) {
    const day = context.time.getDayOfMonth();
    const month = context.time.getMonth();
    const year = context.time.getYear();
    const queryUrl = this.queryUrl(year, month, day);
    const driver = await this.http.getDriver();
    try {
      await driver.get(queryUrl);
      const resultSelector = "#output hr + p";
      // The located elements are discarded; the rows are re-read from the page source below.
      // NOTE(review): presumably intended to give the page time to render its results — confirm.
      await driver.findElements(By.css(resultSelector));
      const page = await driver.getPageSource();
      const doc = new JSDOM(page).window.document.documentElement;
      const rows = Array.from(doc.querySelectorAll(resultSelector));
      const cases = [];
      for (const row of rows) {
        cases.push(await this.getFromRow(context, row));
      }
      return cases;
    } finally {
      // Always release the HTTP source, even when scraping fails.
      await this.http.close();
    }
  }

  /**
   * Parses a date label such as `7/4/1947 #1234` or `1947 (approximate) #12`
   * and stores its components into `time`. When two numbers precede the year,
   * the first is stored as the month and the second as the day of month.
   * The optional `Late ` prefix and `'s` suffix are matched but not stored.
   *
   * @param time Time object to update (year, month, day of month, `approximate` flag).
   * @param {string} dateStr Text of the date link.
   * @throws {AssertionError} If `dateStr` does not match the expected format.
   */
  setDate(time, dateStr) {
    const dateFields = /(Late\s+)?(?:(\d{1,2})\/)?(?:(\d{1,2})\/)?(\d+)('s)?(\s+\(approximate\))?\s+#(\d+)/.exec(dateStr);
    assert.ok(dateFields, `Could not parse date "${dateStr}"`);
    const toNumber = (str) => (str ? parseInt(str, 10) : undefined);
    time.setYear(toNumber(dateFields[4]));
    time.setMonth(toNumber(dateFields[2]));
    time.setDayOfMonth(toNumber(dateFields[3]));
    time.approximate = Boolean(dateFields[6]);
  }

  /**
   * Returns the first heading whose text contains `label`.
   *
   * @param {Element[]} headings Candidate heading elements.
   * @param {string} label Text to look for.
   * @returns {Element|undefined} The matching heading, if any.
   */
  findHeading(headings, label) {
    return headings.find((heading) => heading.textContent.includes(label));
  }

  /**
   * Returns the trimmed text of the node that immediately follows a heading.
   *
   * @param {Element|undefined} heading Heading element, possibly missing.
   * @returns {string|undefined} The text, or `undefined` when the heading or its next sibling is missing.
   */
  textAfter(heading) {
    const valueNode = heading ? heading.nextSibling : undefined;
    return valueNode ? valueNode.textContent.trim() : undefined;
  }

  /**
   * Converts one result row into a case object. Field values are located
   * through the row's `<strong>` headings ("Date", "Time", "Location",
   * "Description", "Type"); every field is optional.
   *
   * @param context Context to clone; the clone's time is reset then filled from the row.
   * @param {Element} row Result row element.
   * @returns {Promise<object>} The case; `id` and `url` are `undefined` when the row has no date link.
   */
  async getFromRow(context, row) {
    const headings = Array.from(row.querySelectorAll("strong"));
    const itemContext = context.clone();
    const time = itemContext.time.reset();

    let href;
    let caseNumber;
    const dateLabel = this.findHeading(headings, "Date");
    if (dateLabel) {
      const dateLink = dateLabel.nextElementSibling;
      const url = new URL(dateLink.href, this.baseUrl);
      href = url.href;
      // The case number is the fragment of the case link, without its leading "#".
      caseNumber = url.hash.substring(1);
      this.setDate(time, dateLink.textContent);
    }

    const timeLabel = this.findHeading(headings, "Time");
    if (timeLabel) {
      this.setTime(time, timeLabel);
    }

    const location = this.textAfter(this.findHeading(headings, "Location"));
    const description = this.textAfter(this.findHeading(headings, "Description"));
    const type = this.textAfter(this.findHeading(headings, "Type"));

    return {
      id: caseNumber,
      time: time.date,
      url: href,
      location,
      // Prefer the row's description; fall back to its type text when there is none.
      desc: description !== undefined ? description : type,
      key_vals: { url: href },
      attributes: undefined,
      ref: "",
      search: "",
      source: "",
      source_id: "",
      type: UfoSearchCaseType.ufoSightings,
      extraData: undefined
    };
  }

  /**
   * Parses the text following the "Time" heading (e.g. `21:05`, `~3:30`,
   * `10:15?`) and stores the hour and minutes into `dateTime`. A leading `~`
   * or a trailing `?` marks the hour as approximate.
   *
   * @param dateTime Time object to update; its `date.hour` must already exist.
   * @param {Node} timeLabel The "Time" heading; its next sibling holds the value.
   * @throws {AssertionError} If the value contains no `h:mm` time.
   */
  setTime(dateTime, timeLabel) {
    const timeValue = timeLabel.nextSibling.textContent;
    const timeFields = /(~)?(\d{1,2}):(\d{1,2})(\?)?/.exec(timeValue);
    assert.ok(timeFields, `Could not parse time "${timeValue}"`);
    // Index 0 is the whole match; the capture groups start at index 1.
    const [, aboutMark, hourStr, minutesStr, doubtMark] = timeFields;
    dateTime.date.hour.approximate = Boolean(aboutMark || doubtMark);
    dateTime.setHour(parseInt(hourStr, 10));
    dateTime.setMinutes(parseInt(minutesStr, 10));
  }
}