@openactive/rpde-validator
Version:
A library to walk and validate an RPDE feed
351 lines (315 loc) • 9.3 kB
JavaScript
const {
ValidationErrorCategory,
ValidationErrorSeverity,
ValidationError,
} = require('@openactive/data-model-validator');
const { URL } = require('url');
const FeedLog = require('./feed-log');
const RpdeNode = require('./rpde-node');
const Rules = require('./rules');
const LastPageHelper = require('./helpers/last-page-helper');
const UrlHelper = require('./helpers/url-helper');
const RpdeErrorType = require('./errors/rpde-error-type');
const { version } = require('../package.json');
class FeedChecker {
constructor(url, options = {}) {
this.url = url;
this.isLastPage = false;
this.pageIndex = 0;
this.log = new FeedLog(url);
this.lastTimestamp = null;
this.firstId = null;
this.lastId = null;
this.lastChangeNumber = null;
this.httpRules = [];
this.pageRules = [];
this.preLastPageRules = [];
this.lastPageRules = [];
this.logCallback = options.logCallback;
this.userAgent = options.userAgent || `RPDE_Validator/${version} (+https://validator.openactive.io/rpde)`;
this.pageLimit = 20;
this.requestDelayMs = 0;
this.timeoutMs = 10000;
const overrideOptions = [
'pageLimit',
'requestDelayMs',
'timeoutMs',
];
for (const param of overrideOptions) {
if (
typeof options[param] === 'number'
&& !Number.isNaN(options[param])
) {
this[param] = options[param];
}
}
for (let index = 0; index < Rules.http.length; index += 1) {
this.httpRules.push(new Rules.http[index]());
}
for (let index = 0; index < Rules.page.length; index += 1) {
this.pageRules.push(new Rules.page[index]());
}
for (let index = 0; index < Rules.preLastPage.length; index += 1) {
this.preLastPageRules.push(new Rules.preLastPage[index]());
}
for (let index = 0; index < Rules.lastPage.length; index += 1) {
this.lastPageRules.push(new Rules.lastPage[index]());
}
}
logMessage(msg, extra = {}) {
if (typeof this.logCallback === 'function') {
this.logCallback(
{
percentage: this.percentageComplete(),
verbosity: 1,
...extra,
message: msg,
},
);
}
}
percentageComplete() {
return (this.pageIndex / (this.pageLimit + 1)) * 100;
}
addError(err) {
this.log.addError(err);
}
walk(urlOverride) {
const url = urlOverride || this.url;
return this.load(url, false)
.then((json) => {
if (typeof json !== 'object' || json === null) {
return null;
}
const nextUrl = UrlHelper.deriveUrl(json.next, url);
this.logMessage(
`Next URL is ${nextUrl}`,
{
verbosity: 2,
},
);
if (
nextUrl === url
&& json.items instanceof Array
&& json.items.length === 0
) {
this.logMessage(
`${url} is the last page`,
{
verbosity: 3,
},
);
this.isLastPage = true;
}
const node = new RpdeNode(
url,
json,
this.log,
this.pageIndex,
this.isLastPage,
);
this.logMessage(`Applying page rules to ${url}`);
for (const rule of this.pageRules) {
this.logMessage(
`Applying rule ${rule.meta.name} to ${url}`,
{
verbosity: 2,
},
);
rule.validate(node);
}
this.lastTimestamp = UrlHelper.getParam('afterTimestamp', json.next, url);
this.lastChangeNumber = UrlHelper.getParam('afterChangeNumber', json.next, url);
this.lastId = UrlHelper.getParam('afterId', json.next, url);
if (this.firstId === null) {
this.firstId = this.lastId;
}
this.pageIndex += 1;
if (
nextUrl !== url
&& UrlHelper.isUrl(nextUrl)
&& this.pageIndex < this.pageLimit
) {
return new Promise(
(resolve) => {
if (this.requestDelayMs > 0) {
this.logMessage(
`Waiting for ${this.requestDelayMs}ms before next request`,
{
verbosity: 2,
},
);
}
setTimeout(resolve, this.requestDelayMs);
},
).then(() => this.walk(nextUrl));
}
const afterNode = new RpdeNode(
this.url,
json,
this.log,
this.pageIndex,
this.isLastPage,
);
for (const rule of this.pageRules) {
rule.after(afterNode);
}
return this.testLastPage();
});
}
testLastPage() {
const preNode = new RpdeNode(
this.url,
{
lastTimestamp: this.lastTimestamp,
lastId: this.lastId,
lastChangeNumber: this.lastChangeNumber,
},
this.log,
);
this.logMessage(`Applying pre last page rules to ${this.url}`);
for (const rule of this.preLastPageRules) {
this.logMessage(
`Applying rule ${rule.meta.name}`,
{
verbosity: 2,
},
);
rule.validate(preNode);
}
const currentUrl = new URL(this.url);
let lastPageUrl = `${currentUrl.origin}${currentUrl.pathname}?`;
let hasUrl = false;
if (this.lastTimestamp !== null) {
const value = LastPageHelper.calculateLastPageTimestamp(this.lastTimestamp);
if (value !== null) {
lastPageUrl = `${lastPageUrl}afterTimestamp=${value}`;
if (this.firstId !== null) {
lastPageUrl = `${lastPageUrl}&afterId=${this.firstId}`;
}
hasUrl = true;
}
} else if (this.lastChangeNumber !== null) {
const value = LastPageHelper.calculateLastPageChangeNumber();
if (value !== null) {
lastPageUrl = `${lastPageUrl}afterChangeNumber=${value}`;
hasUrl = true;
}
}
if (hasUrl) {
return this.load(lastPageUrl, true)
.then((json) => {
const lastPageNode = new RpdeNode(
lastPageUrl,
json,
this.log,
this.pageIndex,
true,
);
this.logMessage(`Applying last page rules to ${lastPageUrl}`);
for (const rule of this.lastPageRules) {
this.logMessage(
`Applying rule ${rule.meta.name} to ${lastPageUrl}`,
{
verbosity: 2,
},
);
rule.validate(lastPageNode);
}
if (typeof json === 'object' && typeof json.next !== 'undefined') {
const nextUrlRaw = UrlHelper.deriveUrl(json.next, lastPageUrl);
return this.load(nextUrlRaw, true)
.then((nextJson) => {
const nextNode = new RpdeNode(
nextUrlRaw,
nextJson,
this.log,
this.pageIndex,
true,
);
nextNode.previousNode = lastPageNode;
this.logMessage(`Applying last page rules to ${nextUrlRaw}`);
for (const rule of this.lastPageRules) {
this.logMessage(
`Applying rule ${rule.meta.name} to ${nextUrlRaw}`,
{
verbosity: 2,
},
);
rule.validate(nextNode);
}
});
}
return null;
});
}
return null;
}
load(url, isLastPage) {
this.logMessage(`Loading ${url}...`);
this.log.addPage(url);
const options = {
headers: {
'User-Agent': this.userAgent,
},
};
let res;
return UrlHelper.fetch(url, options, this.timeoutMs).then(
(response) => {
this.logMessage(`Loaded ${url}`);
res = response;
return res.text();
},
).then(
(body) => {
const node = new RpdeNode(
url,
{
contentType: res.headers.get('content-type'),
cacheControl: res.headers.get('cache-control'),
status: res.status,
body,
},
this.log,
undefined,
isLastPage,
);
this.logMessage(`Applying HTTP rules to ${url}`);
for (const rule of this.httpRules) {
this.logMessage(
`Applying rule ${rule.meta.name} to ${url}`,
{
verbosity: 2,
},
);
rule.validate(node);
}
let json;
try {
json = JSON.parse(body);
} catch (e) {
return null;
}
return json;
},
).catch(
(e) => {
if (e.name === 'TimeoutError') {
this.log.addPageError(
url,
new ValidationError(
{
category: ValidationErrorCategory.INTERNAL,
type: RpdeErrorType.HTTP_ERROR,
severity: ValidationErrorSeverity.FAILURE,
message: e.message,
},
),
);
}
return undefined;
},
);
}
}
module.exports = FeedChecker;