modelscrape
Version:
Cheerio-based scraping helper to scrape websites based on models/templates.
53 lines (40 loc) • 1.46 kB
JavaScript
const { ParamError, ModelError } = require("../errors");
/**
 * Builds a checker that asserts `object` owns a given property.
 *
 * The returned function is shaped to be passed directly to `Array.prototype.forEach`
 * over a list of required property names; only its first argument is used.
 *
 * @param {*} object - Value whose own (non-inherited) properties are inspected.
 * @returns {function(string): void} Checker that returns nothing on success and
 *   throws an `Error` when `propName` is not an own property of `object`.
 */
function _hasProperty(object) {
  return function(propName) {
    // null/undefined own nothing: report that as a missing property instead of
    // letting the property lookup blow up with an unrelated TypeError.
    const isMissing =
      object == null ||
      // Borrow the method from Object.prototype so objects created with
      // Object.create(null), or objects that shadow `hasOwnProperty`, still work.
      !Object.prototype.hasOwnProperty.call(object, propName);

    if (isMissing) {
      throw new Error(`Property {${propName}} not found.`);
    }
  };
}
// Pattern that validateParam runs against `param.url` via `.test()`.
// NOTE(review): the leading `[...]` is a character class, so `http(s)`, `://` and
// `www.` are treated as a set of individually allowed characters rather than as a
// scheme/host prefix. The pattern is also unanchored, so `.test()` succeeds as soon
// as any substring matches. Confirm this loose check is intended before relying on
// it to reject malformed URLs.
const PARAM_URL_REGEX = /[(http(s)):\/\/(www\.)?a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/;
// Property names that validateParam requires `param` to own.
const PARAM_REQ_PROPS = ["url", "queryObjects"];
/**
 * Validates the top-level parameter object and hands it back unchanged.
 *
 * Checks, in order:
 *   1. every name in PARAM_REQ_PROPS is an own property of `param`;
 *   2. `param.url` matches PARAM_URL_REGEX;
 *   3. `param.queryObjects` is an array.
 *
 * @param {Object} param - Object expected to carry `url` and `queryObjects`.
 * @returns {Object} The same `param` reference that was passed in.
 * @throws Whatever `_hasProperty`'s checker throws for a missing property, or the
 *   value produced by `ParamError.invalidURL()` / `ParamError.invalidQueryObjects()`.
 */
function validateParam(param) {
  const requireOwn = _hasProperty(param);
  for (const requiredName of PARAM_REQ_PROPS) {
    requireOwn(requiredName);
  }

  const urlMatches = PARAM_URL_REGEX.test(param.url);
  if (urlMatches === false) {
    throw ParamError.invalidURL();
  }

  if (Array.isArray(param.queryObjects)) {
    return param;
  }
  throw ParamError.invalidQueryObjects();
}
// Property names that validateQuery requires each query object to own.
const QUERY_REQ_PROPS = ["name", "endpoint", "collections"];
/**
 * Validates a single query object and hands it back unchanged.
 *
 * The query must own every property named in QUERY_REQ_PROPS, and its
 * `collections` property must be an array.
 *
 * @param {Object} query - Object expected to carry `name`, `endpoint` and `collections`.
 * @returns {Object} The same `query` reference that was passed in.
 * @throws Whatever `_hasProperty`'s checker throws for a missing property, or the
 *   value produced by `ModelError.invalidProperty("collections")`.
 */
function validateQuery(query) {
  const requireOwn = _hasProperty(query);
  for (const requiredName of QUERY_REQ_PROPS) {
    requireOwn(requiredName);
  }

  const { collections } = query;
  if (Array.isArray(collections)) {
    return query;
  }
  throw ModelError.invalidProperty("collections");
}
/**
 * Ensures `collections` is an array and returns a function that maps over it.
 *
 * @param {Array} collections - The page collections to iterate.
 * @returns {function(Function): Array} Function that takes a mapper and returns
 *   `collections.map(mapper)`.
 * @throws The value produced by `ModelError.invalidPropertyType("collections", "Array")`
 *   when `collections` is not an array.
 */
function validatePageCollections(collections) {
  const isList = Array.isArray(collections);
  if (isList) {
    // Defer the actual iteration: the caller supplies the mapper later.
    const mapOverCollections = (mapper) => collections.map(mapper);
    return mapOverCollections;
  }
  throw ModelError.invalidPropertyType("collections", "Array");
}
/**
 * Ensures `props` is an array and returns a function that maps over it.
 *
 * @param {Array} props - The entity property definitions to iterate.
 * @returns {function(Function): Array} Function that takes a mapper and returns
 *   `props.map(mapper)`.
 * @throws The value produced by `ModelError.invalidPropertyType("props", "Array")`
 *   when `props` is not an array.
 */
function validateEntityProps(props) {
  const isList = Array.isArray(props);
  if (isList === false) {
    throw ModelError.invalidPropertyType("props", "Array");
  }

  // Defer the actual iteration: the caller supplies the mapper later.
  const mapOverProps = (mapper) => props.map(mapper);
  return mapOverProps;
}
// Public API of this module: the four validators. `_hasProperty` and the
// *_REQ_PROPS / PARAM_URL_REGEX constants are not exported.
module.exports = {
validateParam,
validateQuery,
validatePageCollections,
validateEntityProps
};
;