maxun-core
Version:
Core package for Maxun, responsible for data extraction
152 lines (151 loc) • 6.66 kB
JavaScript
// Interop helper emitted by the TypeScript compiler: reuse an already-installed
// helper when one is present on `this`, otherwise wrap non-ES modules so their
// export is reachable through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules already expose `.default` themselves.
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const joi_1 = __importDefault(require("joi"));
const logic_1 = require("./types/logic");
/**
 * Static helpers for validating and pre-processing workflow files/objects.
 * The class holds no state; every member is a static method.
 */
class Preprocessor {
    /**
     * Validates the structure of a workflow file against the Joi schema.
     * @param {WorkflowFile} workflow Object with optional `meta` and a required `workflow` array.
     * @returns {Error|undefined} The Joi validation error, or `undefined` when the workflow is valid.
     */
    static validateWorkflow(workflow) {
        const Joi = joi_1.default;
        // `{ $regex: "..." }` is the serialized form of a regular expression.
        const regex = Joi.object({
            $regex: Joi.string().required(),
        });
        // The where-clause is recursive ($and/$or/$not), hence the id + link pair.
        const whereSchema = Joi.object({
            url: [Joi.string().uri(), regex],
            selectors: Joi.array().items(Joi.string()),
            cookies: Joi.object({}).pattern(Joi.string(), Joi.string()),
            $after: [Joi.string(), regex],
            $before: [Joi.string(), regex],
            $and: Joi.array().items(Joi.link('#whereSchema')),
            $or: Joi.array().items(Joi.link('#whereSchema')),
            $not: Joi.link('#whereSchema'),
        }).id('whereSchema');
        const schema = Joi.object({
            meta: Joi.object({
                name: Joi.string(),
                desc: Joi.string(),
            }),
            workflow: Joi.array().items(Joi.object({
                id: Joi.string(),
                where: whereSchema.required(),
                what: Joi.array().items({
                    action: Joi.string().required(),
                    args: Joi.array().items(Joi.any()),
                }).required(),
            })).required(),
        });
        const { error } = schema.validate(workflow);
        return error;
    }
    /**
     * Extracts parameter names from the workflow.
     * Every object carrying a truthy `$param` field contributes its value; such
     * objects are not searched any deeper.
     * @param {WorkflowFile} workflow The given workflow (only `workflow.workflow` is inspected).
     * @returns {String[]} Unique parameter names, in order of first appearance.
     */
    static getParams(workflow) {
        // A Set keeps first-seen order and drops repeats, so a parameter that is
        // referenced several times is reported once.
        const names = new Set();
        const collect = (node) => {
            // `typeof null === 'object'`, so null must be excluded explicitly.
            if (node === null || typeof node !== 'object') {
                return;
            }
            // Recursion base case
            if (node.$param) {
                names.add(node.$param);
                return;
            }
            // Recursion general case
            Object.values(node).forEach((child) => collect(child));
        };
        collect(workflow.workflow);
        return [...names];
    }
    /**
     * Lists all the selectors used in the given workflow (only the literal
     * `selectors` field in WHERE clauses so far, including clauses nested under
     * the keys listed in `operators`).
     * @param {Workflow} workflow Array of workflow steps.
     * @returns {String[]} Unique selectors, in order of first appearance.
     */
    // TODO : add recursive selector search (also in click/fill etc. events?)
    static extractSelectors(workflow) {
        /**
         * Given a Where condition, this function extracts
         * all the existing selectors from it (recursively).
         */
        const selectorsFromCondition = (where) => {
            // A missing or non-object condition simply has no selectors.
            if (!where || typeof where !== 'object') {
                return [];
            }
            // the `selectors` field is either on the top level
            const own = (where.selectors !== null && where.selectors !== void 0) ? where.selectors : [];
            const out = Array.isArray(own) ? [...own] : [own];
            // or nested in the "operator" array
            logic_1.operators.forEach((op) => {
                const nested = where[op];
                if (!nested) {
                    return;
                }
                // An operator may hold one condition or a list of them.
                const conditions = Array.isArray(nested) ? nested : [nested];
                conditions.forEach((subWhere) => {
                    out.push(...selectorsFromCondition(subWhere));
                });
            });
            return out;
        };
        // Iterate through all the steps and gather their selectors; the Set removes
        // repeats both across steps and within a single step.
        const unique = new Set();
        workflow.forEach((step) => {
            selectorsFromCondition(step.where).forEach((selector) => unique.add(selector));
        });
        return [...unique];
    }
    /**
     * Recursively crawls the workflow and initializes it: replaces the
     * `{$param : paramName}` objects with the supplied values and turns
     * `{$regex : source}` objects into `RegExp` instances.
     * @param {Workflow} workflow Array of workflow steps.
     * @param {Object} [params] Map of parameter name to value; its keys must match
     * the workflow's parameter names exactly.
     * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched).
     * @throws {Error} When the provided parameter names differ from the expected ones.
     * @throws {SyntaxError} When a `$param` placeholder has no matching entry in `params`.
     */
    static initWorkflow(workflow, params) {
        const expected = [...this.getParams({ workflow })].sort();
        const provided = Object.keys(params !== null && params !== void 0 ? params : {}).sort();
        if (provided.join(',') !== expected.join(',')) {
            throw new Error([
                'Provided parameters do not match the workflow parameters',
                `provided: ${provided.join(',')},`,
                `expected: ${expected.join(',')}`,
                '',
            ].join('\n'));
        }
        /**
         * A recursive method for initializing special `{key: value}` syntax objects in the workflow.
         * @param object Workflow to initialize (or a part of it); mutated in place.
         * @param k key to look for ($regex, $param)
         * @param f function mutating the special `{}` syntax into
         * its true representation (RegExp...)
         * @returns Updated object
         */
        const initSpecialRecurse = (object, k, f) => {
            if (!object || typeof object !== 'object') {
                return object;
            }
            // for every key (child) of the object
            Object.keys(object).forEach((key) => {
                const child = object[key];
                // null and primitives cannot carry the special syntax (and
                // Object.keys(null) would throw), so skip them.
                if (child === null || typeof child !== 'object') {
                    return;
                }
                const childKeys = Object.keys(child);
                // if the field has only one key, which is `k`
                if (childKeys.length === 1 && childKeys[0] === k && child[k]) {
                    // process the current special tag (init param, hydrate regex...)
                    object[key] = f(child[k]);
                }
                else {
                    initSpecialRecurse(child, k, f);
                }
            });
            return object;
        };
        // TODO: do better deep copy; the JSON round-trip only preserves JSON-compatible data.
        let workflowCopy = JSON.parse(JSON.stringify(workflow));
        if (params) {
            workflowCopy = initSpecialRecurse(workflowCopy, '$param', (paramName) => {
                // Own-property check rather than truthiness, so values such as
                // 0, '' or false are accepted as real parameter values.
                if (Object.prototype.hasOwnProperty.call(params, paramName)) {
                    return params[paramName];
                }
                throw new SyntaxError(`Unspecified parameter found ${paramName}.`);
            });
        }
        // Runs after parameter substitution, so a parameter value of the form
        // `{ $regex: ... }` is hydrated as well.
        workflowCopy = initSpecialRecurse(workflowCopy, '$regex', (regex) => new RegExp(regex));
        return workflowCopy;
    }
}
// Expose the Preprocessor class as this module's default export.
exports.default = Preprocessor;
;