UNPKG

maxun-core

Version:

Core package for Maxun, responsible for data extraction

152 lines (151 loc) 6.66 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const joi_1 = __importDefault(require("joi")); const logic_1 = require("./types/logic"); /** * Class for static processing the workflow files/objects. */ class Preprocessor { static validateWorkflow(workflow) { const regex = joi_1.default.object({ $regex: joi_1.default.string().required(), }); const whereSchema = joi_1.default.object({ url: [joi_1.default.string().uri(), regex], selectors: joi_1.default.array().items(joi_1.default.string()), cookies: joi_1.default.object({}).pattern(joi_1.default.string(), joi_1.default.string()), $after: [joi_1.default.string(), regex], $before: [joi_1.default.string(), regex], $and: joi_1.default.array().items(joi_1.default.link('#whereSchema')), $or: joi_1.default.array().items(joi_1.default.link('#whereSchema')), $not: joi_1.default.link('#whereSchema'), }).id('whereSchema'); const schema = joi_1.default.object({ meta: joi_1.default.object({ name: joi_1.default.string(), desc: joi_1.default.string(), }), workflow: joi_1.default.array().items(joi_1.default.object({ id: joi_1.default.string(), where: whereSchema.required(), what: joi_1.default.array().items({ action: joi_1.default.string().required(), args: joi_1.default.array().items(joi_1.default.any()), }).required(), })).required(), }); const { error } = schema.validate(workflow); return error; } /** * Extracts parameter names from the workflow. * @param {WorkflowFile} workflow The given workflow * @returns {String[]} List of parameters' names. */ static getParams(workflow) { const getParamsRecurse = (object) => { if (typeof object === 'object') { // Recursion base case if (object.$param) { return [object.$param]; } // Recursion general case return Object.values(object) .reduce((p, v) => [...p, ...getParamsRecurse(v)], []); } return []; }; return getParamsRecurse(workflow.workflow); } /** * List all the selectors used in the given workflow (only literal "selector" * field in WHERE clauses so far) */ // TODO : add recursive selector search (also in click/fill etc. events?) static extractSelectors(workflow) { /** * Given a Where condition, this function extracts * all the existing selectors from it (recursively). */ const selectorsFromCondition = (where) => { var _a; // the `selectors` field is either on the top level let out = (_a = where.selectors) !== null && _a !== void 0 ? _a : []; if (!Array.isArray(out)) { out = [out]; } // or nested in the "operator" array logic_1.operators.forEach((op) => { let condWhere = where[op]; if (condWhere) { condWhere = Array.isArray(condWhere) ? condWhere : [condWhere]; (condWhere).forEach((subWhere) => { out = [...out, ...selectorsFromCondition(subWhere)]; }); } }); return out; }; // Iterate through all the steps and extract the selectors from all of them. return workflow.reduce((p, step) => [ ...p, ...selectorsFromCondition(step.where).filter((x) => !p.includes(x)), ], []); } /** * Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects * with the defined value. * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). */ static initWorkflow(workflow, params) { const paramNames = this.getParams({ workflow }); if (Object.keys(params !== null && params !== void 0 ? params : {}).sort().join(',') !== paramNames.sort().join(',')) { throw new Error(`Provided parameters do not match the workflow parameters provided: ${Object.keys(params !== null && params !== void 0 ? params : {}).sort().join(',')}, expected: ${paramNames.sort().join(',')} `); } /** * A recursive method for initializing special `{key: value}` syntax objects in the workflow. * @param object Workflow to initialize (or a part of it). * @param k key to look for ($regex, $param) * @param f function mutating the special `{}` syntax into * its true representation (RegExp...) * @returns Updated object */ const initSpecialRecurse = (object, k, f) => { if (!object || typeof object !== 'object') { return object; } const out = object; // for every key (child) of the object Object.keys(object).forEach((key) => { // if the field has only one key, which is `k` if (Object.keys(object[key]).length === 1 && object[key][k]) { // process the current special tag (init param, hydrate regex...) out[key] = f(object[key][k]); } else { initSpecialRecurse(object[key], k, f); } }); return out; }; // TODO: do better deep copy, this is hideous. let workflowCopy = JSON.parse(JSON.stringify(workflow)); if (params) { workflowCopy = initSpecialRecurse(workflowCopy, '$param', (paramName) => { if (params && params[paramName]) { return params[paramName]; } throw new SyntaxError(`Unspecified parameter found ${paramName}.`); }); } workflowCopy = initSpecialRecurse(workflowCopy, '$regex', (regex) => new RegExp(regex)); return workflowCopy; } } exports.default = Preprocessor;