UNPKG

maxun-core

Version:

Core package for Maxun, responsible for data extraction

107 lines (106 loc) 3.73 kB
/// <reference types="node" />
// NOTE: the directive above must sit on its own line. `///` is a line comment,
// so anything that follows it on the same physical line is ignored by the compiler.
import { Page } from 'playwright';
import { EventEmitter } from 'events';
import { WorkflowFile, ParamType } from './types/workflow';

/**
 * Extending the Window interface for custom scraping functions.
 *
 * These members are only declared here; their implementations are not part of
 * this declaration file.
 */
declare global {
    interface Window {
        /** Takes a selector (or `null`) and returns an array of string-to-string records. */
        scrape: (selector: string | null) => Record<string, string>[];
        /** Takes a schema keyed by field name (`selector`, `tag`, `attribute` per field) and returns a single record. */
        scrapeSchema: (schema: Record<string, {
            selector: string;
            tag: string;
            attribute: string;
        }>) => Record<string, any>;
        /** Takes a list configuration (list selector, fields, optional limit, pagination) and returns an array of records. */
        scrapeList: (config: {
            listSelector: string;
            fields: any;
            limit?: number;
            pagination: any;
        }) => Record<string, any>[];
        /** Takes a list selector and returns `{ selector, innerText }` entries. */
        scrapeListAuto: (listSelector: string) => {
            selector: string;
            innerText: string;
        }[];
        /** Scroll helper; `pages` is optional. NOTE(review): unit of `pages` is not visible here - confirm. */
        scrollDown: (pages?: number) => void;
        /** Scroll helper; `pages` is optional. NOTE(review): unit of `pages` is not visible here - confirm. */
        scrollUp: (pages?: number) => void;
    }
}

/**
 * Defines optional interpreter options (passed in constructor).
 *
 * The constructor accepts `Partial<InterpreterOptions>`, so every field may be
 * omitted by callers. NOTE(review): default values are not visible in this
 * declaration file - confirm against the implementation.
 */
interface InterpreterOptions {
    mode?: string;
    maxRepeats: number;
    maxConcurrency: number;
    /** Callback receiving an output value; may be synchronous or return a promise. */
    serializableCallback: (output: any) => (void | Promise<void>);
    /** Callback receiving an output value together with its MIME type; may be synchronous or return a promise. */
    binaryCallback: (output: any, mimeType: string) => (void | Promise<void>);
    debug: boolean;
    /** Optional debugging hooks; each hook may be left out individually. */
    debugChannel: Partial<{
        activeId: (id: number) => void;
        debugMessage: (msg: string) => void;
        setActionType: (type: string) => void;
    }>;
}

/**
 * Class for running the Smart Workflows.
 *
 * Private members are listed without types, as is usual for declaration
 * output; only `run` and `stop` (plus the inherited `EventEmitter` API) are
 * usable by consumers.
 */
export default class Interpreter extends EventEmitter {
    private workflow;
    private initializedWorkflow;
    private options;
    private concurrency;
    private stopper;
    private log;
    private blocker;
    private cumulativeResults;
    /**
     * @param workflow Workflow definition to run.
     * @param options Optional overrides of the interpreter options.
     */
    constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>);
    private applyAdBlocker;
    private disableAdBlocker;
    private getSelectors;
    /**
     * Returns the context object from given Page and the current workflow.\
     * \
     * `workflow` is used for selector extraction - function searches for used selectors to
     * look for later in the page's context.
     * @param page Playwright Page object
     * @param workflow Current **initialized** workflow (array of where-what pairs).
     * @returns {PageState} State of the current page.
     */
    private getState;
    /**
     * Tests if the given action is applicable with the given context.
     * @param where Tested *where* condition
     * @param context Current browser context.
     * @returns True if `where` is applicable in the given context, false otherwise
     */
    private applicable;
    /**
     * Given a Playwright's page object and a "declarative" list of actions, this function
     * calls all mentioned functions on the Page object.\
     * \
     * Manipulates the iterator indexes (experimental feature, likely to be removed in
     * the following versions of maxun-core)
     * @param page Playwright Page object
     * @param steps Array of actions.
     */
    private carryOutSteps;
    private handlePagination;
    private getMatchingActionId;
    private removeShadowSelectors;
    private removeSpecialSelectors;
    private runLoop;
    private ensureScriptsLoaded;
    /**
     * Spawns a browser context and runs given workflow.
     * \
     * Resolves after the playback is finished.
     * @param {Page} [page] Page to run the workflow on.
     * @param {ParamType} params Workflow specific, set of parameters
     *  for the `{$param: nameofparam}` fields.
     */
    run(page: Page, params?: ParamType): Promise<void>;
    /**
     * Stops the interpreter.
     * NOTE(review): exact stop semantics (what is awaited before the promise
     * resolves) are not visible in this declaration file - confirm against the implementation.
     */
    stop(): Promise<void>;
}
export {};