UNPKG

@skypilot/scraper

Version:
150 lines (149 loc) 4.71 kB
import type { Integer } from '@skypilot/common-types';
import type { ElementHandle } from 'playwright';
import type { State } from 'src/scraper/State';
// `SliceRange` is only ever used in a type position (`Query.slice`), so it is imported
// type-only like every other import in this module; this guarantees the import is erased.
import type { SliceRange } from 'src/lib/array/sliceArray';
import type { Dict, Href } from './pseudotypes';

/**
 * Browser-level settings. Every field is optional.
 *
 * NOTE(review): the exact meaning of the delay fields is not visible in this module;
 * presumably `delayInMs` is a base delay and `variableDelayInMs` a variable amount
 * added to it — confirm against the scraper implementation.
 */
export interface BrowserOptions {
  delayInMs?: Integer;
  headless?: boolean;
  userAgent?: string;
  variableDelayInMs?: Integer;
}

/**
 * Options accepted by a `click` command (see `ClickCommand`).
 *
 * The documented fields carry Playwright's own click-option descriptions;
 * `throwOnWaitTimeout` and `waitTimeoutInMs` are additional fields declared here.
 */
export interface ClickOptions {
  button?: 'left' | 'right' | 'middle';
  /**
   * defaults to 1. See [UIEvent.detail].
   */
  clickCount?: number;
  /**
   * Time to wait between `mousedown` and `mouseup` in milliseconds. Defaults to 0.
   */
  delay?: number;
  /**
   * Whether to bypass the [actionability](https://playwright.dev/docs/actionability) checks. Defaults to `false`.
   */
  force?: boolean;
  /**
   * Modifier keys to press. Ensures that only these modifiers are pressed during the operation, and then restores
   * current modifiers back. If not specified, currently pressed modifiers are used.
   */
  modifiers?: Array<'Alt' | 'Control' | 'Meta' | 'Shift'>;
  /**
   * Actions that initiate navigations are waiting for these navigations to happen and for pages to start loading. You
   * can opt out of waiting via setting this flag. You would only need this option in the exceptional cases such as
   * navigating to inaccessible pages. Defaults to `false`.
   */
  noWaitAfter?: boolean;
  /**
   * A point to use relative to the top-left corner of element padding box. If not specified, uses some visible point
   * of the element.
   */
  position?: {
    x: number;
    y: number;
  };
  /**
   * Maximum time in milliseconds, defaults to 30 seconds, pass `0` to disable timeout. The default value can be changed
   * by using the
   * [browserContext.setDefaultTimeout(timeout)](https://playwright.dev/docs/api/class-browsercontext#browsercontextsetdefaulttimeouttimeout)
   * or [page.setDefaultTimeout(timeout)](https://playwright.dev/docs/api/class-page#pagesetdefaulttimeouttimeout) methods.
   */
  timeout?: number;
  /**
   * NOTE(review): undocumented in the original; presumably controls whether a wait timeout
   * raises instead of being ignored — confirm against the click implementation.
   */
  throwOnWaitTimeout?: boolean;
  /**
   * NOTE(review): undocumented in the original; presumably the wait timeout in milliseconds
   * that `throwOnWaitTimeout` refers to — confirm.
   */
  waitTimeoutInMs?: Integer;
}

/**
 * Dictionary whose values are either a plain string or a full `Query`.
 *
 * Note: `Exclude<string, ''>` evaluates to plain `string` (the `string` type is not a union
 * from which the `''` literal can be removed), so empty-string keys are NOT rejected at
 * compile time. The expression is kept as-is to leave the declared type untouched.
 *
 * NOTE(review): a bare string value is presumably shorthand for a selector — confirm.
 */
export declare type FlexQueryDict = Record<Exclude<string, ''>, string | Query>;

/**
 * Options accepted by the navigation commands (`FollowCommand` and `GoToCommand`).
 */
export interface NavOptions {
  addUrlToState?: boolean;
  newPage?: boolean;
  retryLimit?: Integer;
  state?: State;
  waitUntil?: 'load' | 'domcontentloaded' | 'networkidle';
  /**
   * Callback that receives a URL string and returns an object with optional `messages`,
   * or `null` / `undefined`.
   *
   * NOTE(review): how the return value is interpreted (e.g. whether a returned object means
   * "invalid") is not visible in this module — confirm against the navigation code.
   */
  validate?: (url: string) => {
    messages?: string[];
  } | null | undefined;
  verbose?: boolean;
}

/**
 * Dictionary whose values are `Query` objects.
 *
 * As with `FlexQueryDict`, `Exclude<string, ''>` is equivalent to `string`, so the key
 * type does not actually exclude the empty string.
 */
export declare type QueryDict = Record<Exclude<string, ''>, Query>;

/**
 * Options accepted by a `query` command (see `QueryCommand`).
 */
export interface QueryOptions {
  baseRef?: ElementHandle;
  state?: State;
  statePath?: string;
  /** Callback receiving the query results and the current `ScraperContext`. */
  transform?: (queryResults: any, scraperContext: ScraperContext) => any;
  updateState?: boolean;
}

/**
 * Result shape for a query dictionary `Q`: the same keys as `Q`, each mapped to a
 * string or an array of strings.
 */
export declare type QueryResult<Q extends Record<string, unknown> = Record<string, unknown>> = Record<keyof Q, string | string[]>;

/**
 * Options for running scraper commands. Every field is optional.
 */
export interface RunOptions {
  baseRef?: ElementHandle;
  collectionName?: string;
  displayLog?: boolean;
  nth?: Integer;
  retryLimit?: Integer;
  state?: State;
  statePath?: string;
  verbose?: boolean;
}

/**
 * Options accepted by a `runOnAll` command (see `RunOnAllCommand`).
 */
export interface RunOnOptions {
  baseRef?: ElementHandle;
  collectionName?: string;
  state?: State;
}

/** Command tagged `action: 'click'`, carrying a `Query` and optional `ClickOptions`. */
export declare type ClickCommand = {
  action: 'click';
  query: Query;
  options?: ClickOptions;
};

/** Command tagged `action: 'follow'`, carrying a `Query` and optional `NavOptions`. */
export declare type FollowCommand = {
  action: 'follow';
  query: Query;
  options?: NavOptions;
};

/** Command tagged `action: 'goTo'`, carrying a target `url` and optional `NavOptions`. */
export declare type GoToCommand = {
  action: 'goTo';
  url: Href;
  options?: NavOptions;
};

/** Command tagged `action: 'query'`, carrying a `QueryDict` and optional `QueryOptions`. */
export declare type QueryCommand = {
  action: 'query';
  queryDict: QueryDict;
  options?: QueryOptions;
};

/**
 * Command tagged `action: 'runOnAll'`, carrying a `Query`, a nested list of commands,
 * and optional `RunOnOptions`.
 */
export declare type RunOnAllCommand = {
  action: 'runOnAll';
  query: Query;
  commands: ScraperCommand[];
  options?: RunOnOptions;
};

/** Command tagged `action: 'set'`, carrying a `state` dictionary. */
export declare type SetCommand = {
  action: 'set';
  state: Dict;
};

/** Command tagged `action: 'write'`, with an optional `collectionName`. */
export declare type WriteCommand = {
  action: 'write';
  collectionName?: string;
};

/**
 * Union of every scraper command. The `action` literal is the discriminant, so
 * narrowing on `command.action` selects the matching command shape.
 */
export declare type ScraperCommand =
  | ClickCommand
  | FollowCommand
  | GoToCommand
  | QueryCommand
  | RunOnAllCommand
  | SetCommand
  | WriteCommand;

/**
 * Context object passed as the second argument to `transform` callbacks
 * (see `Query.transform` and `QueryOptions.transform`).
 */
export interface ScraperContext {
  browser?: {
    version?: string;
  };
  page: {
    title?: string;
    url?: string;
  };
}

/**
 * Description of a single query. `sel` is the only required field.
 *
 * NOTE(review): `sel` is presumably a selector and `attr` an attribute name to read —
 * the matching semantics are not visible in this module; confirm against the query code.
 */
export interface Query {
  baseRef?: ElementHandle;
  attr?: string;
  nth?: Integer;
  sel: string;
  limit?: Integer;
  scope?: 'one' | 'all';
  slice?: SliceRange;
  /** Callback receiving an input value and the current `ScraperContext`. */
  transform?: (input: any, scraperContext: ScraperContext) => any;
  noTrim?: boolean;
}

/**
 * Optional `state` / `statePath` pair, the same two fields that also appear on
 * `QueryOptions` and `RunOptions`.
 */
export interface StateOptions {
  state?: State;
  statePath?: string;
}