UNPKG

@letsscrapedata/controller

Version:

Unified browser / HTML controller interfaces that support patchright, camoufox, playwright, puppeteer and cheerio

1,412 lines (1,406 loc) 65.3 kB
import EventEmitter from 'node:events';
import { Browser, BrowserContext, Locator, ElementHandle as ElementHandle$1, Frame, Page, Response, APIRequestContext, BrowserType, FrameLocator } from 'playwright';
import { Browser as Browser$1, BrowserContext as BrowserContext$1, ElementHandle, Frame as Frame$1, Page as Page$1, HTTPResponse } from 'puppeteer';
import { Browser as Browser$2, BrowserContext as BrowserContext$2, Locator as Locator$1, ElementHandle as ElementHandle$2, Frame as Frame$2, Page as Page$2, Response as Response$1, APIRequestContext as APIRequestContext$1, BrowserType as BrowserType$1 } from 'patchright';
import { Serializable } from 'node:child_process';
import { LogFunction } from '@letsscrapedata/utils';
import { Serializable as Serializable$1 } from 'child_process';

/** Names of the browser controllers that can be selected. */
type BrowserControllerType = "playwright" | "puppeteer" | "patchright" | "camoufox";
/** The subset of BrowserControllerType without "puppeteer". */
type PlaywrightBrowserControllerType = "playwright" | "patchright" | "camoufox";
/** Native browser object of playwright, puppeteer or patchright. */
type AllBrowser = Browser | Browser$1 | Browser$2;
/** Native browser context object of playwright, puppeteer or patchright. */
type AllBrowserContext = BrowserContext | BrowserContext$1 | BrowserContext$2;
/** Native element object: playwright Locator, puppeteer ElementHandle or patchright Locator. */
type AllElement = Locator | ElementHandle | Locator$1;
/** Native ElementHandle of playwright, puppeteer or patchright. */
type AllElementHandle = ElementHandle$1 | ElementHandle | ElementHandle$2;
/** Native frame object of playwright, puppeteer or patchright. */
type AllFrame = Frame | Frame$1 | Frame$2;
/** Native page object of playwright, puppeteer or patchright. */
type AllPage = Page | Page$1 | Page$2;
/** Native response object: playwright Response, puppeteer HTTPResponse or patchright Response. */
type AllResponse = Response | HTTPResponse | Response$1;
/** Native APIRequestContext of playwright or patchright. */
type AllApiRequestContext = APIRequestContext | APIRequestContext$1;
/** NOTE(review): relies on a `cheerio` namespace that is not imported in this file - presumably a global type declaration; confirm. */
type CheerioNode = cheerio.Cheerio;
type BrowserCreationMethod = "launch" | "connect";
type BrowserContextCreationMethod = "launch" | "new";
type BrowserContextStatus = "free" | "busy" | "closed";
type LsdBrowserType = "chromium" | "firefox" | "webkit";

/**
 * Events emitted by controllers, such as playwright.
 *
 * The per-member notes are block comments on purpose: as trailing `//` comments
 * they would comment out the remaining members whenever this file is stored on one line.
 */
declare enum ControllerEvent {
    BROWSERCONTEXT_CLOSE = "close",
    /** PlaywrightBrowserControllerType */
    BROWSERCONTEXT_PAGE = "page",
    /** puppeteer */
    BROWSERCONTEXT_TARGETCREATED = "targetcreated",
    BROWSER_DISCONNECTED = "disconnected",
    PAGE_CLOSE = "close",
    /** NOTE: "POUP" is a misspelling of "POPUP"; the member name is kept unchanged for compatibility. */
    PAGE_POUP = "popup",
    /** puppeteer (use route in playwright) */
    PAGE_REQUEST = "request",
    PAGE_RESPONSE = "response"
}

/**
 * Events that the user can listen to if needed.
 * * eventData: LsdPage
 */
declare enum LsdPageEvent {
    PAGE_CLOSE = "pageClose",
    PAGE_POPUP = "pagePopup"
}

/**
 * Listened to by LsdBrowserContext; the user does NOT need to listen to them.
 * * eventData: LsdPage
 */
declare enum LsdBrowserContextEvent {
    PAGE_CLOSE = "pageClose"
}

/**
 * Listened to by LsdBrowser; the user does NOT need to listen to them.
 * * eventData: LsdBrowserContext
 */
declare enum LsdBrowserEvent {
    BROWSER_CONTEXT_CLOSE = "browserContextClose"
}

/** Lists of acceptable values, one list per browser/browserContext property. */
interface BrowserContextRequirements {
    browserControllerTypes: BrowserControllerType[];
    browserTypes: LsdBrowserType[];
    browserHeadlesses: boolean[];
    browserIncognitos: boolean[];
}

/** Proxy description used by the controllers; only proxyUrl is mandatory. */
interface ProxyInController {
    proxyUrl: string;
    username?: string;
    password?: string;
    proxyId?: string;
}

/** Client certificate for one origin, given either as Buffers or as file paths. */
interface ClientCertificate {
    origin: string;
    cert?: Buffer;
    certPath?: string;
    key?: Buffer;
    keyPath?: string;
    pfx?: Buffer;
    pfxPath?: string;
    passphrase?: string;
}

/** Options for creating an LsdApiContext. */
interface LsdApiContextOptions {
    proxy?: ProxyInController;
    /**
     * storageState
     */
    stateData?: BrowserStateData;
    userAgent?: string;
    timeout?: number;
    ignoreHTTPSErrors?: boolean;
    extraHTTPHeaders?: Record<string, string>;
    httpCredentials?: {
        username: string;
        password: string;
        origin?: string;
        send?: 'always' | 'unauthorized';
    };
    clientCertificates?: ClientCertificate[];
}

/** Per-request options of LsdApiContext.fetch(). */
interface LsdFetchOptions {
    /**
     * @default "get"
     */
    method?: "delete" | "get" | "post" | "put";
    params?: Record<string, string | number | boolean>;
    headers?: Record<string, string>;
    data?: any;
    form?: Record<string, string | number | boolean>;
    timeout?: number;
    failOnStatusCode?: boolean;
    ignoreHTTPSErrors?: boolean;
    maxRedirects?: number;
    maxRetries?: number;
}

/** Result of LsdApiContext.fetch(). */
interface LsdApiResponse {
    headers: Record<string, string>;
    status: number;
    statusText: string;
    text: string;
    url: string;
}

interface LsdApiContext {
    fetch(url: string, options?: LsdFetchOptions):
Promise<LsdApiResponse>; stateData(): Promise<BrowserStateData>; /** * a destroyed LsdApiContext cannot be used again; using it afterwards throws an exception */ destroy(): Promise<boolean>; } /** * one playwright BrowserType per browser engine */ interface PlaywrightBrowserTypes { chromium: BrowserType; firefox: BrowserType; webkit: BrowserType; } /** * one patchright BrowserType per browser engine */ interface PatchrightBrowserTypes { chromium: BrowserType$1; firefox: BrowserType$1; webkit: BrowserType$1; } /** * options shared by LsdLaunchOptions and LsdConnectOptions (both extend this interface) */ interface BrowserOptions { /** * Interval between closing free pages (seconds) if greater than 0 * @default 300 */ closeFreePagesIntervalSeconds?: number; /** * max browserContexts per browser * @default 10 */ maxBrowserContextsPerBrowser?: number; /** * max pages per browserContext * @default 20 */ maxPagesPerBrowserContext?: number; /** * pages that have been free for longer than this many seconds will be closed * @default 900 */ maxPageFreeSeconds?: number; /** * @default true */ maxViewportOfNewPage?: boolean; /** * The proxy actually used by the connected browser; for efficient web scraping, please pass an accurate value * * this will be used as the default proxy when creating new browserContexts later * @default null */ proxy?: ProxyInController | null; /** * Maximum time in milliseconds to wait for the browser instance to start. Pass 0 to disable timeout. 
* * default 30_000 (30 seconds) */ timeout?: number; } /** * how the browser is launched: * * puppeteer: launchServer is the same as launch * * playwright: use "launchServer" if it's necessary to call LsdBrowser.pid()/pidUsage() */ type BrowserLaunchMethod = "launch" | "launchServer"; /** * options for launching a browser; adds launch-specific settings to BrowserOptions */ interface LsdLaunchOptions extends BrowserOptions { args?: string[]; executablePath?: string; /** * @default true */ maxWindowSize?: boolean; /** * @default true */ headless?: boolean; /** * min number of browserContexts * @default 1 */ minBrowserContexts?: number; /** * @default false for puppeteer, true for playwright */ incognito?: boolean; /** * workaround for chromium issue on windows: https://github.com/microsoft/playwright/issues/17252 (same in puppeteer) * @default false */ proxyPerBrowserContext?: boolean; /** * userAgent of the new browserContext that is created while launching the browser, valid only in puppeteer: * * playwright does not create a browserContext while launching the browser * * puppeteer creates a default/new browserContext while launching the browser */ userAgent?: string; userDataDir?: string; /** * @default "launch" */ launchMethod?: BrowserLaunchMethod; /** * Calculates longitude, latitude, timezone, country, & locale based on the IP address. Pass the target IP address (string) to use, or true to find the IP address automatically. ** valid only in camoufox: geoip ** please install Camoufox with the geoip extra * @default false */ geoip?: boolean | string; /** * Whether to inject scripts into the main world when prefixed with mw. ** valid only in camoufox: main_world_eval * @default false */ mainWorldEval?: boolean; /** * List of Firefox addons to use. Must be paths to extracted addons. 
** valid only in camfoufox: addons * @default [] */ addons?: string[]; } /** * options for connecting to an already running browser; adds connection-specific settings to BrowserOptions */ interface LsdConnectOptions extends BrowserOptions { /** * url that starts with "http://", such as "http://localhost:9222/" */ browserUrl: string; /** * executable path of connected browser, optional * @default "" that means unknown */ executablePath?: string; /** * whether the connected browser is headless * @default false */ headless?: boolean; /** * whether the connected browser is incognito * * NOTE(review): the previous text repeated the description of `headless`; wording inferred from the property name - confirm * @default false */ incognito?: boolean; /** * userAgent of the current browserContexts that were created before connecting to the browser, valid only in puppeteer: * * playwright: does not support page.setUserAgent * * puppeteer: supports page.setUserAgent */ userAgent?: string; } type LsdBrowserContextOptions = { proxy: ProxyInController | null; /** * @default browser.options.maxViewportOfNewPage */ maxViewportOfNewPage?: boolean; /** * userAgent of the browserContext: * * playwright: set when creating the new browserContext * * puppeteer: set when creating the new page in the browserContext */ userAgent?: string; }; type PageStatus = "free" | "busy" | "closed"; /** * newpage: opened by browserContext.newPage() * popup: opened by clicking etc, important * manual: opened by manual operation * launch: opened by puppeteer.launch * connect: pages opened before connecting to browser * other: unknown */ /** * * launch: opened when creating browserContext (includes launching browser) * * connect: opened before connected * * newpage: opened by browserContext.newPage() * * popup: popup page * * manual: opened by adding new page manually * * other: unknown */ type PageOpenType = "newpage" | "popup" | "manual" | "launch" | "connect" | "other"; interface PageInfo { /** * browser index in all browsers, that starts from 1 * @default 0 */ browserIdx: number; /** * browserContext index in the same browser, that starts from 1 * @default 0 */ browserContextIdx: number; /** * page index in the same browserContext, that starts from 1 * @default 0 
*/ pageIdx: number; /** * how the page is opened * @default other */ openType: PageOpenType; /** * @default current unix time */ openTime: number; /** * presumably the time at which the page's status (free or busy) was last updated - confirm * @default current unix time */ lastStatusUpdateTime: number; /** * id of the task that is using this page * @default 0 */ taskId: number; /** * @default 0 */ relatedId: number; /** * customised page information */ misc: Record<string, string | number>; } /** * the PageInfo fields accepted by LsdPage.setPageInfo(); every field is optional */ interface UpdatablePageInfo { /** * presumably the time at which the page's status (free or busy) was last updated - confirm * @default current unix time */ lastStatusUpdateTime?: number; /** * id of the task that is using this page * @default 0 */ taskId?: number; /** * @default 0 */ relatedId?: number; /** * customised page information */ misc?: Record<string, string | number>; } /** * puppeteer Page with an optional pageInfo property */ interface PageExtInPuppeteer extends Page$1 { pageInfo?: PageInfo; } /** * playwright Page with an optional pageInfo property */ interface PageExtInPlaywright extends Page { pageInfo?: PageInfo; } /** * patchright Page with an optional pageInfo property */ interface PageExtInPatchright extends Page$2 { pageInfo?: PageInfo; } interface FrameAddScriptTagOptions { /** * URL of the script to be added. */ url?: string; /** * Path to a JavaScript file to be injected into the frame. * * @remarks * If `path` is a relative path, it is resolved relative to the current * working directory (`process.cwd()` in Node.js). */ path?: string; /** * JavaScript to be injected into the frame. */ content?: string; /** * Sets the `type` of the script. Use `module` in order to load an ES2015 module. */ type?: string; /** * Sets the `id` of the script. * * supported only in puppeteer */ id?: string; } type MouseClickType = "click" | "evaluate"; interface MouseClickOptions { /** * Which button will be pressed. * @default left */ button?: "left" | "right" | "middle"; /** * Number of clicks to perform. * * puppeteer: count (clickCount deprecated) * @default 1 */ clickCount?: 1 | 2 | 3; /** * Time to wait between mousedown and mouseup in milliseconds. * @default 0 */ delay?: number; /** * A point to use relative to the top-left corner of element padding box. 
If not specified, uses some visible point of the element. * * puppeteer: offset */ position?: { x: number; y: number; }; /** * @default [] * * puppeteer: not supported, ignored */ modifiers?: Array<"Alt" | "Control" | "Meta" | "Shift">; /** * @default click */ clickType?: MouseClickType; } /** * options of LsdPage.mouseClick(); same button/clickCount/delay fields as MouseClickOptions */ interface PageMouseClickOptions { /** * Which button will be pressed. * @default left */ button?: "left" | "right" | "middle"; /** * Number of clicks to perform. * * puppeteer: count (clickCount deprecated) * @default 1 */ clickCount?: 1 | 2 | 3; /** * Time to wait between mousedown and mouseup in milliseconds. * @default 0 */ delay?: number; } interface SelectOptions { /** * Which attribute of select option to match */ type: "value" | "label" | "index"; /** * Matches by option.value */ values?: string[]; /** * Matches by the index, that starts from 0 */ indexes?: number[]; /** * Matches by option.label */ labels?: string[]; } /** * * playwright: "load" | "domcontentloaded" | "networkidle" | "commit", "networkidle0" | "networkidle2" => "networkidle"; * * puppeteer: "load" | "domcontentloaded" | "networkidle0" | "networkidle2", "networkidle" => "networkidle0", "commit" ignored; */ type NavigationWaitUntil = "load" | "domcontentloaded" | "networkidle" | "commit" | "networkidle0" | "networkidle2"; interface GotoOptions { referer?: string; timeout?: number; waitUntil?: NavigationWaitUntil; } /** * * src takes precedence over selector, at least one of them must be defined * * it is recommended to use src because selector is valid only in puppeteer */ interface IframeOption { /** * * string: iframe.src starts with this string * * RegExp: iframe.src matches this RegExp */ src?: string | RegExp; /** * id of iframe */ id?: string; /** * CSS selector or XPath */ selector?: string; } /** * key names accepted by LsdElement.press() */ type KeyInput = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'Power' | 'Eject' | 'Abort' | 'Help' | 'Backspace' | 'Tab' | 'Numpad5' | 'NumpadEnter' | 'Enter' | '\r' | '\n' | 'ShiftLeft' | 
'ShiftRight' | 'ControlLeft' | 'ControlRight' | 'AltLeft' | 'AltRight' | 'Pause' | 'CapsLock' | 'Escape' | 'Convert' | 'NonConvert' | 'Space' | 'Numpad9' | 'PageUp' | 'Numpad3' | 'PageDown' | 'End' | 'Numpad1' | 'Home' | 'Numpad7' | 'ArrowLeft' | 'Numpad4' | 'Numpad8' | 'ArrowUp' | 'ArrowRight' | 'Numpad6' | 'Numpad2' | 'ArrowDown' | 'Select' | 'Open' | 'PrintScreen' | 'Insert' | 'Numpad0' | 'Delete' | 'NumpadDecimal' | 'Digit0' | 'Digit1' | 'Digit2' | 'Digit3' | 'Digit4' | 'Digit5' | 'Digit6' | 'Digit7' | 'Digit8' | 'Digit9' | 'KeyA' | 'KeyB' | 'KeyC' | 'KeyD' | 'KeyE' | 'KeyF' | 'KeyG' | 'KeyH' | 'KeyI' | 'KeyJ' | 'KeyK' | 'KeyL' | 'KeyM' | 'KeyN' | 'KeyO' | 'KeyP' | 'KeyQ' | 'KeyR' | 'KeyS' | 'KeyT' | 'KeyU' | 'KeyV' | 'KeyW' | 'KeyX' | 'KeyY' | 'KeyZ' | 'MetaLeft' | 'MetaRight' | 'ContextMenu' | 'NumpadMultiply' | 'NumpadAdd' | 'NumpadSubtract' | 'NumpadDivide' | 'F1' | 'F2' | 'F3' | 'F4' | 'F5' | 'F6' | 'F7' | 'F8' | 'F9' | 'F10' | 'F11' | 'F12' | 'F13' | 'F14' | 'F15' | 'F16' | 'F17' | 'F18' | 'F19' | 'F20' | 'F21' | 'F22' | 'F23' | 'F24' | 'NumLock' | 'ScrollLock' | 'AudioVolumeMute' | 'AudioVolumeDown' | 'AudioVolumeUp' | 'MediaTrackNext' | 'MediaTrackPrevious' | 'MediaStop' | 'MediaPlayPause' | 'Semicolon' | 'Equal' | 'NumpadEqual' | 'Comma' | 'Minus' | 'Period' | 'Slash' | 'Backquote' | 'BracketLeft' | 'Backslash' | 'BracketRight' | 'Quote' | 'AltGraph' | 'Props' | 'Cancel' | 'Clear' | 'Shift' | 'Control' | 'Alt' | 'Accept' | 'ModeChange' | ' ' | 'Print' | 'Execute' | '\u0000' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'Meta' | '*' | '+' | '-' | '/' | ';' | '=' | ',' | '.' | '`' | '[' | '\\' | ']' | "'" | 'Attn' | 'CrSel' | 'ExSel' | 'EraseEof' | 'Play' | 'ZoomOut' | ')' | '!' 
| '@' | '#' | '$' | '%' | '^' | '&' | '(' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | ':' | '<' | '_' | '>' | '?' | '~' | '{' | '|' | '}' | '"' | 'SoftLeft' | 'SoftRight' | 'Camera' | 'Call' | 'EndCall' | 'VolumeDown' | 'VolumeUp'; interface KeyPressOptions { /** * Time to wait between keydown and keyup in milliseconds. Defaults to 0. */ delay?: number; } interface InputOptions { /** * Time to wait between mousedown and mouseup in milliseconds. * * NOTE(review): this text matches the mouse click options; for text input it is presumably the delay between key presses - confirm * * playwright: not supported, ignored * @default 0 */ delay?: number; /** * whether to replace the current value * @default false */ replace?: boolean; /** * whether to press Enter after inputting the value * @default false */ enter?: boolean; } /** * element wrapper with the same methods for every supported controller; _origElement() returns the native element */ interface LsdElement { /** * * @returns the value of a specified attribute on the element * @param attributeName */ attribute(attributeName: string): Promise<string>; /** * @returns the attribute names of the element */ attributeNames(): Promise<string[]>; /** * This method returns the bounding box of the element (relative to the main frame), or null if the element is not part of the layout (example: display: none). */ boundingBox(): Promise<{ x: number; y: number; width: number; height: number; } | null>; dataset(): Promise<Record<string, string>>; /** * In order to be compatible with various browser controllers, if you need to use this function, please follow the following conventions: * * If the element is in an iframe, use the descendant type when locating the iframe, not the child type! 
* * * Reason: args of <page/frame | locator>.evaluate are different in Playwright * * When there is only one parameter: element.evaluate(arg => statements, val) * * When there is more than one parameter: element.evaluate(([arg1, arg2]) => statements, [val1, val2]) * @param func * @param args * @param isolated default true; whether to run in isolated context; only valid for patchright and camoufox */ evaluate(func: Function | string, args?: any[], isolated?: boolean): Promise<Serializable>; /** * @returns the first element matching the given CSS selector or XPath * @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected * @param iframeOptions default [], options to select descendant frame * @param absolute valid only if iframeOptions.length===0 */ findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>; /** * @returns elements matching the given CSS selector or XPath * @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected * @param iframeOptions default [], options to select descendant frame * @param absolute valid only if iframeOptions.length===0 */ findElements(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement[]>; /** * @returns whether the element has the specified attribute or not * @param attributeName */ hasAttribute(attributeName: string): Promise<boolean>; /** * @returns the HTML or XML markup contained within the element */ innerHtml(): Promise<string>; /** * @returns innerText of element * @param onlyChild default false, whether to include only the text of the child text nodes */ innerText(onlyChild?: boolean): Promise<string>; /** * @returns the serialized HTML fragment describing the element including its descendants */ outerHtml(): 
Promise<string>; textContent(): Promise<string>; /** * Click this element. * @param options default {button: "left", clickCount: 1, delay: 0, modifiers: []} */ click(options?: MouseClickOptions): Promise<boolean>; focus(): Promise<boolean>; hover(): Promise<boolean>; /** * * playwright: fill * * puppeteer: type */ input(value: string, options?: InputOptions): Promise<boolean>; press(key: KeyInput, options: KeyPressOptions): Promise<boolean>; screenshot(options?: ScreenshotOptions): Promise<Buffer>; scrollIntoView(): Promise<boolean>; select(options: SelectOptions): Promise<boolean>; setAttribute(attributeName: string, newValue: string): Promise<boolean>; /** * @returns the native element object (see AllElement) */ _origElement(): AllElement; } interface ViewportSize { height: number; width: number; } interface CookieItem { name: string; value: string; domain: string; path: string; expires: number; httpOnly: boolean; secure: boolean; sameSite: 'Strict' | 'Lax' | 'None'; } /** * one localStorage entry */ interface LocalStorageItem { name: string; value: string; } /** * the localStorage entries of one origin */ interface LocalStorageOrigin { origin: string; localStorage: LocalStorageItem[]; } /** * browser state: cookies plus the localStorage entries of each origin */ interface BrowserStateData { cookies: CookieItem[]; localStorage: LocalStorageOrigin[]; } /** * copied from playwright: export type URLMatch = string | RegExp | ((url: URL) => boolean); export type RouteHandlerCallback = (route: RouteInPlaywright, request: RequestInPlayWright) => Promise<any> | void; export type RequestHandlerCallback = (request: RequestInPlayWright) => Promise<any> | void; */ type RequestResourceType = "document" | "stylesheet" | "image" | "media" | "font" | "script" | "texttrack" | "xhr" | "fetch" | "eventsource" | "websocket" | "manifest" | "other"; type RequestMethod = "DELETE" | "GET" | "POST" | "PUT" | "CONNECT" | "HEAD" | "OPTIONS" | "PATCH" | "TRACE"; /** * conditions used to select requests; every condition is optional */ interface RequestMatch { methods?: RequestMethod[]; postData?: RegExp; resourceTypes?: RequestResourceType[]; url?: RegExp; } type RequestInterceptionAction = "abort" | "fulfill"; interface RequestInterceptionOption { /** * Requests that match all 
conditions will be intercepted; all requests will be intercepted if no condition. */ requestMatch?: RequestMatch; /** * * abort: Aborts the HTTP request * * fulfill: Fulfills a request with the value of "fulfill" */ action: RequestInterceptionAction; /** * value used to fulfill the request; used only when action is "fulfill" * @default `<html><body><h1>${request.url()}</h1></body></html>` */ fulfill?: string; } interface ResponseMatch { /** * min length of response.text() */ minLength?: number; /** * max length of response.text() */ maxLength?: number; } type ResponseHandlerOptions = Record<string, any>; interface ResponsePageData { pageUrl: string; cookies: string; } /** * callback type of ResponseInterceptionOption.handler */ type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void; interface ResponseInterceptionItem { /** * page.url() */ pageUrl: string; /** * request.method() */ requestMethod: RequestMethod; /** * request.url() */ requestUrl: string; /** * request.postData() */ requestData: string; /** * response.text() */ responseData: string; } interface ResponseInterceptionOption { requestMatch?: RequestMatch; responseMatch?: ResponseMatch; /** * a ResponseInterceptionItem will be pushed into this array if Array.isArray(responseItems) */ responseItems?: ResponseInterceptionItem[]; /** * handler will be called if handler is a function */ handler?: ResponseHandler; /** * valid only if handler is a function * @default {} */ handlerOptions?: ResponseHandlerOptions; } interface PDFMargin { top?: string | number; bottom?: string | number; left?: string | number; right?: string | number; } /** * @public */ type LowerCasePaperFormat = 'letter' | 'legal' | 'tabloid' | 'ledger' | 'a0' | 'a1' | 'a2' | 'a3' | 'a4' | 'a5' | 'a6'; /** * All the valid paper format types when printing a PDF. 
* * @remarks * * The sizes of each format are as follows: * * - `Letter`: 8.5in x 11in * * - `Legal`: 8.5in x 14in * * - `Tabloid`: 11in x 17in * * - `Ledger`: 17in x 11in * * - `A0`: 33.1in x 46.8in * * - `A1`: 23.4in x 33.1in * * - `A2`: 16.54in x 23.4in * * - `A3`: 11.7in x 16.54in * * - `A4`: 8.27in x 11.7in * * - `A5`: 5.83in x 8.27in * * - `A6`: 4.13in x 5.83in * * @public */ type PaperFormat = Uppercase<LowerCasePaperFormat> | Capitalize<LowerCasePaperFormat> | LowerCasePaperFormat; /** * Valid options to configure PDF generation via {@link Page.pdf}. * @public */ interface PDFOptions { /** * Whether to show the header and footer. * @defaultValue `false` */ displayHeaderFooter?: boolean; /** * HTML template for the print footer. Has the same constraints and support * for special classes as {@link PDFOptions | PDFOptions.headerTemplate}. */ footerTemplate?: string; /** * Paper format. * @remarks * If set, this takes priority over the `width` and `height` options. * @defaultValue `letter`. */ format?: PaperFormat; /** * HTML template for the print header. Should be valid HTML with the following * classes used to inject values into them: * * - `date` formatted print date * * - `title` document title * * - `url` document location * * - `pageNumber` current page number * * - `totalPages` total pages in the document */ headerTemplate?: string; /** * Sets the height of paper. You can pass in a number or a string with a unit. */ height?: string | number; /** * Whether to print in landscape orientation. * @defaultValue `false` */ landscape?: boolean; /** * Set the PDF margins. * @defaultValue `undefined` no margins are set. */ margin?: PDFMargin; /** * NOTE: orphaned comment - no member is declared after it in this interface. * Hides default white background and allows generating pdfs with transparency. * @defaultValue `false` */ /** * Generate document outline. 
* * @remarks * If this is enabled the PDF will also be tagged (accessible) * Currently only works in old Headless (headless = 'shell') * crbug/840455#c47 * * @defaultValue `false` * @experimental */ outline?: boolean; /** * Page ranges to print, e.g. `1-5, 8, 11-13`. * @defaultValue The empty string, which means all pages are printed. */ pageRanges?: string; /** * The path to save the file to. * * @remarks * * If the path is relative, it's resolved relative to the current working directory. * * @defaultValue `undefined`, which means the PDF will not be written to disk. */ path?: string; /** * Give any CSS `@page` size declared in the page priority over what is * declared in the `width` or `height` or `format` option. * @defaultValue `false`, which will scale the content to fit the paper size. */ preferCSSPageSize?: boolean; /** * Set to `true` to print background graphics. * @defaultValue `false` */ printBackground?: boolean; /** * Scales the rendering of the web page. Amount must be between `0.1` and `2`. * @defaultValue `1` */ scale?: number; /** * Generate tagged (accessible) PDF. * @defaultValue `true` * @experimental */ tagged?: boolean; /** * NOTE: orphaned comment - no member is declared after it in this interface. * Timeout in milliseconds. Pass `0` to disable timeout. * @defaultValue `30_000` */ /** * Sets the width of paper. You can pass in a number or a string with a unit. */ width?: string | number; } /** * @public * not supported by puppeteer: animations, caret, mask, maskColor, scale, style, timeout */ interface ScreenshotOptions { /** * NOTE: orphaned comment - no member is declared after it in this interface. * Capture the screenshot beyond the viewport. * * @defaultValue `false` if there is no `clip`. `true` otherwise. */ /** * Specifies the region of the page to clip. */ clip?: { /** * x-coordinate of top-left corner of clip area */ x: number; /** * y-coordinate of top-left corner of clip area */ y: number; /** * the width of the element in pixels. */ width: number; /** * the height of the element in pixels. */ height: number; }; /** * Encoding of the image. 
* * @defaultValue `'binary'` */ /** * NOTE: orphaned comment - no member is declared after it in this interface. * Capture the screenshot from the surface, rather than the view. * * @defaultValue `true` */ /** * When `true`, takes a screenshot of the full page. * * @defaultValue `false` */ fullPage?: boolean; /** * Hides default white background and allows capturing screenshots with transparency. * * @defaultValue `false` */ omitBackground?: boolean; /** * NOTE: orphaned comment - no member is declared after it in this interface. * @defaultValue `false` */ /** * Quality of the image, between 0-100. Not applicable to `png` images. */ quality?: number; /** * The file path to save the image to. The screenshot type will be inferred * from file extension. If path is a relative path, then it is resolved * relative to current working directory. If no path is provided, the image * won't be saved to the disk. */ path?: string; /** * @defaultValue `'png'` */ type?: 'png' | 'jpeg'; } type WaitElementState = "attached" | "detached" | "hidden" | "visible"; interface WaitElementOptions { /** * @default 30_000 ms */ timeout?: number; /** * @default "visible" */ state?: WaitElementState; } interface WaitNavigationOptions { /** * only supported in playwright for now * @default "" */ url?: string | RegExp; /** * @default 30_000 ms */ timeout?: number; /** * @default "visible" * * NOTE(review): "visible" is a WaitElementState, not a NavigationWaitUntil value; this default looks copied from WaitElementOptions.state - confirm the real default */ waitUntil?: NavigationWaitUntil; } /** * page wrapper with the same methods for every supported controller; _origPage() returns the native page */ interface LsdPage extends EventEmitter { /** * Adds a script which would be evaluated in one of the following scenarios: * * Whenever the page is navigated. * * Whenever the child frame is attached or navigated. In this case, the script is evaluated in the context of the newly attached frame. * @param scriptOrFunc * @param arg */ addPreloadScript(scriptOrFunc: string | Function, arg?: Serializable): Promise<boolean>; /** * Adds a `<script>` tag into the page with the desired URL or content. 
* @param options */ addScriptTag(options: FrameAddScriptTagOptions): Promise<AllElementHandle>; /** * Get the LsdApiContext associated with this page's LsdBrowserContext * * only valid in playwright */ apiContext(): LsdApiContext; bringToFront(): Promise<boolean>; browserContext(): LsdBrowserContext; /** * clear the cookies of the current page(url) * * Prerequisites: page must have a valid url, such as by calling goto(url) */ clearCookies(): Promise<boolean>; /** * clear the localStorage of the current page(url) * * Prerequisites: page must have a valid url, such as by calling goto(url) */ clearLocalStorage(): Promise<boolean>; /** * Clear all request interceptions on the page */ clearRequestInterceptions(): Promise<boolean>; /** * Clear all response interceptions on the page */ clearResponseInterceptions(): Promise<boolean>; /** * clear the stateData of the current page(url): * * stateData: cookies, localStorage, indexedDB * * Prerequisites: page must have a valid url, such as by calling goto(url) */ clearStateData(): Promise<boolean>; /** * Only a free page can be closed! */ close(): Promise<boolean>; /** * Should the page be closed when it is freed? * * Sometimes, in order to avoid being used again, you need to close the page. 
* * valid only in browser page * @default false, please call setCloseWhenFree to change it */ closeWhenFree(): boolean; /** * Get the full HTML content of the page or descendant frame * @param iframeOptions default [], selectors of descendant frames */ content(iframeOptions?: IframeOption[]): Promise<string>; cookies(): Promise<CookieItem[]>; /** * In order to be compatible with various browser controllers, if you need to use this function, please follow the following conventions: * * When there is only one parameter: page.evaluate(arg => statements, val) * * When there is more than one parameter: page.evaluate(([arg1, arg2]) => statements, [val1, val2]) * @param func * @param args * * NOTE(review): earlier text also documented an `isolated` parameter (default true; whether to run in isolated context; only valid for patchright and camoufox), but this signature does not declare it, unlike LsdElement.evaluate - confirm */ evaluate(func: Function | string, args?: any[]): Promise<any>; /** * The method adds a function called `name` on the page's `window` object. * When called, the function executes `callbackFunction` in node.js and * returns a `Promise` which resolves to the return value of `callbackFunction`. * * Reminder: It is not recommended to use this function because it is easy to be detected !!! 
* @param name Name of the function on the window object * @param callbackFunction Callback function which will be called in node.js context */ exposeFunction(name: string, callbackFunction: Function): Promise<void>; /** * @returns the first element matching the given CSS selector or XPath * @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected * @param iframeOptions default [], options to select descendant frame * @param iframeType default "child"; "descendant" is valid only if selectorOrXpath is a string, iframeOptions.length is 1 and iframeOptions[0].src is a string or RegExp * @example * * findElement("body", [{src: iframe.src}], "descendant"): to get the body element of a descendant iframe with src */ findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], iframeType?: "child" | "descendant"): Promise<LsdElement | null>; /** * @returns elements matching the given CSS selector or XPath * @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected * @param iframeOptions default [], options to select descendant frame */ findElements(selectorOrXpath: string | string[], iframeOptions?: IframeOption[]): Promise<LsdElement[]>; /** * Free a busy page. All request and response interceptions will be cleared. 
*/ free(): Promise<boolean>; /** * NOTE(review): the previous text here described hasAttribute(), not goto(); presumably this navigates the page to `url` - confirm * @param url * @param options referer, timeout and waitUntil, see GotoOptions */ goto(url: string, options?: GotoOptions): Promise<boolean>; id(): string; isFree(): boolean; /** * valid only in CheerioPage * @param html * @param isHtml default true */ load(html: string, isHtml?: boolean): boolean; /** * NOTE: the method name is misspelled ("Stroage"); it is part of the declared API and is left unchanged */ localStroage(): Promise<LocalStorageOrigin[]>; mainFrame(): AllFrame; maximizeViewport(): Promise<boolean>; mouseClick(x: number, y: number, options?: PageMouseClickOptions): Promise<boolean>; mouseDown(): Promise<boolean>; mouseMove(x: number, y: number): Promise<boolean>; mouseUp(): Promise<boolean>; /** * NOTE: the second parameter is spelled `delterY` (presumably deltaY) */ mouseWheel(deltaX?: number, delterY?: number): Promise<boolean>; pageHeight(): Promise<number>; pageInfo(): PageInfo; pageWidth(): Promise<number>; pdf(options?: PDFOptions): Promise<Buffer>; reload(): Promise<boolean>; screenshot(options?: ScreenshotOptions): Promise<Buffer>; scrollBy(x: number, y: number): Promise<boolean>; scrollTo(x: number, y: number): Promise<boolean>; /** * * Send a CDP message over the current (not detached) or new CDP session * @param method protocol method name * @param params default null (ignored), method parameters * @param detach default true, whether to detach the CDPSession from target */ sendCDPMessage(method: string, params?: object | null, detach?: boolean): Promise<any>; /** * set new value of closeWhenFree, refer to closeWhenFree * * valid only in browser page * @param closeWhenFree */ setCloseWhenFree(closeWhenFree: boolean): boolean; setCookies(cookies: CookieItem[]): Promise<boolean>; setExtraHTTPHeaders(headers: Record<string, string>): Promise<boolean>; /** * set localStorage on the current web page(page.url()) * * NOTE: the method name is misspelled ("Stroage"); it is part of the declared API and is left unchanged * @param localStorageItems */ setLocalStroage(localStorageItems: LocalStorageItem[]): Promise<boolean>; setPageInfo(pageInfo: UpdatablePageInfo): boolean; /** * Intercept requests that meet the conditions(requestMatch) to perform an action(action and fulfill). 
* @param options */ setRequestInterception(options: RequestInterceptionOption | RequestInterceptionOption[]): Promise<boolean>; /** * Intercept responses that meet the conditions(requestMatch and responseMatch) to perform actions(cacheArray and handler ) * @param options */ setResponseInterception(options: ResponseInterceptionOption | ResponseInterceptionOption[]): Promise<boolean>; /** * Shortcut for LsdPage.browserContext().setStateData(stateData) * @param stateData */ setStateData(stateData: BrowserStateData): Promise<boolean>; /** * valid only in puppeteer * @param userAgent */ setUserAgent(userAgent: string): Promise<boolean>; setViewportSize(viewPortSize: ViewportSize): Promise<boolean>; stateData(): Promise<BrowserStateData>; status(): PageStatus; title(): Promise<string>; url(): string; /** * start to use this free page */ use(): boolean; /** * * @param selector CSS selector, not XPath * @param options */ waitForElement(selector: string, options?: WaitElementOptions): Promise<boolean>; /** * * @param options */ waitForNavigation(options: WaitNavigationOptions): Promise<boolean>; /** * obj=window?.[key1]...?.[keyn] * @return obj ? JSON.stringify(obj) : "" * @param keys */ windowMember(keys: string[]): Promise<string>; _origPage(): AllPage; } /** * Controller-independent contract for a browser context. It extends EventEmitter and, in this file, is implemented by PlaywrightBrowserContext. */ interface LsdBrowserContext extends EventEmitter { /** * Get the LsdApiContext associated with this LsdBrowserContext * * only valid in playwright */ apiContext(): LsdApiContext; /** * @returns the LsdBrowser of this LsdBrowserContext (presumably the browser it was created in - confirm) */ browser(): LsdBrowser; /** * Close this BrowserContext. * * A BrowserContext that cannot be closed directly is only marked as closed; it will be closed when the browser is closed. 
* * Refer to "Error: Non-incognito profiles cannot be closed" in puppeteer. */ close(): Promise<boolean>; /** * Close pages that have been free for more than maxPageFreeSeconds, if maxPageFreeSeconds > 0. * * The last page in the browserContext will not be closed. * @param maxPageFreeSeconds default 0, in which case the default maxPageFreeSeconds of the browserContext is used */ closeFreePages(maxPageFreeSeconds?: number): Promise<boolean>; /** * Does this browserContext meet browserContextRequirements (incognitos ignored in browser)? * * NOTE(review): the original wording said "this browser", identical to the comment on LsdBrowser - confirm what is actually checked here. * @param browserContextRequirements */ doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean; /** * Free a busy LsdBrowserContext. * @param clearStateData default false */ free(clearStateData?: boolean): boolean; /** * Get a free page from the current pages or by creating a new page. * @param always NOTE(review): semantics are not documented in this file - confirm against the implementation * @returns an LsdPage, or null (per the declared return type) */ getPage(always?: boolean): Promise<LsdPage | null>; /** * Whether pageNum free page(s) can be obtained. * * Refer to getPage(). * @param pageNum default 1, the number of free pages */ hasFreePage(pageNum?: number): boolean; id(): string; isFree(): boolean; isIncognito(): boolean; /** * @returns "launch" or "new" (BrowserContextCreationMethod) */ creationMethod(): BrowserContextCreationMethod; page(pageIdx: number): LsdPage | null; pages(): LsdPage[]; proxy(): ProxyInController | null; setStateData(stateData: BrowserStateData): Promise<boolean>; /** * @returns "free", "busy" or "closed" (BrowserContextStatus) */ status(): BrowserContextStatus; /** * Start to use this LsdBrowserContext. */ use(): boolean; /** * @returns the controller-specific context object: a playwright, puppeteer or patchright BrowserContext (AllBrowserContext) */ _origBrowserContext(): AllBrowserContext; } /** * Controller-independent contract for a browser. It extends EventEmitter and, in this file, is implemented by PlaywrightBrowser. */ interface LsdBrowser extends EventEmitter { newBrowserContext(options?: LsdBrowserContextOptions): Promise<LsdBrowserContext | null>; /** * 1. launched: close all browserContexts and this browser * 2. connected: * * in puppeteer: close all browserContexts and this browser (NOTE(review): unconfirmed - the original comment ended with "???") 
* * in playwright: only browserContexts created by newContext will be closed; the browser is disconnected but will not be closed */ close(): Promise<boolean>; browserContexts(): LsdBrowserContext[]; /** * @returns "playwright", "puppeteer", "patchright" or "camoufox" (BrowserControllerType) */ browserControllerType(): BrowserControllerType; /** * @returns "launch" or "connect" (BrowserCreationMethod) */ browserCreationMethod(): BrowserCreationMethod; /** * @returns "chromium", "firefox" or "webkit" (LsdBrowserType) */ browserType(): LsdBrowserType; createTime(): number; /** * Does this browser meet browserContextRequirements (incognitos ignored in browser)? * @param browserContextRequirements */ doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean; /** * @returns * 1. launched: actual executable path * 2. connected: executablePath in LsdConnectOptions, default "" (unknown) */ executablePath(): string; /** * @returns the id of this browser, as a string * * NOTE(review): the comment previously attached here read "get a free BrowserContext from current free browserContexts or new browserContext", which does not describe id(); presumably left over from a removed member - confirm. */ id(): string; isConnected(): boolean; isHeadless(): boolean; options(): LsdLaunchOptions | LsdConnectOptions; /** * * puppeteer: return pid of connected or launched browser * * playwright: return pid of connected browser that is launched manually or using launchServer, or else return 0 */ pid(): number; /** * Get the CPU utilization (%) and memory usage (MB) of the browser processes if pid is greater than 0 (refer to pid()). */ pidUsage(): Promise<{ cpu: number; memory: number; }>; proxy(): ProxyInController | null; version(): Promise<string>; /** * @returns the controller-specific browser object: a playwright, puppeteer or patchright Browser (AllBrowser) */ _origBrowser(): AllBrowser; } /** * Entry points for launching or connecting to a browser through one of the supported browser controllers, plus plugin registration and LsdApiContext creation. */ interface LsdBrowserController$1 { /** * Launch a new browser using the related browser controller. * @param browserControllerType * @param browserType * @param options */ launch(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options?: LsdLaunchOptions): Promise<LsdBrowser>; /** * Connect to the current browser using the related browser controller. * @param browserControllerType * @param browserType * @param options */ connect(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options?: LsdConnectOptions): Promise<LsdBrowser>; /** * Use a special plugin, 
such as playwrightExtra.chromium.use(StealthPlugin()) ** import puppeteerExtra from "puppeteer-extra"; ** import * as playwrightExtra from "playwright-extra"; ** import StealthPlugin from "puppeteer-extra-plugin-stealth"; * @param browserControllerType * @param browserType * @param plugin */ setBrowserPlugin(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, plugin: any): boolean; /** * Create a new LsdApiContext; valid in playwright. */ newApiContext(options?: LsdApiContextOptions): Promise<LsdApiContext>; } /** * NOTE(review): presumably sets the log function used by the controllers in this package - confirm against the implementation. * @param logFun a LogFunction (type imported from @letsscrapedata/utils) */ declare function setControllerLogFun(logFun: LogFunction): boolean; /** * LsdBrowser implementation constructed from a playwright Browser (the constructor's first parameter). */ declare class PlaywrightBrowser extends EventEmitter implements LsdBrowser { #private; static doesSupport(browserType: LsdBrowserType): boolean; constructor(browser: Browser, browserType: LsdBrowserType, browserCreateMethod: BrowserCreationMethod, options: LsdLaunchOptions | LsdConnectOptions, browserIdx?: number, pid?: number); newBrowserContext(options?: LsdBrowserContextOptions): Promise<LsdBrowserContext | null>; close(): Promise<boolean>; browserContexts(): LsdBrowserContext[]; browserControllerType(): BrowserControllerType; browserCreationMethod(): BrowserCreationMethod; browserType(): LsdBrowserType; createTime(): number; doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean; executablePath(): string; id(): string; isConnected(): boolean; isHeadless(): boolean; options(): LsdLaunchOptions | LsdConnectOptions; pid(): number; pidUsage(): Promise<{ cpu: number; memory: number; }>; proxy(): ProxyInController | null; version(): Promise<string>; _origBrowser(): AllBrowser; } /** * LsdBrowserContext implementation constructed from an LsdBrowser and a playwright BrowserContext (the constructor's first two parameters). */ declare class PlaywrightBrowserContext extends EventEmitter implements LsdBrowserContext { #private; constructor(lsdBrowser: LsdBrowser, browserContext: BrowserContext, browserContextCreationMethod: BrowserContextCreationMethod, incognito?: boolean, proxy?: ProxyInController | null, browserIdx?: number, browserContextIdx?: number, 
maxPagesPerBrowserContext?: number, maxPageFreeSeconds?: number, maxViewportOfNewPage?: boolean); apiContext(): LsdApiContext; browser(): LsdBrowser; close(): Promise<boolean>; closeFreePages(maxPageFreeSeconds?: number): Promise<boolean>; creationMethod(): BrowserContextCreationMethod; doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean; getPage(always?: boolean): Promise<LsdPage | null>; free(clearStateData?: boolean): boolean; hasFreePage(pageNum?: number): boolean; id(): string; isFree(): boolean; isIncognito(): boolean; page(pageIdx: number): LsdPage | null; pages(): LsdPage[]; proxy(): ProxyInController | null; setStateData(stateData: BrowserStateData): Promise<boolean>; status(): BrowserContextStatus; use(): boolean; _origBrowserContext(): AllBrowserContext; } declare class PlaywrightPage extends EventEmitter implements LsdPage { #private; constructor(browserContext: LsdBrowserContext, page: Page, pageInfo?: PageInfo); addPreloadScript(scriptOrFunc: string | Function, arg?: Serializable$1): Promise<boolean>; addScriptTag(options: FrameAddScriptTagOptions): Promise<AllElementHandle>; apiContext(): LsdApiContext; bringToFront(): Promise<boolean>; browserContext(): LsdBrowserContext; clearCookies(): Promise<boolean>; clearLocalStorage(): Promise<boolean>; clearRequestInterceptions(): Promise<boolean>; clearResponseInterceptions(): Promise<boolean>; clearStateData(): Promise<boolean>; close(): Promise<boolean>; closeWhenFree(): boolean; content(iframeOptions?: IframeOption[]): Promise<string>; cookies(): Promise<CookieItem[]>; documentHeight(): Promise<number>; evaluate(func: Function | string, args?: any[]): Promise<any>; exposeFunction(name: string, callbackFunction: Function): Promise<void>; findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[]): Promise<LsdElement | null>; findElements(selectorOrXpath: string | string[], iframeOptions?: IframeOption[]): Promise<LsdElement[]>; free(): 
Promise<boolean>; goto(url: string, options?: GotoOptions | undefined): Promise<boolean>; id(): string; isFree(): boolean; localStroage(): Promise<LocalStorageOrigin[]>; load(): boolean; mainFrame(): AllFrame; maximizeViewport(): Promise<boolean>; mouseClick(x: number, y: number, options?: PageMouseClickOptions): Promise<boolean>; mouseDown(): Promise<boolean>; mouseMove(x: number, y: number): Promise<boolean>; mouseUp(): Promise<boolean>; mouseWheel(deltaX?: number, deltaY?: number): Promise<boolean>; pageHeight(): Promise<number>; pageInfo(): PageInfo; pageWidth(): Promise<number>; pdf(options?: PDFOptions | undefined): Promise<Buffer>; reload(): Promise<boolean>; screenshot(options?: ScreenshotOptions): Promise<Buffer>; scrollBy(x: number, y: number): Promise<boolean>; scrollTo(x: number, y: number): Promise<boolean>; sendCDPMessage(method: string, params?: object | null, detach?: boolean): Promise<any>; setCloseWhenFree(closeWhenFree: boolean): boolean; setCookies(cookies: CookieItem[]): Promise<boolean>; setExtraHTTPHeaders(headers: Record<string, string>): Promise<boolean>; setLocalStroage(localStorageItems: LocalStorageItem[]): Promise<boolean>; setPageInfo(pageInfo: UpdatablePageInfo): boolean; setRequestInterception(options: RequestInterceptionOption | RequestInterceptionOption[]): Promise<boolean>; setResponseInterception(options: ResponseInterceptionOption | ResponseInterceptionOption[]): Promise<boolean>; setStateData(stateData: BrowserStateData): Promise<boolean>; setUserAgent(userAgent: string): Promise<boolean>; setViewportSize(viewPortSize: ViewportSize): Promise<boolean>; stateData(): Promise<BrowserStateData>; status(): PageStatus; title(): Promise<string>; url(): string; use(): boolean; waitForElement(selector: string, options?: WaitElementOpt