/*
 * @letsscrapedata/controller
 * Version:
 * Unified browser / HTML controller interfaces that support patchright, camoufox, playwright, puppeteer and cheerio
 * 1,412 lines (1,406 loc) • 65.3 kB
 * text/typescript
 */
import EventEmitter from 'node:events';
import { Browser, BrowserContext, Locator, ElementHandle as ElementHandle$1, Frame, Page, Response, APIRequestContext, BrowserType, FrameLocator } from 'playwright';
import { Browser as Browser$1, BrowserContext as BrowserContext$1, ElementHandle, Frame as Frame$1, Page as Page$1, HTTPResponse } from 'puppeteer';
import { Browser as Browser$2, BrowserContext as BrowserContext$2, Locator as Locator$1, ElementHandle as ElementHandle$2, Frame as Frame$2, Page as Page$2, Response as Response$1, APIRequestContext as APIRequestContext$1, BrowserType as BrowserType$1 } from 'patchright';
import { Serializable } from 'node:child_process';
import { LogFunction } from '@letsscrapedata/utils';
import { Serializable as Serializable$1 } from 'child_process';
/** Names of the browser controllers accepted by this package. */
type BrowserControllerType = "playwright" | "puppeteer" | "patchright" | "camoufox";
/** BrowserControllerType without "puppeteer" (presumably the controllers that expose a playwright-style API — confirm). */
type PlaywrightBrowserControllerType = "playwright" | "patchright" | "camoufox";
/** Browser of playwright, puppeteer (Browser$1) or patchright (Browser$2). */
type AllBrowser = Browser | Browser$1 | Browser$2;
/** BrowserContext of playwright, puppeteer (BrowserContext$1) or patchright (BrowserContext$2). */
type AllBrowserContext = BrowserContext | BrowserContext$1 | BrowserContext$2;
/** Original element type: Locator of playwright, ElementHandle of puppeteer, or Locator of patchright (Locator$1). */
type AllElement = Locator | ElementHandle | Locator$1;
/** ElementHandle of playwright (ElementHandle$1), puppeteer or patchright (ElementHandle$2). */
type AllElementHandle = ElementHandle$1 | ElementHandle | ElementHandle$2;
/** Frame of playwright, puppeteer (Frame$1) or patchright (Frame$2). */
type AllFrame = Frame | Frame$1 | Frame$2;
/** Page of playwright, puppeteer (Page$1) or patchright (Page$2). */
type AllPage = Page | Page$1 | Page$2;
/** Response of playwright, HTTPResponse of puppeteer, or Response of patchright (Response$1). */
type AllResponse = Response | HTTPResponse | Response$1;
/** APIRequestContext of playwright or patchright (APIRequestContext$1). */
type AllApiRequestContext = APIRequestContext | APIRequestContext$1;
/**
* Cheerio selection type.
* * NOTE(review): the `cheerio` namespace is not imported in the visible part of this file; presumably it is provided globally by the cheerio typings — confirm
*/
type CheerioNode = cheerio.Cheerio;
/** How a browser is obtained: launched by the controller or connected to. */
type BrowserCreationMethod = "launch" | "connect";
/** How a browserContext is obtained: "launch" or "new". */
type BrowserContextCreationMethod = "launch" | "new";
/** Status values of a browserContext. */
type BrowserContextStatus = "free" | "busy" | "closed";
/** Browser engine names. */
type LsdBrowserType = "chromium" | "firefox" | "webkit";
/**
* events emitted by the underlying controllers, such as playwright
* * BROWSERCONTEXT_CLOSE and PAGE_CLOSE share the same event name "close"
* * NOTE(review): "PAGE_POUP" looks like a misspelling of "PAGE_POPUP"; the member name is part of the public API, so it is left unchanged here
*/
declare enum ControllerEvent {
BROWSERCONTEXT_CLOSE = "close",
BROWSERCONTEXT_PAGE = "page",// PlaywrightBrowserControllerType
BROWSERCONTEXT_TARGETCREATED = "targetcreated",// puppeteer
BROWSER_DISCONNECTED = "disconnected",
PAGE_CLOSE = "close",
PAGE_POUP = "popup",
PAGE_REQUEST = "request",// puppeteer (use route in playwright)
PAGE_RESPONSE = "response"
}
/**
* events of LsdPage; users can listen to them if needed
* * eventData: LsdPage
*/
declare enum LsdPageEvent {
PAGE_CLOSE = "pageClose",
PAGE_POPUP = "pagePopup"
}
/**
* events listened to by LsdBrowserContext; users do NOT need to listen to them
* * eventData: LsdPage
*/
declare enum LsdBrowserContextEvent {
PAGE_CLOSE = "pageClose"
}
/**
* events listened to by LsdBrowser; users do NOT need to listen to them
* * eventData: LsdBrowserContext
*/
declare enum LsdBrowserEvent {
BROWSER_CONTEXT_CLOSE = "browserContextClose"
}
/**
* Requirements that a browserContext is checked against (see doesMeetBrowserContextRequirements).
* * each member is a list of values; presumably any listed value is acceptable — confirm
*/
interface BrowserContextRequirements {
browserControllerTypes: BrowserControllerType[];
browserTypes: LsdBrowserType[];
browserHeadlesses: boolean[];
browserIncognitos: boolean[];
}
/**
* Proxy settings used by the controllers: the proxy url plus optional credentials and an optional identifier.
*/
interface ProxyInController {
proxyUrl: string;
username?: string;
password?: string;
proxyId?: string;
}
/**
* TLS client certificate for one origin.
* * cert/key/pfx can each be given either as a Buffer or as a file path (certPath/keyPath/pfxPath)
* * NOTE(review): the shape looks like playwright's clientCertificates option — confirm
*/
interface ClientCertificate {
origin: string;
cert?: Buffer;
certPath?: string;
key?: Buffer;
keyPath?: string;
pfx?: Buffer;
pfxPath?: string;
passphrase?: string;
}
/**
* Options used when creating an LsdApiContext.
*/
interface LsdApiContextOptions {
proxy?: ProxyInController;
/**
* storageState (cookies and localStorage)
*/
stateData?: BrowserStateData;
userAgent?: string;
timeout?: number;
ignoreHTTPSErrors?: boolean;
extraHTTPHeaders?: Record<string, string>;
httpCredentials?: {
username: string;
password: string;
origin?: string;
send?: 'always' | 'unauthorized';
};
clientCertificates?: ClientCertificate[];
}
/**
* Options of LsdApiContext.fetch().
*/
interface LsdFetchOptions {
/**
* HTTP method, in lower case
* @default "get"
*/
method?: "delete" | "get" | "post" | "put";
params?: Record<string, string | number | boolean>;
headers?: Record<string, string>;
data?: any;
form?: Record<string, string | number | boolean>;
timeout?: number;
failOnStatusCode?: boolean;
ignoreHTTPSErrors?: boolean;
maxRedirects?: number;
maxRetries?: number;
}
/**
* Result of LsdApiContext.fetch(): response headers, status, statusText, body text and url.
*/
interface LsdApiResponse {
headers: Record<string, string>;
status: number;
statusText: string;
text: string;
url: string;
}
/**
* Context for sending HTTP requests (fetch) and reading the associated state data.
*/
interface LsdApiContext {
fetch(url: string, options?: LsdFetchOptions): Promise<LsdApiResponse>;
stateData(): Promise<BrowserStateData>;
/**
* a destroyed LsdApiContext must not be used again, otherwise an exception is thrown
*/
destroy(): Promise<boolean>;
}
/**
* The three playwright BrowserType objects, keyed by browser engine name.
*/
interface PlaywrightBrowserTypes {
chromium: BrowserType;
firefox: BrowserType;
webkit: BrowserType;
}
/**
* The three patchright BrowserType objects (BrowserType$1), keyed by browser engine name.
*/
interface PatchrightBrowserTypes {
chromium: BrowserType$1;
firefox: BrowserType$1;
webkit: BrowserType$1;
}
/**
* Options common to launching a browser (LsdLaunchOptions) and connecting to a browser (LsdConnectOptions).
*/
interface BrowserOptions {
/**
* Interval (in seconds) between two rounds of closing free pages; takes effect only if greater than 0
* @default 300
*/
closeFreePagesIntervalSeconds?: number;
/**
* max browserContexts per browser
* @default 10
*/
maxBrowserContextsPerBrowser?: number;
/**
* max pages per browserContext
* @default 20
*/
maxPagesPerBrowserContext?: number;
/**
* pages that have been free for more than this number of seconds will be closed
* @default 900
*/
maxPageFreeSeconds?: number;
/**
* @default true
*/
maxViewportOfNewPage?: boolean;
/**
* The proxy actually used by the connected browser; for efficient web scraping, please pass an accurate value
* * this will be used as the default proxy when creating new browserContexts later
* @default null
*/
proxy?: ProxyInController | null;
/**
* Maximum time in milliseconds to wait for the browser instance to start. Pass 0 to disable timeout.
* @default 30_000 (30 seconds)
*/
timeout?: number;
}
/**
* How the browser is launched:
* * puppeteer: "launchServer" is the same as "launch"
* * playwright: use "launchServer" if it's necessary to call LsdBrowser.pid()/pidUsage()
*/
type BrowserLaunchMethod = "launch" | "launchServer";
/**
* Options for launching a new browser; extends the common BrowserOptions.
*/
interface LsdLaunchOptions extends BrowserOptions {
args?: string[];
executablePath?: string;
/**
* @default true
*/
maxWindowSize?: boolean;
/**
* @default true
*/
headless?: boolean;
/**
* min number of browserContexts
* @default 1
*/
minBrowserContexts?: number;
/**
* @default false for puppeteer, true for playwright
*/
incognito?: boolean;
/**
* workaround for chromium issue on windows: https://github.com/microsoft/playwright/issues/17252 (same in puppeteer)
* @default false
*/
proxyPerBrowserContext?: boolean;
/**
* userAgent of the new browserContext that is created during launching the browser, valid only in puppeteer:
* * playwright does not create a browserContext during launching browser
* * puppeteer creates a default/new browserContext during launching browser
*/
userAgent?: string;
userDataDir?: string;
/**
* @default "launch"
*/
launchMethod?: BrowserLaunchMethod;
/**
* Calculates longitude, latitude, timezone, country, & locale based on the IP address. Pass the target IP address(string) to use, or true to find the IP address automatically.
* * valid only in camoufox: geoip
* * please install Camoufox with the geoip extra
* @default false
*/
geoip?: boolean | string;
/**
* Whether to inject scripts into the main world when prefixed with mw.
* * valid only in camoufox: main_world_eval
* @default false
*/
mainWorldEval?: boolean;
/**
* List of Firefox addons to use. Must be paths to extracted addons.
* * valid only in camoufox: addons
* @default []
*/
addons?: string[];
}
/**
* Options for connecting to an already running browser; extends the common BrowserOptions.
*/
interface LsdConnectOptions extends BrowserOptions {
/**
* url that starts with "http://", such as "http://localhost:9222/"
*/
browserUrl: string;
/**
* executable path of connected browser, optional
* @default "" that means unknown
*/
executablePath?: string;
/**
* whether the connected browser is headless
* @default false
*/
headless?: boolean;
/**
* whether the connected browser is incognito
* * NOTE(review): the original comment repeated the "headless" description here; reworded to match the member name
* @default false
*/
incognito?: boolean;
/**
* userAgent of the current browserContexts that were created before connecting to the browser, valid only in puppeteer:
* * playwright: does not support page.setUserAgent
* * puppeteer: supports page.setUserAgent
*/
userAgent?: string;
}
/**
* Options of LsdBrowser.newBrowserContext().
*/
type LsdBrowserContextOptions = {
/**
* proxy of the new browserContext; required, pass null explicitly if there is none
*/
proxy: ProxyInController | null;
/**
* @default browser.options.maxViewportOfNewPage
*/
maxViewportOfNewPage?: boolean;
/**
* userAgent of the browserContext:
* * playwright: set when creating the new browserContext
* * puppeteer: set when creating the new page in the browserContext
*/
userAgent?: string;
};
/** Status values of a page (the same literals as BrowserContextStatus). */
type PageStatus = "free" | "busy" | "closed";
/**
* How a page was opened:
* * newpage: opened by browserContext.newPage()
* * popup: popup page, opened by clicking etc. (important)
* * manual: opened by adding a new page manually
* * launch: opened when creating the browserContext (includes launching the browser, e.g. puppeteer.launch)
* * connect: pages that were already open before connecting to the browser
* * other: unknown
*/
type PageOpenType = "newpage" | "popup" | "manual" | "launch" | "connect" | "other";
/**
* Bookkeeping information attached to a page.
*/
interface PageInfo {
/**
* browser index in all browsers, that starts from 1
* @default 0
*/
browserIdx: number;
/**
* browserContext index in the same browser, that starts from 1
* @default 0
*/
browserContextIdx: number;
/**
* page index in the same browserContext, that starts from 1
* @default 0
*/
pageIdx: number;
/**
* how the page is opened
* @default other
*/
openType: PageOpenType;
/**
* time when the page was opened
* @default current unix time
*/
openTime: number;
/**
* time of the last update of the page's status (free or busy)
* @default current unix time
*/
lastStatusUpdateTime: number;
/**
* id of the task that is using this page
* @default 0
*/
taskId: number;
/**
* @default 0
*/
relatedId: number;
/**
* customised page information
*/
misc: Record<string, string | number>;
}
/**
* The members of PageInfo that can be changed through LsdPage.setPageInfo(); all of them are optional.
*/
interface UpdatablePageInfo {
/**
* time of the last update of the page's status (free or busy)
* @default current unix time
*/
lastStatusUpdateTime?: number;
/**
* id of the task that is using this page
* @default 0
*/
taskId?: number;
/**
* @default 0
*/
relatedId?: number;
/**
* customised page information
*/
misc?: Record<string, string | number>;
}
/** puppeteer Page (Page$1) extended with an optional pageInfo. */
interface PageExtInPuppeteer extends Page$1 {
pageInfo?: PageInfo;
}
/** playwright Page extended with an optional pageInfo. */
interface PageExtInPlaywright extends Page {
pageInfo?: PageInfo;
}
/** patchright Page (Page$2) extended with an optional pageInfo. */
interface PageExtInPatchright extends Page$2 {
pageInfo?: PageInfo;
}
/**
* Options of LsdPage.addScriptTag().
*/
interface FrameAddScriptTagOptions {
/**
* URL of the script to be added.
*/
url?: string;
/**
* Path to a JavaScript file to be injected into the frame.
*
* @remarks
* If `path` is a relative path, it is resolved relative to the current
* working directory (`process.cwd()` in Node.js).
*/
path?: string;
/**
* JavaScript to be injected into the frame.
*/
content?: string;
/**
* Sets the `type` of the script. Use `module` in order to load an ES2015 module.
*/
type?: string;
/**
* Sets the `id` of the script.
* * supported only in puppeteer
*/
id?: string;
}
/** The two accepted values of MouseClickOptions.clickType. */
type MouseClickType = "click" | "evaluate";
/**
 * Button-related click options, accepted by page-level mouse clicks.
 */
interface PageMouseClickOptions {
  /**
   * Which button will be pressed.
   * @default left
   */
  button?: "left" | "right" | "middle";
  /**
   * Number of clicks to perform.
   * * puppeteer: count (clickCount deprecated)
   * @default 1
   */
  clickCount?: 1 | 2 | 3;
  /**
   * Time to wait between mousedown and mouseup in milliseconds.
   * @default 0
   */
  delay?: number;
}
/**
 * Options for clicking an element: the button-related options of
 * PageMouseClickOptions plus the element-specific ones declared below.
 */
interface MouseClickOptions extends PageMouseClickOptions {
  /**
   * A point to use relative to the top-left corner of element padding box. If not specified, uses some visible point of the element.
   * * puppeteer: offset
   */
  position?: { x: number; y: number };
  /**
   * @default []
   * * puppeteer: not supported, ignored
   */
  modifiers?: ("Alt" | "Control" | "Meta" | "Shift")[];
  /**
   * @default click
   */
  clickType?: MouseClickType;
}
/**
* Options of LsdElement.select().
* * presumably `type` decides which of values/labels/indexes is used — confirm
*/
interface SelectOptions {
/**
* Which attribute of select option to match
*/
type: "value" | "label" | "index";
/**
* Matches by option.value
*/
values?: string[];
/**
* Matches by the index, that starts from 0
*/
indexes?: number[];
/**
* Matches by option.label
*/
labels?: string[];
}
/**
* When a navigation is considered finished; values are mapped per controller:
* * playwright: "load" | "domcontentloaded" | "networkidle" | "commit"; "networkidle0" | "networkidle2" => "networkidle"
* * puppeteer: "load" | "domcontentloaded" | "networkidle0" | "networkidle2"; "networkidle" => "networkidle0", "commit" ignored
*/
type NavigationWaitUntil = "load" | "domcontentloaded" | "networkidle" | "commit" | "networkidle0" | "networkidle2";
/**
* Options of LsdPage.goto().
*/
interface GotoOptions {
referer?: string;
timeout?: number;
waitUntil?: NavigationWaitUntil;
}
/**
* Option to select an iframe.
* * src takes precedence over selector, at least one of them must be defined
* * it is recommended to use src because selector is valid only in puppeteer
* * NOTE(review): the original comment recommended "srcPrefix", which is not a member of this interface; presumably `src` is meant — confirm
*/
interface IframeOption {
/**
* * string: iframe.src starts with this string
* * RegExp: iframe.src matches this RegExp
*/
src?: string | RegExp;
/**
* id of iframe
*/
id?: string;
/**
* CSS selector or XPath
*/
selector?: string;
}
/**
* Key names accepted by LsdElement.press().
*/
type KeyInput = '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | 'Power' | 'Eject' | 'Abort' | 'Help' | 'Backspace' | 'Tab' | 'Numpad5' | 'NumpadEnter' | 'Enter' | '\r' | '\n' | 'ShiftLeft' | 'ShiftRight' | 'ControlLeft' | 'ControlRight' | 'AltLeft' | 'AltRight' | 'Pause' | 'CapsLock' | 'Escape' | 'Convert' | 'NonConvert' | 'Space' | 'Numpad9' | 'PageUp' | 'Numpad3' | 'PageDown' | 'End' | 'Numpad1' | 'Home' | 'Numpad7' | 'ArrowLeft' | 'Numpad4' | 'Numpad8' | 'ArrowUp' | 'ArrowRight' | 'Numpad6' | 'Numpad2' | 'ArrowDown' | 'Select' | 'Open' | 'PrintScreen' | 'Insert' | 'Numpad0' | 'Delete' | 'NumpadDecimal' | 'Digit0' | 'Digit1' | 'Digit2' | 'Digit3' | 'Digit4' | 'Digit5' | 'Digit6' | 'Digit7' | 'Digit8' | 'Digit9' | 'KeyA' | 'KeyB' | 'KeyC' | 'KeyD' | 'KeyE' | 'KeyF' | 'KeyG' | 'KeyH' | 'KeyI' | 'KeyJ' | 'KeyK' | 'KeyL' | 'KeyM' | 'KeyN' | 'KeyO' | 'KeyP' | 'KeyQ' | 'KeyR' | 'KeyS' | 'KeyT' | 'KeyU' | 'KeyV' | 'KeyW' | 'KeyX' | 'KeyY' | 'KeyZ' | 'MetaLeft' | 'MetaRight' | 'ContextMenu' | 'NumpadMultiply' | 'NumpadAdd' | 'NumpadSubtract' | 'NumpadDivide' | 'F1' | 'F2' | 'F3' | 'F4' | 'F5' | 'F6' | 'F7' | 'F8' | 'F9' | 'F10' | 'F11' | 'F12' | 'F13' | 'F14' | 'F15' | 'F16' | 'F17' | 'F18' | 'F19' | 'F20' | 'F21' | 'F22' | 'F23' | 'F24' | 'NumLock' | 'ScrollLock' | 'AudioVolumeMute' | 'AudioVolumeDown' | 'AudioVolumeUp' | 'MediaTrackNext' | 'MediaTrackPrevious' | 'MediaStop' | 'MediaPlayPause' | 'Semicolon' | 'Equal' | 'NumpadEqual' | 'Comma' | 'Minus' | 'Period' | 'Slash' | 'Backquote' | 'BracketLeft' | 'Backslash' | 'BracketRight' | 'Quote' | 'AltGraph' | 'Props' | 'Cancel' | 'Clear' | 'Shift' | 'Control' | 'Alt' | 'Accept' | 'ModeChange' | ' ' | 'Print' | 'Execute' | '\u0000' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' | 'Meta' | '*' | '+' | '-' | '/' | ';' | '=' | ',' | '.' 
| '`' | '[' | '\\' | ']' | "'" | 'Attn' | 'CrSel' | 'ExSel' | 'EraseEof' | 'Play' | 'ZoomOut' | ')' | '!' | '@' | '#' | '$' | '%' | '^' | '&' | '(' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | ':' | '<' | '_' | '>' | '?' | '~' | '{' | '|' | '}' | '"' | 'SoftLeft' | 'SoftRight' | 'Camera' | 'Call' | 'EndCall' | 'VolumeDown' | 'VolumeUp';
/**
* Options of LsdElement.press().
*/
interface KeyPressOptions {
/**
* Time to wait between keydown and keyup in milliseconds.
* @default 0
*/
delay?: number;
}
/**
* Options of LsdElement.input().
*/
interface InputOptions {
/**
* Time to wait between mousedown and mouseup in milliseconds.
* * NOTE(review): this wording looks copied from the mouse click options; for text input it is presumably the delay between key presses — confirm
* * playwright: not supported, ignored
* @default 0
*/
delay?: number;
/**
* whether to replace the current value
* @default false
*/
replace?: boolean;
/**
* whether to press Enter after inputting the value
* @default false
*/
enter?: boolean;
}
/**
* Unified element interface; _origElement() gives access to the wrapped controller element (AllElement).
*/
interface LsdElement {
/**
* @returns the value of a specified attribute on the element
* @param attributeName
*/
attribute(attributeName: string): Promise<string>;
/**
* @returns the attribute names of the element
*/
attributeNames(): Promise<string[]>;
/**
* This method returns the bounding box of the element (relative to the main frame), or null if the element is not part of the layout (example: display: none).
*/
boundingBox(): Promise<{
x: number;
y: number;
width: number;
height: number;
} | null>;
dataset(): Promise<Record<string, string>>;
/**
* In order to be compatible with various browser controllers, if you need to use this function, please follow the following conventions:
* * If the element is in an iframe, use the descendant type when locating the iframe, not the child type!
* * * Reason: args of <page/frame | locator>.evaluate are different in Playwright
* * When there is only one parameter: element.evaluate(arg => statements, val)
* * When there is more than one parameter: element.evaluate(([arg1, arg2]) => statements, [val1, val2])
* @param func
* @param args
* @param isolated default true; whether to run in isolated context; only valid for patchright and camoufox
*/
evaluate(func: Function | string, args?: any[], isolated?: boolean): Promise<Serializable>;
/**
* @returns the first element matching the given CSS selector or XPath
* @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected
* @param iframeOptions default [], options to select descendant frame
* @param absolute valid only if iframeOptions.length===0
*/
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement | null>;
/**
* @returns elements matching the given CSS selector or XPath
* @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected
* @param iframeOptions default [], options to select descendant frame
* @param absolute valid only if iframeOptions.length===0
*/
findElements(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], absolute?: boolean): Promise<LsdElement[]>;
/**
* @returns whether the element has the specified attribute or not
* @param attributeName
*/
hasAttribute(attributeName: string): Promise<boolean>;
/**
* @returns the HTML or XML markup contained within the element
*/
innerHtml(): Promise<string>;
/**
* @returns innerText of element
* @param onlyChild default false, whether to include only the text of the child text nodes
*/
innerText(onlyChild?: boolean): Promise<string>;
/**
* @returns the serialized HTML fragment describing the element including its descendants
*/
outerHtml(): Promise<string>;
textContent(): Promise<string>;
/**
* Click this element.
* @param options default {button: "left", clickCount: 1, delay: 0, modifiers: []}
*/
click(options?: MouseClickOptions): Promise<boolean>;
focus(): Promise<boolean>;
hover(): Promise<boolean>;
/**
* * playwright: fill
* * puppeteer: type
*/
input(value: string, options?: InputOptions): Promise<boolean>;
/**
* Press a key on this element.
* * NOTE(review): `options` is required here although every member of KeyPressOptions is optional and the other methods take optional options — confirm whether it should be optional
*/
press(key: KeyInput, options: KeyPressOptions): Promise<boolean>;
screenshot(options?: ScreenshotOptions): Promise<Buffer>;
scrollIntoView(): Promise<boolean>;
select(options: SelectOptions): Promise<boolean>;
setAttribute(attributeName: string, newValue: string): Promise<boolean>;
/**
* @returns the original element object of the underlying controller
*/
_origElement(): AllElement;
}
/** Viewport height and width, used by LsdPage.setViewportSize(). */
interface ViewportSize {
height: number;
width: number;
}
/** One cookie. */
interface CookieItem {
name: string;
value: string;
domain: string;
path: string;
expires: number;
httpOnly: boolean;
secure: boolean;
sameSite: 'Strict' | 'Lax' | 'None';
}
/** One localStorage entry (name/value pair). */
interface LocalStorageItem {
name: string;
value: string;
}
/** The localStorage entries of one origin. */
interface LocalStorageOrigin {
origin: string;
localStorage: LocalStorageItem[];
}
/** State data: cookies plus the localStorage entries grouped by origin. */
interface BrowserStateData {
cookies: CookieItem[];
localStorage: LocalStorageOrigin[];
}
/**
* copied from playwright (kept for reference only, not declared here):
export type URLMatch = string | RegExp | ((url: URL) => boolean);
export type RouteHandlerCallback = (route: RouteInPlaywright, request: RequestInPlayWright) => Promise<any> | void;
export type RequestHandlerCallback = (request: RequestInPlayWright) => Promise<any> | void;
*/
/** Resource type of a request. */
type RequestResourceType = "document" | "stylesheet" | "image" | "media" | "font" | "script" | "texttrack" | "xhr" | "fetch" | "eventsource" | "websocket" | "manifest" | "other";
/** HTTP method of a request, in upper case. */
type RequestMethod = "DELETE" | "GET" | "POST" | "PUT" | "CONNECT" | "HEAD" | "OPTIONS" | "PATCH" | "TRACE";
/**
* Conditions used to match a request; all members are optional.
* * presumably an omitted member means "no restriction" — confirm
*/
interface RequestMatch {
methods?: RequestMethod[];
postData?: RegExp;
resourceTypes?: RequestResourceType[];
url?: RegExp;
}
/** What to do with an intercepted request. */
type RequestInterceptionAction = "abort" | "fulfill";
/**
* One rule of LsdPage.setRequestInterception().
*/
interface RequestInterceptionOption {
/**
* Requests that match all conditions will be intercepted; all requests will be intercepted if no condition is given.
*/
requestMatch?: RequestMatch;
/**
* * abort: Aborts the HTTP request
* * fulfill: Fulfills a request with the value of "fulfill"
*/
action: RequestInterceptionAction;
/**
* required when action is "fulfill"
* @default `<html><body><h1>${request.url()}</h1></body></html>`
*/
fulfill?: string;
}
/**
* Conditions used to match a response by the length of its text.
*/
interface ResponseMatch {
/**
* min length of response.text()
*/
minLength?: number;
/**
* max length of response.text()
*/
maxLength?: number;
}
/** Free-form options passed through to a ResponseHandler. */
type ResponseHandlerOptions = Record<string, any>;
/** Page-related data passed to a ResponseHandler. */
interface ResponsePageData {
pageUrl: string;
cookies: string;
}
/** Callback for an intercepted response; may be synchronous or return a Promise. */
type ResponseHandler = (response: AllResponse, options: ResponseHandlerOptions, pageData: ResponsePageData) => Promise<void> | void;
/**
* Data recorded for one intercepted response.
*/
interface ResponseInterceptionItem {
/**
* page.url()
*/
pageUrl: string;
/**
* request.method()
*/
requestMethod: RequestMethod;
/**
* request.url()
*/
requestUrl: string;
/**
* request.postData()
*/
requestData: string;
/**
* response.text()
*/
responseData: string;
}
/**
* One rule of LsdPage.setResponseInterception().
*/
interface ResponseInterceptionOption {
requestMatch?: RequestMatch;
responseMatch?: ResponseMatch;
/**
* the ResponseInterceptionItem will be pushed into this array if Array.isArray(responseItems)
* * NOTE(review): the original comment called this array "cacheArray" and the pushed data "ResponseInterceptionData"; presumably older names of responseItems / ResponseInterceptionItem — confirm
*/
responseItems?: ResponseInterceptionItem[];
/**
* handler will be called if handler is a function
*/
handler?: ResponseHandler;
/**
* valid only if handler is a function
* @default {}
*/
handlerOptions?: ResponseHandlerOptions;
}
/** Margins used by PDFOptions.margin; each side is a number or a string. */
interface PDFMargin {
top?: string | number;
bottom?: string | number;
left?: string | number;
right?: string | number;
}
/**
* Paper format names in lower case.
* @public
*/
type LowerCasePaperFormat = 'letter' | 'legal' | 'tabloid' | 'ledger' | 'a0' | 'a1' | 'a2' | 'a3' | 'a4' | 'a5' | 'a6';
/**
* All the valid paper format types when printing a PDF:
* each name of LowerCasePaperFormat in upper-case, capitalized or lower-case form.
*
* @remarks
*
* The sizes of each format are as follows:
*
* - `Letter`: 8.5in x 11in
*
* - `Legal`: 8.5in x 14in
*
* - `Tabloid`: 11in x 17in
*
* - `Ledger`: 17in x 11in
*
* - `A0`: 33.1in x 46.8in
*
* - `A1`: 23.4in x 33.1in
*
* - `A2`: 16.54in x 23.4in
*
* - `A3`: 11.7in x 16.54in
*
* - `A4`: 8.27in x 11.7in
*
* - `A5`: 5.83in x 8.27in
*
* - `A6`: 4.13in x 5.83in
*
* @public
*/
type PaperFormat = Uppercase<LowerCasePaperFormat> | Capitalize<LowerCasePaperFormat> | LowerCasePaperFormat;
/**
* Valid options to configure PDF generation via LsdPage.pdf().
* @public
*/
interface PDFOptions {
/**
* Whether to show the header and footer.
* @defaultValue `false`
*/
displayHeaderFooter?: boolean;
/**
* HTML template for the print footer. Has the same constraints and support
* for special classes as {@link PDFOptions | PDFOptions.headerTemplate}.
*/
footerTemplate?: string;
/**
* @remarks
* If set, this takes priority over the `width` and `height` options.
* @defaultValue `letter`.
*/
format?: PaperFormat;
/**
* HTML template for the print header. Should be valid HTML with the following
* classes used to inject values into them:
*
* - `date` formatted print date
*
* - `title` document title
*
* - `url` document location
*
* - `pageNumber` current page number
*
* - `totalPages` total pages in the document
*/
headerTemplate?: string;
/**
* Sets the height of paper. You can pass in a number or a string with a unit.
*/
height?: string | number;
/**
* Whether to print in landscape orientation.
* @defaultValue `false`
*/
landscape?: boolean;
/**
* Set the PDF margins.
* @defaultValue `undefined` no margins are set.
*/
margin?: PDFMargin;
/*
* NOTE(review): an orphaned doc comment stood here ("Hides default white background and
* allows generating pdfs with transparency", default `false`); it describes an option that
* this interface does not declare, so it is kept only as a plain note.
*/
/**
* Generate document outline.
*
* @remarks
* If this is enabled the PDF will also be tagged (accessible)
* Currently only works in old Headless (headless = 'shell')
* crbug/840455#c47
*
* @defaultValue `false`
* @experimental
*/
outline?: boolean;
/**
* Paper ranges to print, e.g. `1-5, 8, 11-13`.
* @defaultValue The empty string, which means all pages are printed.
*/
pageRanges?: string;
/**
* The path to save the file to.
*
* @remarks
*
* If the path is relative, it's resolved relative to the current working directory.
*
* @defaultValue `undefined`, which means the PDF will not be written to disk.
*/
path?: string;
/**
* Give any CSS `@page` size declared in the page priority over what is
* declared in the `width` or `height` or `format` option.
* @defaultValue `false`, which will scale the content to fit the paper size.
*/
preferCSSPageSize?: boolean;
/**
* Set to `true` to print background graphics.
* @defaultValue `false`
*/
printBackground?: boolean;
/**
* Scales the rendering of the web page. Amount must be between `0.1` and `2`.
* @defaultValue `1`
*/
scale?: number;
/**
* Generate tagged (accessible) PDF.
* @defaultValue `true`
* @experimental
*/
tagged?: boolean;
/*
* NOTE(review): an orphaned doc comment stood here ("Timeout in milliseconds. Pass `0` to
* disable timeout", default `30_000`); no timeout option is declared in this interface.
*/
/**
* Sets the width of paper. You can pass in a number or a string with a unit.
*/
width?: string | number;
}
/**
* Options of LsdPage.screenshot() and LsdElement.screenshot().
* * not supported by puppeteer: animations, caret, mask, maskColor, scale, style, timeout
* @public
*/
interface ScreenshotOptions {
/*
* NOTE(review): orphaned doc comments stood in this interface for options that are not
* declared here; they are kept only as this plain note:
* - "Capture the screenshot beyond the viewport" (default `false` if there is no `clip`, `true` otherwise)
* - "Encoding of the image" (default `'binary'`)
* - "Capture the screenshot from the surface, rather than the view" (default `true`)
* - an unnamed option with default `false`
*/
/**
* Specifies the region of the page to clip.
*/
clip?: {
/**
* x-coordinate of top-left corner of clip area
*/
x: number;
/**
* y-coordinate of top-left corner of clip area
*/
y: number;
/**
* the width of the clip area in pixels.
*/
width: number;
/**
* the height of the clip area in pixels.
*/
height: number;
};
/**
* When `true`, takes a screenshot of the full page.
*
* @defaultValue `false`
*/
fullPage?: boolean;
/**
* Hides default white background and allows capturing screenshots with transparency.
*
* @defaultValue `false`
*/
omitBackground?: boolean;
/**
* Quality of the image, between 0-100. Not applicable to `png` images.
*/
quality?: number;
/**
* The file path to save the image to. The screenshot type will be inferred
* from file extension. If path is a relative path, then it is resolved
* relative to current working directory. If no path is provided, the image
* won't be saved to the disk.
*/
path?: string;
/**
* @defaultValue `'png'`
*/
type?: 'png' | 'jpeg';
}
/** Element state that LsdPage.waitForElement() can wait for. */
type WaitElementState = "attached" | "detached" | "hidden" | "visible";
/**
* Options of LsdPage.waitForElement().
*/
interface WaitElementOptions {
/**
* @default 30_000 ms
*/
timeout?: number;
/**
* @default "visible"
*/
state?: WaitElementState;
}
/**
* Options of LsdPage.waitForNavigation().
*/
interface WaitNavigationOptions {
/**
* only supported in playwright by now
* @default ""
*/
url?: string | RegExp;
/**
* @default 30_000 ms
*/
timeout?: number;
/**
* NOTE(review): the original comment documented the default as "visible", which is not a member of NavigationWaitUntil (it looks copied from WaitElementOptions.state) — confirm the real default
*/
waitUntil?: NavigationWaitUntil;
}
/**
* Unified page interface; it is an EventEmitter, and _origPage() gives access to the wrapped controller page (AllPage).
*/
interface LsdPage extends EventEmitter {
/**
* Adds a script which would be evaluated in one of the following scenarios:
* * Whenever the page is navigated.
* * Whenever the child frame is attached or navigated. In this case, the script is evaluated in the context of the newly attached frame.
* @param scriptOrFunc
* @param arg
*/
addPreloadScript(scriptOrFunc: string | Function, arg?: Serializable): Promise<boolean>;
/**
* Adds a `<script>` tag into the page with the desired URL or content.
* @param options
*/
addScriptTag(options: FrameAddScriptTagOptions): Promise<AllElementHandle>;
/**
* Get the LsdApiContext associated with this page's LsdBrowserContext
* * only valid in playwright
*/
apiContext(): LsdApiContext;
bringToFront(): Promise<boolean>;
browserContext(): LsdBrowserContext;
/**
* clear the cookies of the current page(url)
* * Prerequisites: page must have a valid url, such as by calling goto(url)
*/
clearCookies(): Promise<boolean>;
/**
* clear the localStorage of the current page(url)
* * Prerequisites: page must have a valid url, such as by calling goto(url)
*/
clearLocalStorage(): Promise<boolean>;
/**
* Clear all request interceptions on the page
*/
clearRequestInterceptions(): Promise<boolean>;
/**
* Clear all response interceptions on the page
*/
clearResponseInterceptions(): Promise<boolean>;
/**
* clear the stateData of the current page(url):
* * stateData: cookies, localStorage, indexedDB
* * Prerequisites: page must have a valid url, such as by calling goto(url)
*/
clearStateData(): Promise<boolean>;
/**
* Only free page can be closed!
*/
close(): Promise<boolean>;
/**
* Should the page be closed when it is freed?
* * Sometimes, in order to avoid being used again, you need to close the page.
* * valid only in browser page
* @default false, please call setCloseWhenFree to change it
*/
closeWhenFree(): boolean;
/**
* Get the full HTML content of the page or descendant frame
* @param iframeOptions default [], selectors of descendant frames
*/
content(iframeOptions?: IframeOption[]): Promise<string>;
cookies(): Promise<CookieItem[]>;
/**
* In order to be compatible with various browser controllers, if you need to use this function, please follow the following conventions:
* * When there is only one parameter: page.evaluate(arg => statements, val)
* * When there is more than one parameter: page.evaluate(([arg1, arg2]) => statements, [val1, val2])
* * NOTE(review): the original comment also documented an `isolated` parameter (default true; only valid for patchright and camoufox), but this signature does not declare it — confirm
* @param func
* @param args
*/
evaluate(func: Function | string, args?: any[]): Promise<any>;
/**
* The method adds a function called `name` on the page's `window` object.
* When called, the function executes `callbackFunction` in node.js and
* returns a `Promise` which resolves to the return value of `callbackFunction`.
* * Reminder: It is not recommended to use this function because it is easy to be detected !!!
* @param name Name of the function on the window object
* @param callbackFunction Callback function which will be called in node.js context
*/
exposeFunction(name: string, callbackFunction: Function): Promise<void>;
/**
* @returns the first element matching the given CSS selector or XPath
* @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected
* @param iframeOptions default [], options to select descendant frame
* @param iframeType default "child", "descendant" is valid only if selectorOrXpath is string , iframeOptions.length is 1 and iframeOptions[0].src is string or RegExp
* @example
* * findElement("body", [{src: iframe.src}], "descendant"): to get the body element of a descendant iframe with src
*/
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[], iframeType?: "child" | "descendant"): Promise<LsdElement | null>;
/**
* @returns elements matching the given CSS selector or XPath
* @param selectorOrXpath CSS selector or XPath; if this parameter is an array, each selectorOrXpath in the array will be tried until elements are selected
* @param iframeOptions default [], options to select descendant frame
*/
findElements(selectorOrXpath: string | string[], iframeOptions?: IframeOption[]): Promise<LsdElement[]>;
/**
* Free a busy page. All request and response interceptions will be cleared.
*/
free(): Promise<boolean>;
/**
* Navigate the page to the url.
* * NOTE(review): the original comment here was a copy of the hasAttribute documentation; the meaning of the boolean result is not documented — presumably whether the navigation succeeded, confirm
* @param url
* @param options
*/
goto(url: string, options?: GotoOptions): Promise<boolean>;
id(): string;
isFree(): boolean;
/**
* valid only in CheerioPage
* @param html
* @param isHtml default true
*/
load(html: string, isHtml?: boolean): boolean;
/**
* @returns the localStorage entries grouped by origin
* * the method name is spelled "localStroage" in the API
*/
localStroage(): Promise<LocalStorageOrigin[]>;
mainFrame(): AllFrame;
maximizeViewport(): Promise<boolean>;
mouseClick(x: number, y: number, options?: PageMouseClickOptions): Promise<boolean>;
mouseDown(): Promise<boolean>;
mouseMove(x: number, y: number): Promise<boolean>;
mouseUp(): Promise<boolean>;
/**
* @param deltaX
* @param delterY presumably the vertical delta ("deltaY"); the parameter name is misspelled in the declaration
*/
mouseWheel(deltaX?: number, delterY?: number): Promise<boolean>;
pageHeight(): Promise<number>;
pageInfo(): PageInfo;
pageWidth(): Promise<number>;
pdf(options?: PDFOptions): Promise<Buffer>;
reload(): Promise<boolean>;
screenshot(options?: ScreenshotOptions): Promise<Buffer>;
scrollBy(x: number, y: number): Promise<boolean>;
scrollTo(x: number, y: number): Promise<boolean>;
/**
* Send a CDP message over the current(not detached) or new CDP session
* @param method protocol method name
* @param params default null(ignored), method parameters
* @param detach default true, whether to detach the CDPSession from target
*/
sendCDPMessage(method: string, params?: object | null, detach?: boolean): Promise<any>;
/**
* set new value of closeWhenFree, refer to closeWhenFree
* * valid only in browser page
* @param closeWhenFree
*/
setCloseWhenFree(closeWhenFree: boolean): boolean;
setCookies(cookies: CookieItem[]): Promise<boolean>;
setExtraHTTPHeaders(headers: Record<string, string>): Promise<boolean>;
/**
* set localStorage on the current web page(page.url())
* * the method name is spelled "setLocalStroage" in the API
* @param localStorageItems
*/
setLocalStroage(localStorageItems: LocalStorageItem[]): Promise<boolean>;
setPageInfo(pageInfo: UpdatablePageInfo): boolean;
/**
* Intercept requests that meet the conditions(requestMatch) to perform an action(action and fulfill).
* @param options
*/
setRequestInterception(options: RequestInterceptionOption | RequestInterceptionOption[]): Promise<boolean>;
/**
* Intercept responses that meet the conditions(requestMatch and responseMatch) to perform actions(responseItems and handler)
* * NOTE(review): the original comment named the first action "cacheArray"; ResponseInterceptionOption declares responseItems instead — confirm
* @param options
*/
setResponseInterception(options: ResponseInterceptionOption | ResponseInterceptionOption[]): Promise<boolean>;
/**
* Shortcut for LsdPage.browserContext().setStateData(stateData)
* @param stateData
*/
setStateData(stateData: BrowserStateData): Promise<boolean>;
/**
* valid only in puppeteer
* @param userAgent
*/
setUserAgent(userAgent: string): Promise<boolean>;
setViewportSize(viewPortSize: ViewportSize): Promise<boolean>;
stateData(): Promise<BrowserStateData>;
status(): PageStatus;
title(): Promise<string>;
url(): string;
/**
* start to use this free page
*/
use(): boolean;
/**
* @param selector CSS selector, not XPath
* @param options
*/
waitForElement(selector: string, options?: WaitElementOptions): Promise<boolean>;
/**
* @param options
*/
waitForNavigation(options: WaitNavigationOptions): Promise<boolean>;
/**
* obj=window?.[key1]...?.[keyn]
* @returns obj ? JSON.stringify(obj) : ""
* @param keys
*/
windowMember(keys: string[]): Promise<string>;
/**
* @returns the original page object of the underlying controller
*/
_origPage(): AllPage;
}
/**
* Unified interface of a browser context; it is also an EventEmitter.
* * its status is one of BrowserContextStatus: "free" | "busy" | "closed"
*/
interface LsdBrowserContext extends EventEmitter {
/**
* Get the LsdApiContext associated with this LsdBrowserContext
* * only valid in playwright
*/
apiContext(): LsdApiContext;
/**
* Get the LsdBrowser of this LsdBrowserContext
* * NOTE(review): presumably the browser that created this browserContext - confirm
*/
browser(): LsdBrowser;
/**
* close this BrowserContext
* * For a BrowserContext that cannot be closed directly, it is only marked as closed; it will be closed when the browser is closed.
* * refer to "Error: Non-incognito profiles cannot be closed" in puppeteer
*/
close(): Promise<boolean>;
/**
* close pages that have been free for more than maxPageFreeSeconds if maxPageFreeSeconds > 0
* * but the last page in the browserContext will not be closed
* @param maxPageFreeSeconds default 0: the default maxPageFreeSeconds of the browserContext will be used
*/
closeFreePages(maxPageFreeSeconds?: number): Promise<boolean>;
/**
* does this browser meet browserContextRequirements (incognitos ignored in browser)?
* * NOTE(review): the wording is identical to LsdBrowser.doesMeetBrowserContextRequirements; presumably this one checks this browserContext - confirm
* @param browserContextRequirements the requirements to check
*/
doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean;
/**
* Free a busy LsdBrowserContext.
* @param clearStateData default false
*/
free(clearStateData?: boolean): boolean;
/**
* get a free page from current pages or by creating a new page
* @param always NOTE(review): undocumented flag; presumably forces a page to be returned even when limits are reached - confirm
* @returns the page, or null (presumably when no page can be provided - confirm)
*/
getPage(always?: boolean): Promise<LsdPage | null>;
/**
* whether a number of free page(s) can be obtained
* * refer to getPage()
* @param pageNum default 1, the number of free pages
*/
hasFreePage(pageNum?: number): boolean;
/**
* Get the id of this LsdBrowserContext.
*/
id(): string;
/**
* Whether this LsdBrowserContext is free (refer to status()).
*/
isFree(): boolean;
/**
* Whether this LsdBrowserContext is incognito.
*/
isIncognito(): boolean;
/**
* How this browserContext was created: "launch" | "new" (refer to BrowserContextCreationMethod).
*/
creationMethod(): BrowserContextCreationMethod;
/**
* Get a page by its index.
* @param pageIdx index of the page
* @returns the page, or null (presumably when there is no page with this index - confirm)
*/
page(pageIdx: number): LsdPage | null;
/**
* Get the pages of this LsdBrowserContext.
*/
pages(): LsdPage[];
/**
* Get the proxy of this LsdBrowserContext, or null (presumably when no proxy is used - confirm).
*/
proxy(): ProxyInController | null;
/**
* Set the state data of this LsdBrowserContext.
* @param stateData the state data to set
*/
setStateData(stateData: BrowserStateData): Promise<boolean>;
/**
* Get the current status: "free" | "busy" | "closed" (refer to BrowserContextStatus).
*/
status(): BrowserContextStatus;
/**
* start to use this LsdBrowserContext
*/
use(): boolean;
/**
* Get the original browser context object of the underlying controller (one of the types in AllBrowserContext).
*/
_origBrowserContext(): AllBrowserContext;
}
/**
* Unified interface of a browser that was launched or connected to; it is also an EventEmitter.
*/
interface LsdBrowser extends EventEmitter {
/**
* Create a new LsdBrowserContext in this browser.
* @param options optional, refer to LsdBrowserContextOptions
* @returns the new browserContext, or null (presumably when it cannot be created - confirm)
*/
newBrowserContext(options?: LsdBrowserContextOptions): Promise<LsdBrowserContext | null>;
/**
* 1. launched: close all browserContexts and this browser
* 2. connected:
* * in puppeteer: close all browserContexts and this browser (marked "???" by the author, i.e. unconfirmed)
* * in playwright: only browserContexts created by newContext will be closed; the browser is disconnected but will not be closed
*/
close(): Promise<boolean>;
/**
* Get the browserContexts of this browser.
*/
browserContexts(): LsdBrowserContext[];
/**
* Get the controller type of this browser: "playwright" | "puppeteer" | "patchright" | "camoufox".
*/
browserControllerType(): BrowserControllerType;
/**
* How this browser was created: "launch" | "connect".
*/
browserCreationMethod(): BrowserCreationMethod;
/**
* Get the type of this browser: "chromium" | "firefox" | "webkit".
*/
browserType(): LsdBrowserType;
/**
* Get the creation time of this browser.
* * NOTE(review): the unit is not visible in this declaration (presumably a timestamp) - confirm
*/
createTime(): number;
/**
* does this browser meet browserContextRequirements (incognitos ignored in browser)?
* @param browserContextRequirements the requirements to check
*/
doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean;
/**
* @returns
* 1. launched: actual executable path
* 2. connected: executablePath in LsdConnectOptions, default ""(unknown)
*/
executablePath(): string;
/**
* Get the id of this browser.
* * NOTE(review): the comment that used to sit here described getting a free BrowserContext, which does not match this member; it looked like a leftover
*/
id(): string;
/**
* Whether this browser is connected.
*/
isConnected(): boolean;
/**
* Whether this browser is headless.
*/
isHeadless(): boolean;
/**
* Get the options used to launch or connect this browser.
*/
options(): LsdLaunchOptions | LsdConnectOptions;
/**
* * puppeteer: return pid of connected or launched browser
* * playwright: return pid of connected browser that is launched manually or using launchServer, or else return 0
*/
pid(): number;
/**
* get the CPU utilization(%) and memory usage(MB) of browser processes if pid is greater than 0 (refer to pid())
*/
pidUsage(): Promise<{
cpu: number;
memory: number;
}>;
/**
* Get the proxy of this browser, or null (presumably when no proxy is used - confirm).
*/
proxy(): ProxyInController | null;
/**
* Get the version of this browser as a string.
*/
version(): Promise<string>;
/**
* Get the original browser object of the underlying controller (one of the types in AllBrowser).
*/
_origBrowser(): AllBrowser;
}
/**
* Entry point for launching / connecting browsers with one of the supported browser controllers
* and for creating LsdApiContext objects.
*/
interface LsdBrowserController$1 {
/**
* launch a new browser using the related browser controller
* @param browserControllerType "playwright" | "puppeteer" | "patchright" | "camoufox"
* @param browserType "chromium" | "firefox" | "webkit"
* @param options optional, refer to LsdLaunchOptions
*/
launch(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options?: LsdLaunchOptions): Promise<LsdBrowser>;
/**
* connect to an existing browser using the related browser controller
* @param browserControllerType "playwright" | "puppeteer" | "patchright" | "camoufox"
* @param browserType "chromium" | "firefox" | "webkit"
* @param options optional, refer to LsdConnectOptions
*/
connect(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, options?: LsdConnectOptions): Promise<LsdBrowser>;
/**
* use a special plugin, such as playwrightExtra.chromium.use(StealthPlugin())
** import puppeteerExtra from "puppeteer-extra";
** import * as playwrightExtra from "playwright-extra";
** import StealthPlugin from "puppeteer-extra-plugin-stealth";
* @param browserControllerType "playwright" | "puppeteer" | "patchright" | "camoufox"
* @param browserType "chromium" | "firefox" | "webkit"
* @param plugin the plugin to use; typed as any, so it is not checked by the compiler
*/
setBrowserPlugin(browserControllerType: BrowserControllerType, browserType: LsdBrowserType, plugin: any): boolean;
/**
* Create a new LsdApiContext; valid in playwright
* @param options optional, refer to LsdApiContextOptions
*/
newApiContext(options?: LsdApiContextOptions): Promise<LsdApiContext>;
}
/**
* Set the log function used by the controllers.
* * NOTE(review): the scope of the setting and the meaning of the boolean result (presumably success) are not visible in this declaration - confirm
* @param logFun a LogFunction from @letsscrapedata/utils
*/
declare function setControllerLogFun(logFun: LogFunction): boolean;
/**
* LsdBrowser implementation that wraps a playwright Browser.
* * the members below implement LsdBrowser; refer to that interface for their contracts
*/
declare class PlaywrightBrowser extends EventEmitter implements LsdBrowser {
#private;
/**
* @param browserType "chromium" | "firefox" | "webkit"
* @returns NOTE(review): presumably whether this class can drive the given browserType - confirm
*/
static doesSupport(browserType: LsdBrowserType): boolean;
/**
* @param browser the playwright Browser to wrap
* @param browserType "chromium" | "firefox" | "webkit"
* @param browserCreateMethod "launch" | "connect"
* @param options the options that were used to launch or connect the browser
* @param browserIdx optional; NOTE(review): presumably an index used to build the id - confirm
* @param pid optional; refer to pid() in LsdBrowser
*/
constructor(browser: Browser, browserType: LsdBrowserType, browserCreateMethod: BrowserCreationMethod, options: LsdLaunchOptions | LsdConnectOptions, browserIdx?: number, pid?: number);
newBrowserContext(options?: LsdBrowserContextOptions): Promise<LsdBrowserContext | null>;
close(): Promise<boolean>;
browserContexts(): LsdBrowserContext[];
browserControllerType(): BrowserControllerType;
browserCreationMethod(): BrowserCreationMethod;
browserType(): LsdBrowserType;
createTime(): number;
doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean;
executablePath(): string;
id(): string;
isConnected(): boolean;
isHeadless(): boolean;
options(): LsdLaunchOptions | LsdConnectOptions;
pid(): number;
pidUsage(): Promise<{
cpu: number;
memory: number;
}>;
proxy(): ProxyInController | null;
version(): Promise<string>;
_origBrowser(): AllBrowser;
}
/**
* LsdBrowserContext implementation that wraps a playwright BrowserContext.
* * the members below implement LsdBrowserContext; refer to that interface for their contracts
*/
declare class PlaywrightBrowserContext extends EventEmitter implements LsdBrowserContext {
#private;
/**
* @param lsdBrowser the LsdBrowser of this browserContext
* @param browserContext the playwright BrowserContext to wrap
* @param browserContextCreationMethod "launch" | "new"
* @param incognito optional; refer to isIncognito()
* @param proxy optional; refer to proxy()
* @param browserIdx optional; NOTE(review): presumably an index used to build the id - confirm
* @param browserContextIdx optional; NOTE(review): presumably an index used to build the id - confirm
* @param maxPagesPerBrowserContext optional; NOTE(review): presumably limits the pages that getPage() may create - confirm
* @param maxPageFreeSeconds optional; refer to closeFreePages()
* @param maxViewportOfNewPage optional; NOTE(review): presumably whether the viewport of a new page is maximized - confirm
*/
constructor(lsdBrowser: LsdBrowser, browserContext: BrowserContext, browserContextCreationMethod: BrowserContextCreationMethod, incognito?: boolean, proxy?: ProxyInController | null, browserIdx?: number, browserContextIdx?: number, maxPagesPerBrowserContext?: number, maxPageFreeSeconds?: number, maxViewportOfNewPage?: boolean);
apiContext(): LsdApiContext;
browser(): LsdBrowser;
close(): Promise<boolean>;
closeFreePages(maxPageFreeSeconds?: number): Promise<boolean>;
creationMethod(): BrowserContextCreationMethod;
doesMeetBrowserContextRequirements(browserContextRequirements: BrowserContextRequirements): boolean;
getPage(always?: boolean): Promise<LsdPage | null>;
free(clearStateData?: boolean): boolean;
hasFreePage(pageNum?: number): boolean;
id(): string;
isFree(): boolean;
isIncognito(): boolean;
page(pageIdx: number): LsdPage | null;
pages(): LsdPage[];
proxy(): ProxyInController | null;
setStateData(stateData: BrowserStateData): Promise<boolean>;
status(): BrowserContextStatus;
use(): boolean;
_origBrowserContext(): AllBrowserContext;
}
declare class PlaywrightPage extends EventEmitter implements LsdPage {
#private;
constructor(browserContext: LsdBrowserContext, page: Page, pageInfo?: PageInfo);
addPreloadScript(scriptOrFunc: string | Function, arg?: Serializable$1): Promise<boolean>;
addScriptTag(options: FrameAddScriptTagOptions): Promise<AllElementHandle>;
apiContext(): LsdApiContext;
bringToFront(): Promise<boolean>;
browserContext(): LsdBrowserContext;
clearCookies(): Promise<boolean>;
clearLocalStorage(): Promise<boolean>;
clearRequestInterceptions(): Promise<boolean>;
clearResponseInterceptions(): Promise<boolean>;
clearStateData(): Promise<boolean>;
close(): Promise<boolean>;
closeWhenFree(): boolean;
content(iframeOptions?: IframeOption[]): Promise<string>;
cookies(): Promise<CookieItem[]>;
documentHeight(): Promise<number>;
evaluate(func: Function | string, args?: any[]): Promise<any>;
exposeFunction(name: string, callbackFunction: Function): Promise<void>;
findElement(selectorOrXpath: string | string[], iframeOptions?: IframeOption[]): Promise<LsdElement | null>;
findElements(selectorOrXpath: string | string[], iframeOptions?: IframeOption[]): Promise<LsdElement[]>;
free(): Promise<boolean>;
goto(url: string, options?: GotoOptions | undefined): Promise<boolean>;
id(): string;
isFree(): boolean;
localStroage(): Promise<LocalStorageOrigin[]>;
load(): boolean;
mainFrame(): AllFrame;
maximizeViewport(): Promise<boolean>;
mouseClick(x: number, y: number, options?: PageMouseClickOptions): Promise<boolean>;
mouseDown(): Promise<boolean>;
mouseMove(x: number, y: number): Promise<boolean>;
mouseUp(): Promise<boolean>;
mouseWheel(deltaX?: number, deltaY?: number): Promise<boolean>;
pageHeight(): Promise<number>;
pageInfo(): PageInfo;
pageWidth(): Promise<number>;
pdf(options?: PDFOptions | undefined): Promise<Buffer>;
reload(): Promise<boolean>;
screenshot(options?: ScreenshotOptions): Promise<Buffer>;
scrollBy(x: number, y: number): Promise<boolean>;
scrollTo(x: number, y: number): Promise<boolean>;
sendCDPMessage(method: string, params?: object | null, detach?: boolean): Promise<any>;
setCloseWhenFree(closeWhenFree: boolean): boolean;
setCookies(cookies: CookieItem[]): Promise<boolean>;
setExtraHTTPHeaders(headers: Record<string, string>): Promise<boolean>;
setLocalStroage(localStorageItems: LocalStorageItem[]): Promise<boolean>;
setPageInfo(pageInfo: UpdatablePageInfo): boolean;
setRequestInterception(options: RequestInterceptionOption | RequestInterceptionOption[]): Promise<boolean>;
setResponseInterception(options: ResponseInterceptionOption | ResponseInterceptionOption[]): Promise<boolean>;
setStateData(stateData: BrowserStateData): Promise<boolean>;
setUserAgent(userAgent: string): Promise<boolean>;
setViewportSize(viewPortSize: ViewportSize): Promise<boolean>;
stateData(): Promise<BrowserStateData>;
status(): PageStatus;
title(): Promise<string>;
url(): string;
use(): boolean;
waitForElement(selector: string, options?: WaitElementOpt