@crawlee/playwright
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
141 lines • 7.75 kB
TypeScript
import type { BrowserLaunchContext } from '@crawlee/browser';
import { BrowserLauncher, Configuration } from '@crawlee/browser';
import { PlaywrightPlugin } from '@crawlee/browser-pool';
// @ts-ignore optional peer dependency or compatibility with es2022
import type { Browser, BrowserType, LaunchOptions } from 'playwright';
/**
* Apify extends the launch options of Playwright.
* You can use any of the Playwright compatible
* [`LaunchOptions`](https://playwright.dev/docs/api/class-browsertype#browsertypelaunchoptions)
* options by providing the `launchOptions` property.
*
* **Example:**
* ```js
* // launch a headless Chrome (not Chromium)
* const launchContext = {
* // Apify helpers
* useChrome: true,
* proxyUrl: 'http://user:password@some.proxy.com'
* // Native Playwright options
* launchOptions: {
* headless: true,
* args: ['--some-flag'],
* }
* }
* ```
*/
export interface PlaywrightLaunchContext extends BrowserLaunchContext<LaunchOptions, BrowserType> {
/**
* `browserType.launch` [options](https://playwright.dev/docs/api/class-browsertype#browser-type-launch) or
* `browserType.launchContextOptions` [options](https://playwright.dev/docs/api/class-browsertype#browser-type-launch-persistent-context)
*/
launchOptions?: LaunchOptions & Parameters<BrowserType['launchPersistentContext']>[1];
/**
* URL to a HTTP proxy server. It must define the port number,
* and it may also contain proxy username and password.
*
* Example: `http://bob:pass123@proxy.example.com:1234`.
*/
proxyUrl?: string;
/**
* If `true` and `executablePath` is not set,
* Playwright will launch full Google Chrome browser available on the machine
* rather than the bundled Chromium. The path to Chrome executable
* is taken from the `CRAWLEE_CHROME_EXECUTABLE_PATH` environment variable if provided,
* or defaults to the typical Google Chrome executable location specific for the operating system.
* By default, this option is `false`.
* @default false
*/
useChrome?: boolean;
/**
* With this option selected, all pages will be opened in a new incognito browser context.
* This means they will not share cookies nor cache and their resources will not be throttled by one another.
* @default false
*/
useIncognitoPages?: boolean;
/**
* @experimental
* Like `useIncognitoPages`, but for persistent contexts, so cache is used for faster loading.
* Works best with Firefox. Unstable on Chromium.
*/
experimentalContainers?: boolean;
/**
* Sets the [User Data Directory](https://chromium.googlesource.com/chromium/src/+/master/docs/user_data_dir.md) path.
* The user data directory contains profile data such as history, bookmarks, and cookies, as well as other per-installation local state.
* If not specified, a temporary directory is used instead.
*/
userDataDir?: string;
/**
* By default this function uses `require("playwright").chromium`.
* If you want to use a different browser you can pass it by this property as e.g. `require("playwright").firefox`
*/
launcher?: BrowserType;
}
/**
* `PlaywrightLauncher` is based on the `BrowserLauncher`. It launches `playwright` browser instance.
* @ignore
*/
export declare class PlaywrightLauncher extends BrowserLauncher<PlaywrightPlugin> {
readonly config: Configuration;
protected static optionsShape: {
// @ts-ignore optional peer dependency or compatibility with es2022
launcher: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
launchContextOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
proxyUrl: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
useChrome: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
useIncognitoPages: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
browserPerProxy: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
experimentalContainers: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
userDataDir: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
launchOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
userAgent: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
};
/**
* All `PlaywrightLauncher` parameters are passed via this launchContext object.
*/
constructor(launchContext?: PlaywrightLaunchContext, config?: Configuration);
}
/**
* Launches headless browsers using Playwright pre-configured to work within the Apify platform.
* The function has the same return value as `browserType.launch()`.
* See [Playwright documentation](https://playwright.dev/docs/api/class-browsertype) for more details.
*
* The `launchPlaywright()` function alters the following Playwright options:
*
* - Passes the setting from the `CRAWLEE_HEADLESS` environment variable to the `headless` option,
* unless it was already defined by the caller or `CRAWLEE_XVFB` environment variable is set to `1`.
* Note that Apify Actor cloud platform automatically sets `CRAWLEE_HEADLESS=1` to all running actors.
* - Takes the `proxyUrl` option, validates it and adds it to `launchOptions` in a proper format.
* The proxy URL must define a port number and have one of the following schemes: `http://`,
* `https://`, `socks4://` or `socks5://`.
* If the proxy is HTTP (i.e. has the `http://` scheme) and contains username or password,
* the `launchPlaywright` functions sets up an anonymous proxy HTTP
* to make the proxy work with headless Chrome. For more information, read the
* [blog post about proxy-chain library](https://blog.apify.com/how-to-make-headless-chrome-and-puppeteer-use-a-proxy-server-with-authentication-249a21a79212).
*
* To use this function, you need to have the [Playwright](https://www.npmjs.com/package/playwright)
* NPM package installed in your project.
* When running on the Apify Platform, you can achieve that simply
* by using the `apify/actor-node-playwright-*` base Docker image for your actor - see
* [Apify Actor documentation](https://docs.apify.com/actor/build#base-images)
* for details.
*
* @param [launchContext]
* Optional settings passed to `browserType.launch()`. In addition to
* [Playwright's options](https://playwright.dev/docs/api/class-browsertype?_highlight=launch#browsertypelaunchoptions)
* the object may contain our own {@apilink PlaywrightLaunchContext} that enable additional features.
* @param [config]
* @returns
* Promise that resolves to Playwright's `Browser` instance.
*/
export declare function launchPlaywright(launchContext?: PlaywrightLaunchContext, config?: Configuration): Promise<Browser>;
//# sourceMappingURL=playwright-launcher.d.ts.map