@crawlee/browser
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
122 lines • 6 kB
TypeScript
import { Configuration } from '@crawlee/basic';
import type { BrowserPlugin, BrowserPluginOptions } from '@crawlee/browser-pool';
import type { Constructor, Dictionary } from '@crawlee/utils';
/**
 * Launch context accepted by the {@link BrowserLauncher} constructor.
 * It extends the `BrowserPluginOptions` of `@crawlee/browser-pool` with the
 * crawler-level options declared below.
 *
 * @typeParam TOptions - Type of the browser launch options; forwarded unchanged to `BrowserPluginOptions`.
 * @typeParam Launcher - Type of the browser library object accepted by the `launcher` option.
 */
export interface BrowserLaunchContext<TOptions, Launcher> extends BrowserPluginOptions<TOptions> {
/**
* URL to an HTTP proxy server. It must define the port number,
* and it may also contain proxy username and password.
*
* @example
* `http://bob:pass123@proxy.example.com:1234`.
*/
proxyUrl?: string;
/**
* If `true` and the `executablePath` option of {@link BrowserLaunchContext.launchOptions|`launchOptions`} is not set,
* the launcher will launch full Google Chrome browser available on the machine
* rather than the bundled Chromium. The path to Chrome executable
* is taken from the `CRAWLEE_CHROME_EXECUTABLE_PATH` environment variable if provided,
* or defaults to the typical Google Chrome executable location specific for the operating system.
* @default false
*/
useChrome?: boolean;
/**
* If set to `true`, the crawler respects the proxy url generated for the given request.
* This aligns the browser-based crawlers with the `HttpCrawler`.
*
* Might cause performance issues, as Crawlee might launch too many browser instances.
*/
browserPerProxy?: boolean;
/**
* With this option selected, all pages will be opened in a new incognito browser context.
* This means they will not share cookies nor cache and their resources will not be throttled by one another.
* @default false
*/
useIncognitoPages?: boolean;
/**
* @experimental
* Like `useIncognitoPages`, but for persistent contexts, so cache is used for faster loading.
* Works best with Firefox. Unstable on Chromium.
*/
experimentalContainers?: boolean;
/**
* Sets the [User Data Directory](https://chromium.googlesource.com/chromium/src/+/master/docs/user_data_dir.md) path.
* The user data directory contains profile data such as history, bookmarks, and cookies, as well as other per-installation local state.
* If not specified, a temporary directory is used instead.
*/
userDataDir?: string;
/**
* The `User-Agent` HTTP header used by the browser.
* If not provided, the function sets `User-Agent` to a reasonable default
* to reduce the chance of detection of the crawler.
*/
userAgent?: string;
/**
* The type of browser to be launched.
* By default, `chromium` is used. Other browsers like `webkit` or `firefox` can be used.
*
* @example
* ```ts
* // import the browser from the library first
* import { firefox } from 'playwright';
* ```
*
* For more details, check out the [example](https://crawlee.dev/js/docs/examples/playwright-crawler-firefox).
*/
launcher?: Launcher;
}
/**
 * Abstract class for creating browser launchers, such as `PlaywrightLauncher` and `PuppeteerLauncher`.
 *
 * @typeParam Plugin - The `BrowserPlugin` implementation this launcher is built around.
 * @typeParam Launcher - Type of the underlying browser library; defaults to `Plugin['library']`.
 * @typeParam T - Constructor type of the plugin; defaults to `Constructor<Plugin>`.
 * @typeParam LaunchOptions - Launch options type; defaults to a `Partial` of the first parameter of `Plugin['launch']`.
 * @typeParam LaunchResult - Return type of {@link BrowserLauncher.launch}; defaults to the return type of `Plugin['launch']`.
 * @ignore
 */
export declare abstract class BrowserLauncher<Plugin extends BrowserPlugin, Launcher = Plugin['library'], T extends Constructor<Plugin> = Constructor<Plugin>, LaunchOptions extends Dictionary<any> | undefined = Partial<Parameters<Plugin['launch']>[0]>, LaunchResult extends ReturnType<Plugin['launch']> = ReturnType<Plugin['launch']>> {
/** Crawlee `Configuration` instance held by this launcher. */
readonly config: Configuration;
/** The browser library object (see `BrowserLaunchContext.launcher`). */
launcher: Launcher;
/** Proxy server URL (see `BrowserLaunchContext.proxyUrl`). */
proxyUrl?: string;
/** Whether full Google Chrome should be used (see `BrowserLaunchContext.useChrome`). */
useChrome?: boolean;
/** Browser launch options as a plain dictionary. */
launchOptions: Dictionary;
/**
 * NOTE(review): presumably the launch-context properties that have no dedicated
 * field on this class - confirm against the implementation.
 */
otherLaunchContextProps: Dictionary;
/** Constructor of the browser plugin; see {@link BrowserLauncher.createBrowserPlugin}. */
Plugin: T;
/** `User-Agent` header value (see `BrowserLaunchContext.userAgent`). */
userAgent?: string;
/**
 * `ow` predicates describing the launch-context options; every predicate also admits `undefined`.
 * NOTE(review): presumably used to validate the constructor argument - confirm in the implementation.
 * The `@ts-ignore` directives below must stay directly above the line they suppress.
 */
protected static optionsShape: {
// @ts-ignore optional peer dependency or compatibility with es2022
proxyUrl: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
useChrome: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
useIncognitoPages: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
browserPerProxy: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
experimentalContainers: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
userDataDir: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
launchOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
// @ts-ignore optional peer dependency or compatibility with es2022
userAgent: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
};
/**
 * NOTE(review): judging by the name, loads the given launcher module and throws when it
 * is unavailable; `apifyImageName` presumably names a Docker image for the error message.
 * Confirm against the implementation.
 *
 * @typeParam T - Type the caller expects the loaded launcher to have.
 */
static requireLauncherOrThrow<T>(launcher: string, apifyImageName: string): T;
/**
 * All `BrowserLauncher` parameters are passed via a `launchContext` object.
 *
 * @param launchContext - Launch settings, see {@link BrowserLaunchContext}.
 * @param config - Optional `Configuration` instance.
 */
constructor(launchContext: BrowserLaunchContext<LaunchOptions, Launcher>, config?: Configuration);
/**
 * Creates the browser plugin for this launcher.
 * @ignore
 */
createBrowserPlugin(): Plugin;
/**
 * Launches a browser instance based on the plugin.
 * @returns Browser instance.
 */
launch(): LaunchResult;
/** Builds the launch options dictionary. NOTE(review): exact contents are not visible here. */
createLaunchOptions(): Dictionary;
/** Returns the default value for the `headless` option. NOTE(review): how it is derived is not visible here. */
protected _getDefaultHeadlessOption(): boolean;
/**
 * Returns the path to the Chrome executable.
 * NOTE(review): presumably follows the lookup described on `BrowserLaunchContext.useChrome`
 * (the `CRAWLEE_CHROME_EXECUTABLE_PATH` variable, then the typical path) - confirm.
 */
protected _getChromeExecutablePath(): string;
/**
 * Gets a typical path to Chrome executable, depending on the current operating system.
 */
protected _getTypicalChromeExecutablePath(): string;
/**
 * Validates the protocol of the given proxy URL.
 * NOTE(review): accepted protocols and failure behaviour are not visible in this declaration.
 */
protected _validateProxyUrlProtocol(proxyUrl?: string): void;
}
//# sourceMappingURL=browser-launcher.d.ts.map