UNPKG

@crawlee/puppeteer

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

129 lines 6.77 kB
import type { BrowserLaunchContext } from '@crawlee/browser';
import { BrowserLauncher, Configuration } from '@crawlee/browser';
import { PuppeteerPlugin } from '@crawlee/browser-pool';
// @ts-ignore optional peer dependency or compatibility with es2022
import type { Browser } from 'puppeteer';
/**
 * Apify extends the launch options of Puppeteer.
 * You can use any of the Puppeteer compatible
 * [`LaunchOptions`](https://pptr.dev/api/puppeteer.launchoptions)
 * options by providing the `launchOptions` property.
 *
 * **Example:**
 * ```js
 * // launch a headless Chrome (not Chromium)
 * const launchContext = {
 *     // Apify helpers
 *     useChrome: true,
 *     proxyUrl: 'http://user:password@some.proxy.com:1234',
 *     // Native Puppeteer options
 *     launchOptions: {
 *         headless: true,
 *         args: ['--some-flag'],
 *     }
 * }
 * ```
 */
export interface PuppeteerLaunchContext extends BrowserLaunchContext<PuppeteerPlugin['launchOptions'], unknown> {
    /**
     * `puppeteer.launch` [options](https://pptr.dev/api/puppeteer.launchoptions)
     */
    launchOptions?: PuppeteerPlugin['launchOptions'];
    /**
     * URL to an HTTP proxy server. It must define the port number,
     * and it may also contain proxy username and password.
     *
     * Example: `http://bob:pass123@proxy.example.com:1234`.
     */
    proxyUrl?: string;
    /**
     * If `true` and `executablePath` is not set,
     * Puppeteer will launch full Google Chrome browser available on the machine
     * rather than the bundled Chromium. The path to Chrome executable
     * is taken from the `CRAWLEE_CHROME_EXECUTABLE_PATH` environment variable if provided,
     * or defaults to the typical Google Chrome executable location specific for the operating system.
     * By default, this option is `false`.
     * @default false
     */
    useChrome?: boolean;
    /**
     * Already required module (`Object`). This enables usage of various Puppeteer
     * wrappers such as `puppeteer-extra`.
     *
     * Take caution, because it can cause all kinds of unexpected errors and weird behavior.
     * Crawlee is not tested with any other library besides `puppeteer` itself.
     */
    launcher?: unknown;
    /**
     * With this option selected, all pages will be opened in a new incognito browser context.
     * This means they will not share cookies nor cache and their resources will not be throttled by one another.
     * @default false
     */
    useIncognitoPages?: boolean;
}
/**
 * `PuppeteerLauncher` is based on the `BrowserLauncher`. It launches `puppeteer` browser instance.
 * @ignore
 */
export declare class PuppeteerLauncher extends BrowserLauncher<PuppeteerPlugin, unknown> {
    readonly config: Configuration;
    /**
     * One `ow` predicate per recognized launch-context option.
     * NOTE(review): presumably used to validate the `launchContext` passed to the
     * constructor; the validation call itself is not visible in this declaration file.
     *
     * Each member keeps its own `@ts-ignore` line: the directive only applies to the
     * line that immediately follows it, so the comment must never share a line with code.
     */
    protected static optionsShape: {
        // @ts-ignore optional peer dependency or compatibility with es2022
        launcher: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        proxyUrl: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        useChrome: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        useIncognitoPages: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        browserPerProxy: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        experimentalContainers: import("ow").BooleanPredicate & import("ow").BasePredicate<boolean | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        userDataDir: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        launchOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
        // @ts-ignore optional peer dependency or compatibility with es2022
        userAgent: import("ow").StringPredicate & import("ow").BasePredicate<string | undefined>;
    };
    /**
     * All `PuppeteerLauncher` parameters are passed via a `launchContext` object.
     */
    constructor(launchContext?: PuppeteerLaunchContext, config?: Configuration);
    /**
     * NOTE(review): presumably resolves the default for the `headless` launch option;
     * the implementation is not part of this declaration file — confirm against the source.
     */
    protected _getDefaultHeadlessOption(): boolean;
}
/**
 * Launches headless Chrome using Puppeteer pre-configured to work within the Apify platform.
 * The function has the same argument and the return value as `puppeteer.launch()`.
 * See [Puppeteer documentation](https://pptr.dev/api/puppeteer.launchoptions) for more details.
 *
 * The `launchPuppeteer()` function alters the following Puppeteer options:
 *
 * - Passes the setting from the `CRAWLEE_HEADLESS` environment variable to the `headless` option,
 *   unless it was already defined by the caller or `CRAWLEE_XVFB` environment variable is set to `1`.
 *   Note that Apify Actor cloud platform automatically sets `CRAWLEE_HEADLESS=1` to all running actors.
 * - Takes the `proxyUrl` option, validates it and adds it to `args` as `--proxy-server=XXX`.
 *   The proxy URL must define a port number and have one of the following schemes: `http://`,
 *   `https://`, `socks4://` or `socks5://`.
 *   If the proxy is HTTP (i.e. has the `http://` scheme) and contains username or password,
 *   the `launchPuppeteer` function sets up an anonymous HTTP proxy
 *   to make the proxy work with headless Chrome. For more information, read the
 *   [blog post about proxy-chain library](https://blog.apify.com/how-to-make-headless-chrome-and-puppeteer-use-a-proxy-server-with-authentication-249a21a79212).
 *
 * To use this function, you need to have the [puppeteer](https://www.npmjs.com/package/puppeteer)
 * NPM package installed in your project.
 * When running on the Apify cloud, you can achieve that simply
 * by using the `apify/actor-node-chrome` base Docker image for your actor - see
 * [Apify Actor documentation](https://docs.apify.com/actor/build#base-images)
 * for details.
 *
 * @param [launchContext]
 *   All `PuppeteerLauncher` parameters are passed via a `launchContext` object.
 *   If you want to pass custom `puppeteer.launch(options)` options you can use the `PuppeteerLaunchContext.launchOptions` property.
 * @param [config]
 * @returns
 *   Promise that resolves to Puppeteer's `Browser` instance.
 */
export declare function launchPuppeteer(launchContext?: PuppeteerLaunchContext, config?: Configuration): Promise<Browser>;
//# sourceMappingURL=puppeteer-launcher.d.ts.map