@crawlee/puppeteer
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
83 lines • 4.08 kB
JavaScript
;
// Flag this CommonJS module as originating from an ES module so that consumers can apply default-export interop.
Object.defineProperty(exports, "__esModule", { value: true });
// Reserve the class export slot; the actual class is assigned after its definition further down.
exports.PuppeteerLauncher = void 0;
// `launchPuppeteer` is a function declaration, so it is hoisted and can already be exported here.
exports.launchPuppeteer = launchPuppeteer;
// Helper library; only `__importDefault` is used below.
const tslib_1 = require("tslib");
// Supplies `BrowserLauncher` (the base class) and `Configuration`.
const browser_1 = require("@crawlee/browser");
// Supplies `PuppeteerPlugin`.
const browser_pool_1 = require("@crawlee/browser-pool");
// `ow` argument validator; the code below reaches it through `ow_1.default`.
const ow_1 = tslib_1.__importDefault(require("ow"));
/**
 * Launcher for `puppeteer` browser instances, built on top of `BrowserLauncher`.
 * @ignore
 */
class PuppeteerLauncher extends browser_1.BrowserLauncher {
    /**
     * Every `PuppeteerLauncher` setting is supplied through a single `launchContext` object.
     *
     * @param [launchContext]
     *   Checked with `ow` against the exact shape in `PuppeteerLauncher.optionsShape`.
     * @param [config]
     *   Defaults to the global `Configuration`.
     */
    constructor(launchContext = {}, config = browser_1.Configuration.getGlobalConfig()) {
        const validate = ow_1.default;
        validate(launchContext, 'PuppeteerLauncher', validate.object.exactShape(PuppeteerLauncher.optionsShape));
        // The fallback launcher is only resolved when the caller did not supply one.
        const {
            launcher = browser_1.BrowserLauncher.requireLauncherOrThrow('puppeteer', 'apify/actor-node-puppeteer-chrome'),
            ...remainingOptions
        } = launchContext;
        const baseLaunchContext = { ...remainingOptions, launcher };
        super(baseLaunchContext, config);
        // Defined (rather than assigned) to keep class-field semantics for `config`.
        Object.defineProperty(this, "config", {
            value: config,
            writable: true,
            enumerable: true,
            configurable: true,
        });
        this.Plugin = browser_pool_1.PuppeteerPlugin;
    }
    /**
     * Maps a truthy default headless setting from the base launcher to the string `'new'`;
     * a falsy default is returned unchanged.
     */
    _getDefaultHeadlessOption() {
        const baseDefault = super._getDefaultHeadlessOption();
        if (baseDefault) {
            return 'new';
        }
        return baseDefault;
    }
}
// Publish the launcher class on the CommonJS exports object.
exports.PuppeteerLauncher = PuppeteerLauncher;
// Validation shape for `launchContext`: every `BrowserLauncher` option plus an optional `launcher` object.
const puppeteerOptionsShape = {
    ...browser_1.BrowserLauncher.optionsShape,
    launcher: ow_1.default.optional.object,
};
// Attach the shape as a static `optionsShape` property of the class.
Object.defineProperty(PuppeteerLauncher, "optionsShape", {
    value: puppeteerOptionsShape,
    writable: true,
    enumerable: true,
    configurable: true,
});
/**
 * Launches headless Chrome using Puppeteer, pre-configured to work within the Apify platform.
 *
 * This function is a thin wrapper: it creates a `PuppeteerLauncher` from the given arguments
 * and returns the result of its `launch()` method. The option handling described below is
 * carried out by the launcher, not by this function itself.
 * See the [Puppeteer documentation](https://pptr.dev/api/puppeteer.launchoptions) for the
 * available launch options.
 *
 * Puppeteer options that are altered:
 *
 * - The setting from the `CRAWLEE_HEADLESS` environment variable is passed to the `headless` option,
 *   unless the caller already defined it or the `CRAWLEE_XVFB` environment variable is set to `1`.
 *   Note that the Apify Actor cloud platform automatically sets `CRAWLEE_HEADLESS=1` for all running actors.
 * - The `proxyUrl` option is validated and added to `args` as `--proxy-server=XXX`.
 *   The proxy URL must define a port number and use one of the following schemes: `http://`,
 *   `https://`, `socks4://` or `socks5://`.
 *   If the proxy is HTTP (i.e. has the `http://` scheme) and contains a username or password,
 *   an anonymous HTTP proxy is set up to make the proxy work with headless Chrome.
 *   For more information, read the
 *   [blog post about proxy-chain library](https://blog.apify.com/how-to-make-headless-chrome-and-puppeteer-use-a-proxy-server-with-authentication-249a21a79212).
 *
 * To use this function, the [puppeteer](https://www.npmjs.com/package/puppeteer)
 * NPM package must be installed in your project.
 * When running on the Apify cloud, the simplest way to achieve that is to use
 * the `apify/actor-node-puppeteer-chrome` base Docker image for your actor - see the
 * [Apify Actor documentation](https://docs.apify.com/actor/build#base-images)
 * for details.
 *
 * @param [launchContext]
 *   All `PuppeteerLauncher` parameters are passed via a launchContext object.
 *   To pass custom `puppeteer.launch(options)` options, use the `PuppeteerLaunchContext.launchOptions` property.
 * @param [config]
 *   Configuration handed to the launcher; defaults to the global configuration.
 * @returns
 *   Promise that resolves to Puppeteer's `Browser` instance.
 */
async function launchPuppeteer(launchContext, config = browser_1.Configuration.getGlobalConfig()) {
    return new PuppeteerLauncher(launchContext, config).launch();
}
//# sourceMappingURL=puppeteer-launcher.js.map