UNPKG

@lyuboslavlyubenov/se-scraper

Version:

A module using puppeteer to scrape several search engines such as Google, Bing and Duckduckgo

55 lines (44 loc) 1.77 kB
const { Browser } = require('puppeteer-cluster/dist/concurrency/builtInConcurrency');
const debug = require('debug')('se-scraper:CustomConcurrency');
const { timeoutExecute } = require('puppeteer-cluster/dist/util');

// Upper bound (ms) for opening or closing an incognito context/page.
const BROWSER_TIMEOUT = 5000;

/**
 * Concurrency implementation for puppeteer-cluster that launches one browser
 * per worker, taking each worker's launch options from the front of
 * `this.options.perBrowserOptions`.
 *
 * Every job runs in a fresh incognito browser context with a single page.
 */
class CustomConcurrency extends Browser {
    /** Nothing to set up globally; browsers are launched per worker. */
    async init() {}

    /** Nothing to tear down globally; each worker closes its own browser. */
    async close() {}

    /**
     * Launch a browser for one worker and return the handlers used to run
     * jobs on it.
     *
     * NOTE: `shift()` consumes one entry of `perBrowserOptions` per call, so
     * the array is mutated. If it is exhausted, `options` is `undefined` and
     * is passed to `launch` as-is.
     *
     * @returns {Promise<{jobInstance: Function, close: Function, repair: Function}>}
     *   Object exposing `jobInstance`, `close` and `repair` async functions.
     */
    async workerInstance() {
        const options = this.options.perBrowserOptions.shift();
        debug('Launch puppeteer instance with options=%o', options);
        let chrome = await this.puppeteer.launch(options);

        return {
            /**
             * Open a new incognito context and page for a single job.
             * Rejects if opening them takes longer than BROWSER_TIMEOUT.
             */
            jobInstance: async () => {
                // Scoped per job so that close() below always targets the
                // context created for this particular job.
                let context;
                let page;

                await timeoutExecute(BROWSER_TIMEOUT, (async () => {
                    context = await chrome.createIncognitoBrowserContext();
                    page = await context.newPage();
                })());

                return {
                    resources: {
                        page,
                    },

                    // Closing the context also disposes of the page it owns.
                    close: async () => {
                        await timeoutExecute(BROWSER_TIMEOUT, context.close());
                    },
                };
            },

            /** Shut down this worker's browser. */
            close: async () => {
                await chrome.close();
            },

            /** Replace this worker's browser with a freshly launched one. */
            repair: async () => {
                debug('Starting repair');
                try {
                    // will probably fail, but just in case the repair was not necessary
                    await chrome.close();
                } catch (e) {
                    // Best effort only: the old browser may already be gone.
                    debug('Ignoring error while closing browser during repair: %o', e);
                }

                // just relaunch as there is only one page per browser
                chrome = await this.puppeteer.launch(options);
            },
        };
    }
}

module.exports = CustomConcurrency;