UNPKG

scrapper-tools

Version:

It's in development, but I use it in all my web automation projects.

221 lines 10.3 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const puppeteer_core_1 = __importDefault(require("puppeteer-core")); const chrome_paths_1 = __importDefault(require("chrome-paths")); const async_lock_1 = __importDefault(require("async-lock")); const consoleMessage_1 = __importDefault(require("../consoleMessage")); const pageStealth_1 = __importDefault(require("./pageStealth")); const functionToInject_1 = __importDefault(require("../functionToInject")); let lock = new async_lock_1.default(); let defaultConfig = { browserHandle: null, proxy: null, headless: false, userDataDir: null, windowSize: { width: 595, height: 842 }, blockFonts: false, blockImages: false, blockCSS: false, defaultNavigationTimeout: 30 * 1000, extensions: [], showPageError: false, args: [], hooks: [], }; let config = { default: Object.assign({}, defaultConfig), }; function loadHooks(hooks, name, ...args) { return __awaiter(this, void 0, void 0, function* () { hooks.filter((v) => v.name === name).forEach((v) => __awaiter(this, void 0, void 0, function* () { return yield v.action(...args); })); }); } function browser(instanceName) { return __awaiter(this, void 0, void 0, function* () { return yield lock 
.acquire("instance_" + instanceName, function () { return __awaiter(this, void 0, void 0, function* () { if (config[instanceName].browserHandle) return config[instanceName].browserHandle; let args = [ `--window-size=${config[instanceName].windowSize.width},${config[instanceName].windowSize.height}`, "--no-sandbox", "--disable-setuid-sandbox", "--disable-web-security", "--disable-features=site-per-process", "--ignore-certificate-errors", "--enable-features=NetworkService", "--allow-running-insecure-content", "--enable-automation", "--disable-background-timer-throttling", "--disable-backgrounding-occluded-windows", "--disable-renderer-backgrounding", ...config[instanceName].args, ]; if (config[instanceName].proxy) { args.push(`--proxy-server=${config[instanceName].proxy}`); } if (config[instanceName].extensions.length > 0) { args.push(`--disable-extensions-except=${config[instanceName].extensions.join(",")}`, `--load-extension=${config[instanceName].extensions.join(",")}`); } let launchOptions = { userDataDir: config[instanceName].userDataDir, headless: config[instanceName].headless, args, ignoreDefaultArgs: ["--enable-automation"], defaultViewport: null, ignoreHTTPSErrors: true, }; launchOptions.executablePath = chrome_paths_1.default.chrome; config[instanceName].browserHandle = yield puppeteer_core_1.default.launch(launchOptions); return config[instanceName].browserHandle; }); }) .catch((err) => console.log("Error on starting new page: Lock Error ->", err)); }); } function makePageFaster(page, instanceName) { return __awaiter(this, void 0, void 0, function* () { let instanceConfig = config[instanceName]; yield loadHooks(instanceConfig["hooks"], "make_page_faster", page); yield page.setDefaultNavigationTimeout(instanceConfig.defaultNavigationTimeout); yield page.setDefaultTimeout(instanceConfig.defaultNavigationTimeout); const session = yield page.target().createCDPSession(); yield page.setBypassCSP(true); yield pageStealth_1.default(page); yield page.addScriptTag({ 
content: `${functionToInject_1.default.waitForElement} ${functionToInject_1.default.waitForElementToBeRemoved} ${functionToInject_1.default.delay}`, }); if (instanceConfig.showPageError === true) { page.on("error", (err) => { consoleMessage_1.default.error("Error happen at the page: ", err); }); page.on("pageerror", (pageerr) => { consoleMessage_1.default.error("Page Error occurred: ", pageerr); }); } if (instanceConfig.blockCSS || instanceConfig.blockFonts || instanceConfig.blockImages) { yield page.setRequestInterception(true); page.on("request", (request) => { if ((instanceConfig.blockImages && request.resourceType() === "image") || (instanceConfig.blockFonts && request.resourceType() === "font") || (instanceConfig.blockCSS && request.resourceType() === "stylesheet")) { request.abort(); } else { request.continue(); } }); } yield session.send("Page.enable"); yield session.send("Page.setWebLifecycleState", { state: "active", }); return page; }); } exports.default = (instanceName = "default") => { return { init: (useCurrentDefaultConfig = true) => __awaiter(void 0, void 0, void 0, function* () { if (useCurrentDefaultConfig) { config[instanceName] = Object.assign({}, config.default); } else { config[instanceName] = Object.assign({}, defaultConfig); } }), getBrowserHandle: () => __awaiter(void 0, void 0, void 0, function* () { return yield browser(instanceName); }), newPage: () => __awaiter(void 0, void 0, void 0, function* () { consoleMessage_1.default.info("Fast Page", "Launching new page "); let brow = yield browser(instanceName); let page = yield brow.newPage(); yield makePageFaster(page, instanceName); return page; }), closeBrowser: () => __awaiter(void 0, void 0, void 0, function* () { consoleMessage_1.default.info("Fast Page", "Requesting to close browser "); return yield lock .acquire("instance_close_" + instanceName, function () { return __awaiter(this, void 0, void 0, function* () { if (config[instanceName].browserHandle) { let bHandle = yield 
browser(instanceName); yield bHandle.close(); } config[instanceName].browserHandle = null; return "closed"; }); }) .catch((err) => console.log("Error on closing browser: Lock Error ->", err)); }), setProxy: (value) => { consoleMessage_1.default.info("Fast Page", "Setting proxy to ", value); config[instanceName].proxy = value; }, setShowPageError: (value) => { consoleMessage_1.default.info("Fast Page", "Setting show page error to ", value); config[instanceName].showPageError = value; }, setHeadless: (value = false) => { consoleMessage_1.default.info("Fast Page", "Setting headless to ", value); config[instanceName].headless = value; }, setUserDataDir: (value) => { consoleMessage_1.default.info("Fast Page", "Storing chrome cache in ", value); config[instanceName].userDataDir = value; }, setWindowSizeArg: (value) => { consoleMessage_1.default.info("Fast Page", "Setting window size to ", value); config[instanceName].windowSize = value; }, set2captchaToken: (value) => { consoleMessage_1.default.info("Fast Page", "Setting 2captcha token to ", value); config[instanceName].twoCaptchaToken = value; }, setExtensionsPaths: (value) => { config[instanceName].extensions = value; }, setDefaultNavigationTimeout: (value) => { consoleMessage_1.default.info("Fast Page", "Default navigation timeout", value); config[instanceName].defaultNavigationTimeout = value; }, blockImages: (value = true) => { consoleMessage_1.default.info("Fast Page", "Block Image", value); config[instanceName].blockImages = value; }, blockFonts: (value = true) => { consoleMessage_1.default.info("Fast Page", "Block Font", value); config[instanceName].blockFonts = value; }, blockCSS: (value = true) => { consoleMessage_1.default.info("Fast Page", "Block CSS", value); config[instanceName].blockCSS = value; }, getConfig(instanceName = null) { if (instanceName === null) { return config; } return config[instanceName]; }, addHook(name, action) { config[instanceName].hooks.push({ name, action }); }, addArg(arg) { 
config[instanceName].args.push(arg); }, }; }; //# sourceMappingURL=index.js.map