scrapper-tools
Version:
It's in development, but I use it in all my web automation projects.
221 lines • 10.3 kB
JavaScript
;
// Compiler-emitted helper that runs a generator function as a promise-returning coroutine.
// An already-installed `this.__awaiter` is reused when present; otherwise this implementation is used.
//   thisArg / _arguments - receiver and arguments the generator function is applied with
//   P                    - promise constructor to use; falls back to the global Promise when falsy
//   generator            - generator function whose yielded values are awaited one by one
// The returned promise resolves with the generator's return value and rejects when the
// generator throws or a yielded promise rejects without being caught inside the generator.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wraps a yielded value in a P promise unless it already is one.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the fulfilled value of the promise it yielded.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw the rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Either finish with the returned value or wait for the next yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` parameter with the iterator) and start it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Compiler-emitted interop helper: gives every required module a `.default` property.
// Modules flagged with `__esModule` are returned untouched; anything else is wrapped
// as `{ default: mod }`. An already-installed `this.__importDefault` takes precedence.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const puppeteer_core_1 = __importDefault(require("puppeteer-core"));
const chrome_paths_1 = __importDefault(require("chrome-paths"));
const async_lock_1 = __importDefault(require("async-lock"));
const consoleMessage_1 = __importDefault(require("../consoleMessage"));
const pageStealth_1 = __importDefault(require("./pageStealth"));
const functionToInject_1 = __importDefault(require("../functionToInject"));
// Module-wide lock shared by all instances. It is acquired under the keys
// "instance_<name>" (browser launch) and "instance_close_<name>" (browser close) below.
let lock = new async_lock_1.default();
/**
 * Pristine configuration template. It is the source for `init(false)` and for the
 * initial "default" instance, and must never be mutated at runtime.
 */
const defaultConfig = {
    browserHandle: null, // launched browser, cached after the first launch
    proxy: null, // value for Chrome's --proxy-server flag; null means no proxy flag
    headless: false, // forwarded to the launch options
    userDataDir: null, // forwarded to the launch options
    windowSize: { width: 595, height: 842 }, // used for the --window-size flag
    blockFonts: false, // abort "font" requests when true
    blockImages: false, // abort "image" requests when true
    blockCSS: false, // abort "stylesheet" requests when true
    defaultNavigationTimeout: 30 * 1000, // ms; applied as both navigation and general page timeout
    extensions: [], // extension paths passed to --load-extension
    showPageError: false, // attach "error" / "pageerror" listeners to new pages when true
    args: [], // extra Chrome command-line arguments
    hooks: [], // { name, action } entries run by loadHooks
};
/**
 * Per-instance configuration, keyed by instance name.
 *
 * The "default" entry gets its own copies of the nested containers. A plain
 * `Object.assign({}, defaultConfig)` would share the `windowSize`, `extensions`,
 * `args` and `hooks` objects with the template, so `addHook` / `addArg` on the
 * default instance would silently corrupt `defaultConfig`.
 */
const config = {
    default: Object.assign({}, defaultConfig, {
        windowSize: Object.assign({}, defaultConfig.windowSize),
        extensions: [...defaultConfig.extensions],
        args: [...defaultConfig.args],
        hooks: [...defaultConfig.hooks],
    }),
};
/**
 * Runs every hook registered under `name` and waits for all of them to finish.
 *
 * All matching hooks are started immediately, in registration order, and run
 * concurrently. The previous implementation started them inside `forEach` and never
 * awaited the resulting promises, so callers continued before the hooks were done
 * and a failing hook surfaced only as an unhandled rejection.
 *
 * @param {Array<{name: string, action: Function}>} hooks - registered hooks
 * @param {string} name - hook name to run
 * @param {...*} args - arguments forwarded to each hook's `action`
 * @returns {Promise<void>} resolves once every matching hook has settled successfully;
 *   rejects with the first hook failure
 */
function loadHooks(hooks, name, ...args) {
    return __awaiter(this, void 0, void 0, function* () {
        const running = hooks
            .filter((hook) => hook.name === name)
            // Wrapping each call keeps a synchronous throw in one hook from
            // preventing the remaining hooks from being started.
            .map((hook) => __awaiter(this, void 0, void 0, function* () { return yield hook.action(...args); }));
        yield Promise.all(running);
    });
}
/**
 * Returns the browser of the given instance, launching it on first use.
 *
 * The launch runs under the lock key "instance_<instanceName>"; once a handle is
 * cached in the instance configuration it is returned without launching again.
 * Any error raised while holding the lock is logged with console.log and the
 * returned promise then resolves to undefined instead of rejecting.
 *
 * @param {string} instanceName - key into the module-level `config` map
 * @returns {Promise<*>} the cached or newly launched browser, or undefined on failure
 */
function browser(instanceName) {
    return __awaiter(this, void 0, void 0, function* () {
        const launchOnce = () => __awaiter(this, void 0, void 0, function* () {
            const settings = config[instanceName];
            if (settings.browserHandle) {
                return settings.browserHandle;
            }
            const chromeFlags = [
                `--window-size=${settings.windowSize.width},${settings.windowSize.height}`,
                "--no-sandbox",
                "--disable-setuid-sandbox",
                "--disable-web-security",
                "--disable-features=site-per-process",
                "--ignore-certificate-errors",
                "--enable-features=NetworkService",
                "--allow-running-insecure-content",
                // NOTE(review): this flag is passed explicitly here and also listed in
                // ignoreDefaultArgs below — confirm which of the two is intended.
                "--enable-automation",
                "--disable-background-timer-throttling",
                "--disable-backgrounding-occluded-windows",
                "--disable-renderer-backgrounding",
                ...settings.args,
            ];
            if (settings.proxy) {
                chromeFlags.push(`--proxy-server=${settings.proxy}`);
            }
            if (settings.extensions.length > 0) {
                const extensionList = settings.extensions.join(",");
                chromeFlags.push(`--disable-extensions-except=${extensionList}`, `--load-extension=${extensionList}`);
            }
            const launchOptions = {
                userDataDir: settings.userDataDir,
                headless: settings.headless,
                args: chromeFlags,
                ignoreDefaultArgs: ["--enable-automation"],
                defaultViewport: null,
                ignoreHTTPSErrors: true,
                executablePath: chrome_paths_1.default.chrome,
            };
            settings.browserHandle = yield puppeteer_core_1.default.launch(launchOptions);
            // Re-read through the map: the entry may have been replaced while launching.
            return config[instanceName].browserHandle;
        });
        const pending = lock.acquire("instance_" + instanceName, launchOnce);
        return yield pending.catch((err) => console.log("Error on starting new page: Lock Error ->", err));
    });
}
/**
 * Prepares a freshly opened page according to the instance configuration.
 *
 * In order: runs the "make_page_faster" hooks, applies the configured timeout as both
 * navigation and general timeout, opens a CDP session, bypasses CSP, applies the
 * stealth patches, injects the helper functions as a script tag, optionally attaches
 * error listeners and request blocking, and finally sets the page lifecycle state to
 * "active" through the CDP session.
 *
 * @param {*} page - page object to configure
 * @param {string} instanceName - key into the module-level `config` map
 * @returns {Promise<*>} the same page object
 */
function makePageFaster(page, instanceName) {
    return __awaiter(this, void 0, void 0, function* () {
        const settings = config[instanceName];
        yield loadHooks(settings.hooks, "make_page_faster", page);

        yield page.setDefaultNavigationTimeout(settings.defaultNavigationTimeout);
        yield page.setDefaultTimeout(settings.defaultNavigationTimeout);

        const cdp = yield page.target().createCDPSession();
        yield page.setBypassCSP(true);
        yield pageStealth_1.default(page);

        const helpers = functionToInject_1.default;
        const helperSource = `${helpers.waitForElement} ${helpers.waitForElementToBeRemoved} ${helpers.delay}`;
        yield page.addScriptTag({ content: helperSource });

        if (settings.showPageError === true) {
            page.on("error", (err) => {
                consoleMessage_1.default.error("Error happen at the page: ", err);
            });
            page.on("pageerror", (pageerr) => {
                consoleMessage_1.default.error("Page Error occurred: ", pageerr);
            });
        }

        const wantsBlocking = settings.blockCSS || settings.blockFonts || settings.blockImages;
        if (wantsBlocking) {
            yield page.setRequestInterception(true);
            page.on("request", (request) => {
                // The flags are read per request, so later changes to the
                // instance configuration still take effect on this page.
                const isBlocked = (settings.blockImages && request.resourceType() === "image") ||
                    (settings.blockFonts && request.resourceType() === "font") ||
                    (settings.blockCSS && request.resourceType() === "stylesheet");
                if (!isBlocked) {
                    request.continue();
                    return;
                }
                request.abort();
            });
        }

        yield cdp.send("Page.enable");
        yield cdp.send("Page.setWebLifecycleState", { state: "active" });
        return page;
    });
}
exports.default = (instanceName = "default") => {
return {
init: (useCurrentDefaultConfig = true) => __awaiter(void 0, void 0, void 0, function* () {
if (useCurrentDefaultConfig) {
config[instanceName] = Object.assign({}, config.default);
}
else {
config[instanceName] = Object.assign({}, defaultConfig);
}
}),
getBrowserHandle: () => __awaiter(void 0, void 0, void 0, function* () {
return yield browser(instanceName);
}),
newPage: () => __awaiter(void 0, void 0, void 0, function* () {
consoleMessage_1.default.info("Fast Page", "Launching new page ");
let brow = yield browser(instanceName);
let page = yield brow.newPage();
yield makePageFaster(page, instanceName);
return page;
}),
closeBrowser: () => __awaiter(void 0, void 0, void 0, function* () {
consoleMessage_1.default.info("Fast Page", "Requesting to close browser ");
return yield lock
.acquire("instance_close_" + instanceName, function () {
return __awaiter(this, void 0, void 0, function* () {
if (config[instanceName].browserHandle) {
let bHandle = yield browser(instanceName);
yield bHandle.close();
}
config[instanceName].browserHandle = null;
return "closed";
});
})
.catch((err) => console.log("Error on closing browser: Lock Error ->", err));
}),
setProxy: (value) => {
consoleMessage_1.default.info("Fast Page", "Setting proxy to ", value);
config[instanceName].proxy = value;
},
setShowPageError: (value) => {
consoleMessage_1.default.info("Fast Page", "Setting show page error to ", value);
config[instanceName].showPageError = value;
},
setHeadless: (value = false) => {
consoleMessage_1.default.info("Fast Page", "Setting headless to ", value);
config[instanceName].headless = value;
},
setUserDataDir: (value) => {
consoleMessage_1.default.info("Fast Page", "Storing chrome cache in ", value);
config[instanceName].userDataDir = value;
},
setWindowSizeArg: (value) => {
consoleMessage_1.default.info("Fast Page", "Setting window size to ", value);
config[instanceName].windowSize = value;
},
set2captchaToken: (value) => {
consoleMessage_1.default.info("Fast Page", "Setting 2captcha token to ", value);
config[instanceName].twoCaptchaToken = value;
},
setExtensionsPaths: (value) => {
config[instanceName].extensions = value;
},
setDefaultNavigationTimeout: (value) => {
consoleMessage_1.default.info("Fast Page", "Default navigation timeout", value);
config[instanceName].defaultNavigationTimeout = value;
},
blockImages: (value = true) => {
consoleMessage_1.default.info("Fast Page", "Block Image", value);
config[instanceName].blockImages = value;
},
blockFonts: (value = true) => {
consoleMessage_1.default.info("Fast Page", "Block Font", value);
config[instanceName].blockFonts = value;
},
blockCSS: (value = true) => {
consoleMessage_1.default.info("Fast Page", "Block CSS", value);
config[instanceName].blockCSS = value;
},
getConfig(instanceName = null) {
if (instanceName === null) {
return config;
}
return config[instanceName];
},
addHook(name, action) {
config[instanceName].hooks.push({ name, action });
},
addArg(arg) {
config[instanceName].args.push(arg);
},
};
};
//# sourceMappingURL=index.js.map