/*
 * @qualweb/core
 * Version: (value missing from the extracted listing)
 * QualWeb evaluator core engine
 * 146 lines • 5.49 kB
 * JavaScript
 */
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.QualWeb = void 0;
exports.getFileParsedUrls = getFileParsedUrls;
const puppeteer_extra_1 = __importDefault(require("puppeteer-extra"));
const puppeteer_cluster_1 = require("puppeteer-cluster");
const puppeteer_extra_plugin_stealth_1 = __importDefault(require("puppeteer-extra-plugin-stealth"));
const puppeteer_extra_plugin_adblocker_1 = __importDefault(require("puppeteer-extra-plugin-adblocker"));
const fs_1 = require("fs");
require("colors");
const crawler_1 = require("@qualweb/crawler");
const lib_1 = require("./lib");
/**
 * Entry point of the QualWeb core engine.
 *
 * Owns a puppeteer-cluster instance, queues URLs and/or raw HTML on it and
 * collects one report per evaluated page.
 */
class QualWeb {
    /** puppeteer-cluster instance; stays undefined until `start()` has resolved. */
    cluster;
    /** Plugin registry passed to every `QualwebPage` built by the cluster task. */
    pluginManager = new lib_1.PluginManager();

    /**
     * @param {{ stealth?: boolean, adBlock?: boolean }} [plugins] - Optional
     *   puppeteer-extra plugins to enable. They are registered on the
     *   module-level puppeteer-extra instance, not on this object.
     */
    constructor(plugins) {
        if (plugins?.stealth) {
            puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_stealth_1.default)());
        }
        if (plugins?.adBlock) {
            puppeteer_extra_1.default.use((0, puppeteer_extra_plugin_adblocker_1.default)({ blockTrackersAndAnnoyances: true }));
        }
    }

    /**
     * Launches the cluster used by `evaluate()`.
     *
     * @param {object} [clusterOptions] - `maxConcurrency` (default 1),
     *   `timeout` (default 60 * 1000) and `monitor` (default false).
     * @param {object} [puppeteerOptions] - Forwarded untouched to the cluster.
     * @returns {Promise<void>}
     */
    async start(clusterOptions, puppeteerOptions) {
        this.cluster = await puppeteer_cluster_1.Cluster.launch({
            concurrency: puppeteer_cluster_1.Cluster.CONCURRENCY_CONTEXT,
            maxConcurrency: clusterOptions?.maxConcurrency ?? 1,
            puppeteerOptions,
            puppeteer: puppeteer_extra_1.default,
            timeout: clusterOptions?.timeout ?? 60 * 1000,
            monitor: clusterOptions?.monitor ?? false
        });
    }

    /**
     * Registers a plugin with this instance's plugin manager.
     *
     * @param {object} plugin - Plugin handed to `PluginManager.use`.
     * @returns {QualWeb} This instance, so calls can be chained.
     */
    use(plugin) {
        this.pluginManager.use(plugin);
        return this;
    }

    /**
     * Closes the cluster if one was started; otherwise does nothing.
     *
     * @returns {Promise<void>}
     */
    async stop() {
        await this.cluster?.close();
    }

    /**
     * Evaluates every input described by `options` and returns the reports.
     *
     * Note: when `options.translate` is falsy it is set to `'en'` on the
     * caller's object. Every cluster call is guarded with `?.`, so without a
     * prior `start()` nothing is queued and the returned object stays empty.
     *
     * @param {object} options - Evaluation options (`url`, `urls`, `file`,
     *   `crawl`, `html`, `translate`, `log`, ...).
     * @returns {Promise<Object<string, object>>} Reports keyed by URL; the
     *   report for raw HTML is stored under `'customHtml'`.
     * @throws {Error} 'Invalid input method' when no URL and no HTML is given.
     */
    async evaluate(options) {
        const urls = await this.checkUrls(options);
        if (!options.translate) {
            options.translate = 'en';
        }
        const errorManager = new lib_1.ErrorManager(options.log);
        errorManager.handle(this.cluster);
        const reports = {};
        // The task must be registered before anything is queued.
        await this.handlePageEvaluations(reports, options);
        this.addUrlsToEvaluate(urls);
        if (options.html) {
            this.addHtmlCodeToEvaluate(options.html);
        }
        await this.cluster?.idle();
        errorManager.showErrorsIfAny();
        return reports;
    }

    /**
     * Installs the cluster task that evaluates one queued entry and stores
     * its report in `reports`.
     *
     * @param {Object<string, object>} reports - Filled in place by the task.
     * @param {object} options - Passed to `EvaluationManager.evaluate`.
     * @returns {Promise<void>}
     */
    async handlePageEvaluations(reports, options) {
        await this.cluster?.task(async ({ page, data: { url, html } }) => {
            const qwPage = new lib_1.QualwebPage(this.pluginManager, page, url, html);
            const evaluationManager = new lib_1.EvaluationManager(qwPage);
            // Entries queued without a URL (raw HTML) share the 'customHtml' key.
            reports[url ?? 'customHtml'] = await evaluationManager.evaluate(options);
        });
    }

    /**
     * Queues one cluster job per URL.
     *
     * @param {string[]} urls - URLs to evaluate.
     */
    addUrlsToEvaluate(urls) {
        urls.forEach((url) => this.cluster?.queue({ url }));
    }

    /**
     * Queues a single cluster job for raw HTML.
     *
     * @param {string} html - HTML source to evaluate.
     */
    addHtmlCodeToEvaluate(html) {
        this.cluster?.queue({ html });
    }

    /**
     * Crawls `domain` in a dedicated browser and returns the crawler results.
     *
     * The browser context and the browser are closed in `finally` blocks so
     * that a failing crawl does not leave a browser process running.
     *
     * @param {string} domain - Starting point handed to the crawler.
     * @param {object} [options] - Passed to `Crawler.crawl`.
     * @param {object} [viewport] - Passed to the `Crawler` constructor.
     * @param {string} [waitUntil] - Passed to the `Crawler` constructor.
     * @returns {Promise<*>} Whatever `Crawler.getResults()` returns.
     */
    async crawl(domain, options, viewport, waitUntil) {
        const browser = await puppeteer_extra_1.default.launch();
        try {
            const incognito = await browser.createBrowserContext();
            try {
                const crawler = new crawler_1.Crawler(incognito, domain, viewport, waitUntil);
                await crawler.crawl(options);
                return crawler.getResults();
            }
            finally {
                await incognito.close();
            }
        }
        finally {
            await browser.close();
        }
    }

    /**
     * Collects the URLs to evaluate from `url`, `urls`, `file` and `crawl`,
     * in that order.
     *
     * @param {object} options - Evaluation options.
     * @returns {Promise<string[]>} URI-decoded, trimmed URLs.
     * @throws {Error} 'Invalid input method' when there are no URLs and
     *   `options.html` is undefined or blank.
     */
    async checkUrls(options) {
        const urls = [];
        if (options.url) {
            urls.push(decodeURIComponent(options.url).trim());
        }
        if (options.urls) {
            urls.push(...options.urls.map((url) => decodeURIComponent(url).trim()));
        }
        if (options.file) {
            urls.push(...(await getFileParsedUrls(options.file)));
        }
        if (options.crawl) {
            const resolution = options.viewport?.resolution;
            const viewport = {
                width: resolution?.width ?? 0,
                height: resolution?.height ?? 0,
                isMobile: options.viewport?.mobile ?? false,
                isLandscape: options.viewport?.landscape ?? true
            };
            // A viewport whose width and height add up to 0 is treated as
            // "not configured" and is not forwarded to the crawler.
            const crawlViewport = viewport.width + viewport.height !== 0 ? viewport : undefined;
            urls.push(...(await this.crawl(options.crawl, options.crawlOptions, crawlViewport, options.waitUntil)));
        }
        const hasHtml = options.html !== undefined && options.html.trim() !== '';
        if (!hasHtml && urls.length === 0) {
            throw new Error('Invalid input method');
        }
        return urls;
    }

    /**
     * Wraps an existing page in a `QualwebPage` backed by a fresh, empty
     * plugin manager.
     *
     * @param {object} page - Page object handed to the `QualwebPage` constructor.
     * @returns {object} The new `QualwebPage`.
     */
    static createPage(page) {
        return new lib_1.QualwebPage(new lib_1.PluginManager(), page);
    }
}
exports.QualWeb = QualWeb;
/**
 * Reads `file` and returns the URLs it lists, one per line.
 *
 * Each line is URI-decoded and trimmed. Lines that are empty after trimming,
 * or that cannot be decoded, are left out of the result.
 *
 * @param {string} file - Path handed to `readFileData`.
 * @returns {Promise<string[]>} Decoded, non-empty URLs in file order.
 */
async function getFileParsedUrls(file) {
    const content = await readFileData(file);
    const parsedUrls = [];
    for (const line of content.split('\n')) {
        let decoded;
        try {
            decoded = decodeURIComponent(line).trim();
        }
        catch (_err) {
            // Best effort: a malformed escape sequence only drops that line.
            continue;
        }
        if (decoded) {
            parsedUrls.push(decoded);
        }
    }
    return parsedUrls;
}
/**
 * Promise adapter around the callback-style `readFile`.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<string>} File contents decoded as UTF-8; rejects with the
 *   error reported by `readFile`.
 */
function readFileData(file) {
    return new Promise((resolve, reject) => {
        const { readFile } = fs_1;
        readFile(file, (error, buffer) => {
            if (error) {
                reject(error);
                return;
            }
            resolve(buffer.toString('utf-8'));
        });
    });
}
//# sourceMappingURL=qualweb.js.map