UNPKG

@35iter/spider-core

Version:

使用 puppeteer-cluster 的爬虫工具。

86 lines (85 loc) 3.56 kB
"use strict";
// TypeScript-emitted helper: drives a generator function as an async function,
// resolving with the generator's return value and rejecting on any throw.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.openURL = exports.destory = exports.launch = exports.setConfig = void 0;
const puppeteer_cluster_1 = require("@35iter/puppeteer-cluster");
const check_1 = require("./check");

// Holds the pending/settled result of Cluster.launch(), or null when no
// cluster has been launched (or it has been destroyed).
let cluster = null;

// Module-wide settings; overridden piecemeal through setConfig().
let config = {
    maxConcurrency: 5,
    logger: console,
};

/**
 * Merges the given options into the module-wide configuration.
 *
 * @param {object} [_config={}] Options to override. The keys read by this
 *   module are `maxConcurrency` (used by launch) and `logger` (its `log` and
 *   `error` methods are called by openURL).
 */
function setConfig(_config = {}) {
    config = Object.assign(config, _config);
}
exports.setConfig = setConfig;

/**
 * Returns the shared cluster, launching it on first use.
 *
 * The launch result is cached so concurrent callers share one cluster. If the
 * launch fails, the cache is cleared so that a later call can try again
 * instead of receiving the same rejection forever.
 *
 * @returns {Promise<object>} Resolves with the cluster instance.
 */
function launch() {
    return __awaiter(this, void 0, void 0, function* () {
        if (cluster) {
            return cluster;
        }
        const pending = puppeteer_cluster_1.Cluster.launch({
            concurrency: puppeteer_cluster_1.Cluster.CONCURRENCY_CONTEXT,
            maxConcurrency: config.maxConcurrency,
            // Largest signed 32-bit integer.
            timeout: 2147483647,
        });
        cluster = pending;
        // Forget a failed launch. The caller still observes the rejection
        // through `pending`; this handler only resets the cache.
        Promise.resolve(pending).catch(() => {
            if (cluster === pending) {
                cluster = null;
            }
        });
        return pending;
    });
}
exports.launch = launch;

/**
 * Waits for the shared cluster to become idle, closes it and clears the
 * cached reference. Does nothing when no cluster has been launched.
 *
 * The cached reference is cleared even when idle()/close() (or the launch
 * itself) fails, so the module never stays bound to a broken cluster; the
 * error is still propagated to the caller.
 *
 * @returns {Promise<void>}
 */
function destory() {
    return __awaiter(this, void 0, void 0, function* () {
        if (!cluster) {
            return;
        }
        const pending = cluster;
        try {
            const _cluster = yield pending;
            yield _cluster.idle();
            yield _cluster.close();
        }
        finally {
            // Only clear the reference this call was tearing down, so a
            // cluster launched in the meantime is not dropped.
            if (cluster === pending) {
                cluster = null;
            }
        }
    });
}
exports.destory = destory;

/**
 * Opens `url` in a page of the shared cluster and runs `task` against it.
 *
 * @param {object} options
 * @param {string} options.url Address to navigate to.
 * @param {Function} options.task Called with the cluster's execute params
 *   (which include `page`) once the page is ready; its result is returned.
 * @param {Function} [options.check] Optional callback invoked with the page
 *   through runCheck before `task` runs (presumably a readiness probe that
 *   must pass within `checkTimeout` - confirm against ./check).
 * @param {number} [options.checkTimeout=10000] Passed to runCheck as its
 *   second argument.
 * @param {"pc"|"mobile"|number} [options.device="pc"] "pc" gives a viewport
 *   width of 1400, "mobile" gives 375; any other value is used as the width.
 * @returns {Promise<*>} The result of `task`, or null when navigation, the
 *   check or the task throws (the error is reported via config.logger.error).
 *   A failure to launch the cluster is not caught and rejects the promise.
 */
function openURL({ url, task, check, checkTimeout = 10 * 1000, device = "pc", }) {
    return __awaiter(this, void 0, void 0, function* () {
        const _cluster = yield launch();
        const timetag = `open ${url} take:`;
        const time = +new Date();
        try {
            return yield _cluster.execute({}, (params) => __awaiter(this, void 0, void 0, function* () {
                const { page } = params;
                // User agent.
                yield page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36");
                // Defaults to the PC viewport size.
                yield page.setViewport({
                    width: device === "pc" ? 1400 : device === "mobile" ? 375 : device,
                    height: 1000,
                });
                yield page.setDefaultTimeout(0);
                // Continue once the document has been loaded.
                yield page.goto(url, {
                    waitUntil: "domcontentloaded",
                    /**
                     * For single-page applications a task may keep working
                     * inside one page for a long time, so no timeout can be
                     * set (by default a page's lifetime is 30 seconds).
                     */
                    timeout: 0,
                });
                // Extra caller-supplied check.
                if (check) {
                    yield (0, check_1.runCheck)(() => check(page), checkTimeout);
                }
                config.logger.log(`${timetag}${+new Date() - time}`);
                return yield task(params);
            }));
        }
        catch (error) {
            // Deliberate best-effort: report the failure and signal it with null.
            config.logger.error(error);
            return null;
        }
    });
}
exports.openURL = openURL;