@35iter/spider-core
Version:
使用 puppeteer-cluster 的爬虫工具。
86 lines (85 loc) • 3.56 kB
JavaScript
;
/**
 * Down-levelled async/await helper: drives a generator function to completion
 * and exposes the outcome as a promise. An existing `this.__awaiter` is reused
 * when present; otherwise the implementation below is used.
 *
 * @param {*} thisArg `this` value the generator function is invoked with.
 * @param {*} _arguments Arguments for the generator function (an empty array is used when falsy).
 * @param {Function} [P] Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator Generator function whose `yield`s stand in for `await`.
 * @returns {Promise<*>} Resolves with the generator's return value, or rejects with
 *   whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a P instance unless it already is one.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the fulfilled value; an exception it throws rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Re-throw the rejection reason inside the generator so its try/catch blocks can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finished: resolve with the return value. Otherwise wait for the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (rebinding `generator` to the iterator) and run it to its first yield.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.openURL = exports.destory = exports.launch = exports.setConfig = void 0;
const puppeteer_cluster_1 = require("@35iter/puppeteer-cluster");
const check_1 = require("./check");
// Cached result of Cluster.launch(), assigned by launch() and awaited by destory()
// (so presumably a promise for the cluster instance); null while nothing is launched.
let cluster = null;
// Module-wide settings; setConfig() merges caller overrides into this object.
let config = {
// Forwarded as the `maxConcurrency` option of Cluster.launch() in launch().
maxConcurrency: 5,
// Object whose log()/error() methods openURL() calls to report timings and failures.
logger: console,
};
/**
 * Merge caller-supplied overrides into the module-wide `config` object.
 *
 * Keys whose value is `undefined` are skipped, so a call such as
 * `setConfig({ logger: undefined })` cannot erase a default that the rest of
 * the module dereferences unconditionally (for example `config.logger.error`).
 * The existing `config` object is updated in place.
 *
 * @param {object} [_config={}] Partial settings (e.g. `maxConcurrency`, `logger`);
 *   `null` is treated like an empty object.
 * @returns {void}
 */
function setConfig(_config = {}) {
    if (_config == null) {
        return;
    }
    for (const key of Object.keys(_config)) {
        const value = _config[key];
        // An explicit `undefined` means "not provided": keep the current setting.
        if (value !== undefined) {
            config[key] = value;
        }
    }
}
exports.setConfig = setConfig;
/**
 * Lazily start the shared puppeteer cluster and return it.
 *
 * The first call invokes `Cluster.launch()` and caches the resulting promise in
 * the module-level `cluster` variable; later calls reuse that promise, so
 * concurrent callers share one cluster. If the launch fails, the cached promise
 * is dropped so that the next call retries instead of replaying the same
 * rejection forever.
 *
 * @returns {Promise<object>} Resolves with the cluster instance; rejects when
 *   `Cluster.launch()` fails.
 */
function launch() {
    return __awaiter(this, void 0, void 0, function* () {
        if (!cluster) {
            const pending = Promise.resolve(puppeteer_cluster_1.Cluster.launch({
                concurrency: puppeteer_cluster_1.Cluster.CONCURRENCY_CONTEXT,
                maxConcurrency: config.maxConcurrency,
                // Largest signed 32-bit integer, passed as the cluster's `timeout` option.
                timeout: 2147483647,
            }));
            cluster = pending;
            // Forget a failed launch (unless something newer has replaced it) so a retry is possible.
            pending.catch(() => {
                if (cluster === pending) {
                    cluster = null;
                }
            });
        }
        return cluster;
    });
}
exports.launch = launch;
/**
 * Shut down the shared cluster, if one has been launched.
 *
 * Waits for the cached launch promise, then for the cluster to become idle,
 * then closes it. The module-level `cluster` reference is cleared even when
 * any of those steps fails, so a later `launch()` starts from a clean slate
 * instead of reusing a broken or half-closed cluster.
 *
 * @returns {Promise<void>} Resolves once the cluster is closed (or immediately when
 *   none exists); rejects with the error raised by the launch promise, `idle()` or `close()`.
 */
function destory() {
    return __awaiter(this, void 0, void 0, function* () {
        const pending = cluster;
        if (!pending) {
            return;
        }
        try {
            const activeCluster = yield pending;
            yield activeCluster.idle();
            yield activeCluster.close();
        }
        finally {
            // Only clear the slot if it still holds the cluster this call was tearing down.
            if (cluster === pending) {
                cluster = null;
            }
        }
    });
}
exports.destory = destory;
/**
 * Open `url` in a page supplied by the shared cluster and run `task` on it.
 *
 * The page gets a fixed desktop Chrome user agent and a viewport 1000 px high,
 * then navigates to `url` with all timeouts disabled. When `check` is given it
 * is run through `runCheck` before the task starts. The time from launch to
 * "ready for task" is reported through `config.logger.log`.
 *
 * @param {object} options
 * @param {string} options.url Address to navigate to.
 * @param {Function} options.task Called with the cluster's job parameters (including `page`);
 *   its result becomes the result of `openURL`.
 * @param {Function} [options.check] Optional extra readiness check, called with the page.
 * @param {number} [options.checkTimeout=10000] Forwarded to `runCheck` together with `check`.
 * @param {string|number} [options.device="pc"] `"pc"` selects width 1400, `"mobile"` selects 375;
 *   any other value is used as the viewport width as-is (presumably a number — confirm with callers).
 * @returns {Promise<*>} The task's result, or `null` when the job failed (the error is passed to
 *   `config.logger.error`). A failure of `launch()` itself is not caught here and rejects.
 */
function openURL({ url, task, check, checkTimeout = 10 * 1000, device = "pc", }) {
    return __awaiter(this, void 0, void 0, function* () {
        const sharedCluster = yield launch();
        const startedAt = Date.now();
        const logPrefix = `open ${url} take:`;

        // Desktop width by default; named presets first, otherwise the raw value.
        let viewportWidth = device;
        if (device === "pc") {
            viewportWidth = 1400;
        }
        else if (device === "mobile") {
            viewportWidth = 375;
        }

        const visit = (params) => __awaiter(this, void 0, void 0, function* () {
            const page = params.page;
            // User agent
            yield page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36");
            yield page.setViewport({ width: viewportWidth, height: 1000 });
            yield page.setDefaultTimeout(0);
            // A single-page app may keep a task busy inside one page for a long time,
            // so navigation gets no timeout (the default page lifecycle is 30 seconds).
            const navigation = { waitUntil: "domcontentloaded", timeout: 0 };
            yield page.goto(url, navigation);
            // Optional caller-supplied readiness check.
            if (check) {
                const { runCheck } = check_1;
                yield runCheck(() => check(page), checkTimeout);
            }
            const elapsed = Date.now() - startedAt;
            config.logger.log(`${logPrefix}${elapsed}`);
            return yield task(params);
        });

        let outcome = null;
        try {
            outcome = yield sharedCluster.execute({}, visit);
        }
        catch (error) {
            // Best effort: report the failure and hand back null instead of rejecting.
            config.logger.error(error);
        }
        return outcome;
    });
}
exports.openURL = openURL;