x-crawl
Version:
x-crawl is a flexible Node.js AI-assisted crawler library.
965 lines (964 loc) • 97.8 kB
JavaScript
// Babel runtime helper emitted for transpiled `for...of` loops.
// Returns an object with four methods driven by the compiled loop:
//   s - start (obtains the iterator in the iterator path; no-op in the fallback path)
//   n - produce the next `{ done, value }` step
//   e - error hook (rethrows immediately in the fallback path; records the error in the iterator path)
//   f - finish (in the iterator path: calls the iterator's `return` when the loop ended early,
//       then rethrows any error recorded by `e`)
// When `r` exposes neither Symbol.iterator nor "@@iterator", arrays, values accepted by
// _unsupportedIterableToArray, and (if `e` is truthy) anything with a numeric `length` are stepped
// by index; every other value raises a TypeError.
function _createForOfIteratorHelper(r, e) { var t = "undefined" != typeof Symbol && r[Symbol.iterator] || r["@@iterator"]; if (!t) { if (Array.isArray(r) || (t = _unsupportedIterableToArray(r)) || e && r && "number" == typeof r.length) { t && (r = t); var _n = 0, F = function F() {}; return { s: F, n: function n() { return _n >= r.length ? { done: !0 } : { done: !1, value: r[_n++] }; }, e: function e(r) { throw r; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var o, a = !0, u = !1; return { s: function s() { t = t.call(r); }, n: function n() { var r = t.next(); return a = r.done, r; }, e: function e(r) { u = !0, o = r; }, f: function f() { try { a || null == t["return"] || t["return"](); } finally { if (u) throw o; } } }; }
// Babel helper that inlines the regenerator runtime used by transpiled generators / async functions.
// On its first call it builds the runtime object `e` (exposing wrap, mark, awrap, async, keys, values,
// AsyncIterator and isGeneratorFunction) and replaces itself with a function that returns that same
// object, so the setup below runs only once.
function _regeneratorRuntime() { "use strict"; /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */ _regeneratorRuntime = function _regeneratorRuntime() { return e; }; var t, e = {}, r = Object.prototype, n = r.hasOwnProperty, o = Object.defineProperty || function (t, e, r) { t[e] = r.value; }, i = "function" == typeof Symbol ? Symbol : {}, a = i.iterator || "@@iterator", c = i.asyncIterator || "@@asyncIterator", u = i.toStringTag || "@@toStringTag"; function define(t, e, r) { return Object.defineProperty(t, e, { value: r, enumerable: !0, configurable: !0, writable: !0 }), t[e]; } try { define({}, ""); } catch (t) { define = function define(t, e, r) { return t[e] = r; }; } function wrap(t, e, r, n) { var i = e && e.prototype instanceof Generator ? e : Generator, a = Object.create(i.prototype), c = new Context(n || []); return o(a, "_invoke", { value: makeInvokeMethod(t, r, c) }), a; } function tryCatch(t, e, r) { try { return { type: "normal", arg: t.call(e, r) }; } catch (t) { return { type: "throw", arg: t }; } } e.wrap = wrap; var h = "suspendedStart", l = "suspendedYield", f = "executing", s = "completed", y = {}; function Generator() {} function GeneratorFunction() {} function GeneratorFunctionPrototype() {} var p = {}; define(p, a, function () { return this; }); var d = Object.getPrototypeOf, v = d && d(d(values([]))); v && v !== r && n.call(v, a) && (p = v); var g = GeneratorFunctionPrototype.prototype = Generator.prototype = Object.create(p); function defineIteratorMethods(t) { ["next", "throw", "return"].forEach(function (e) { define(t, e, function (t) { return this._invoke(e, t); }); }); } function AsyncIterator(t, e) { function invoke(r, o, i, a) { var c = tryCatch(t[r], t, o); if ("throw" !== c.type) { var u = c.arg, h = u.value; return h && "object" == _typeof(h) && n.call(h, "__await") ? 
e.resolve(h.__await).then(function (t) { invoke("next", t, i, a); }, function (t) { invoke("throw", t, i, a); }) : e.resolve(h).then(function (t) { u.value = t, i(u); }, function (t) { return invoke("throw", t, i, a); }); } a(c.arg); } var r; o(this, "_invoke", { value: function value(t, n) { function callInvokeWithMethodAndArg() { return new e(function (e, r) { invoke(t, n, e, r); }); } return r = r ? r.then(callInvokeWithMethodAndArg, callInvokeWithMethodAndArg) : callInvokeWithMethodAndArg(); } }); } function makeInvokeMethod(e, r, n) { var o = h; return function (i, a) { if (o === f) throw Error("Generator is already running"); if (o === s) { if ("throw" === i) throw a; return { value: t, done: !0 }; } for (n.method = i, n.arg = a;;) { var c = n.delegate; if (c) { var u = maybeInvokeDelegate(c, n); if (u) { if (u === y) continue; return u; } } if ("next" === n.method) n.sent = n._sent = n.arg;else if ("throw" === n.method) { if (o === h) throw o = s, n.arg; n.dispatchException(n.arg); } else "return" === n.method && n.abrupt("return", n.arg); o = f; var p = tryCatch(e, r, n); if ("normal" === p.type) { if (o = n.done ? s : l, p.arg === y) continue; return { value: p.arg, done: n.done }; } "throw" === p.type && (o = s, n.method = "throw", n.arg = p.arg); } }; } function maybeInvokeDelegate(e, r) { var n = r.method, o = e.iterator[n]; if (o === t) return r.delegate = null, "throw" === n && e.iterator["return"] && (r.method = "return", r.arg = t, maybeInvokeDelegate(e, r), "throw" === r.method) || "return" !== n && (r.method = "throw", r.arg = new TypeError("The iterator does not provide a '" + n + "' method")), y; var i = tryCatch(o, e.iterator, r.arg); if ("throw" === i.type) return r.method = "throw", r.arg = i.arg, r.delegate = null, y; var a = i.arg; return a ? a.done ? 
(r[e.resultName] = a.value, r.next = e.nextLoc, "return" !== r.method && (r.method = "next", r.arg = t), r.delegate = null, y) : a : (r.method = "throw", r.arg = new TypeError("iterator result is not an object"), r.delegate = null, y); } function pushTryEntry(t) { var e = { tryLoc: t[0] }; 1 in t && (e.catchLoc = t[1]), 2 in t && (e.finallyLoc = t[2], e.afterLoc = t[3]), this.tryEntries.push(e); } function resetTryEntry(t) { var e = t.completion || {}; e.type = "normal", delete e.arg, t.completion = e; } function Context(t) { this.tryEntries = [{ tryLoc: "root" }], t.forEach(pushTryEntry, this), this.reset(!0); } function values(e) { if (e || "" === e) { var r = e[a]; if (r) return r.call(e); if ("function" == typeof e.next) return e; if (!isNaN(e.length)) { var o = -1, i = function next() { for (; ++o < e.length;) if (n.call(e, o)) return next.value = e[o], next.done = !1, next; return next.value = t, next.done = !0, next; }; return i.next = i; } } throw new TypeError(_typeof(e) + " is not iterable"); } return GeneratorFunction.prototype = GeneratorFunctionPrototype, o(g, "constructor", { value: GeneratorFunctionPrototype, configurable: !0 }), o(GeneratorFunctionPrototype, "constructor", { value: GeneratorFunction, configurable: !0 }), GeneratorFunction.displayName = define(GeneratorFunctionPrototype, u, "GeneratorFunction"), e.isGeneratorFunction = function (t) { var e = "function" == typeof t && t.constructor; return !!e && (e === GeneratorFunction || "GeneratorFunction" === (e.displayName || e.name)); }, e.mark = function (t) { return Object.setPrototypeOf ? 
Object.setPrototypeOf(t, GeneratorFunctionPrototype) : (t.__proto__ = GeneratorFunctionPrototype, define(t, u, "GeneratorFunction")), t.prototype = Object.create(g), t; }, e.awrap = function (t) { return { __await: t }; }, defineIteratorMethods(AsyncIterator.prototype), define(AsyncIterator.prototype, c, function () { return this; }), e.AsyncIterator = AsyncIterator, e.async = function (t, r, n, o, i) { void 0 === i && (i = Promise); var a = new AsyncIterator(wrap(t, r, n, o), i); return e.isGeneratorFunction(r) ? a : a.next().then(function (t) { return t.done ? t.value : a.next(); }); }, defineIteratorMethods(g), define(g, u, "Generator"), define(g, a, function () { return this; }), define(g, "toString", function () { return "[object Generator]"; }), e.keys = function (t) { var e = Object(t), r = []; for (var n in e) r.push(n); return r.reverse(), function next() { for (; r.length;) { var t = r.pop(); if (t in e) return next.value = t, next.done = !1, next; } return next.done = !0, next; }; }, e.values = values, Context.prototype = { constructor: Context, reset: function reset(e) { if (this.prev = 0, this.next = 0, this.sent = this._sent = t, this.done = !1, this.delegate = null, this.method = "next", this.arg = t, this.tryEntries.forEach(resetTryEntry), !e) for (var r in this) "t" === r.charAt(0) && n.call(this, r) && !isNaN(+r.slice(1)) && (this[r] = t); }, stop: function stop() { this.done = !0; var t = this.tryEntries[0].completion; if ("throw" === t.type) throw t.arg; return this.rval; }, dispatchException: function dispatchException(e) { if (this.done) throw e; var r = this; function handle(n, o) { return a.type = "throw", a.arg = e, r.next = n, o && (r.method = "next", r.arg = t), !!o; } for (var o = this.tryEntries.length - 1; o >= 0; --o) { var i = this.tryEntries[o], a = i.completion; if ("root" === i.tryLoc) return handle("end"); if (i.tryLoc <= this.prev) { var c = n.call(i, "catchLoc"), u = n.call(i, "finallyLoc"); if (c && u) { if (this.prev < 
i.catchLoc) return handle(i.catchLoc, !0); if (this.prev < i.finallyLoc) return handle(i.finallyLoc); } else if (c) { if (this.prev < i.catchLoc) return handle(i.catchLoc, !0); } else { if (!u) throw Error("try statement without catch or finally"); if (this.prev < i.finallyLoc) return handle(i.finallyLoc); } } } }, abrupt: function abrupt(t, e) { for (var r = this.tryEntries.length - 1; r >= 0; --r) { var o = this.tryEntries[r]; if (o.tryLoc <= this.prev && n.call(o, "finallyLoc") && this.prev < o.finallyLoc) { var i = o; break; } } i && ("break" === t || "continue" === t) && i.tryLoc <= e && e <= i.finallyLoc && (i = null); var a = i ? i.completion : {}; return a.type = t, a.arg = e, i ? (this.method = "next", this.next = i.finallyLoc, y) : this.complete(a); }, complete: function complete(t, e) { if ("throw" === t.type) throw t.arg; return "break" === t.type || "continue" === t.type ? this.next = t.arg : "return" === t.type ? (this.rval = this.arg = t.arg, this.method = "return", this.next = "end") : "normal" === t.type && e && (this.next = e), y; }, finish: function finish(t) { for (var e = this.tryEntries.length - 1; e >= 0; --e) { var r = this.tryEntries[e]; if (r.finallyLoc === t) return this.complete(r.completion, r.afterLoc), resetTryEntry(r), y; } }, "catch": function _catch(t) { for (var e = this.tryEntries.length - 1; e >= 0; --e) { var r = this.tryEntries[e]; if (r.tryLoc === t) { var n = r.completion; if ("throw" === n.type) { var o = n.arg; resetTryEntry(r); } return o; } } throw Error("illegal catch attempt"); }, delegateYield: function delegateYield(e, r, n) { return this.delegate = { iterator: values(e), resultName: r, nextLoc: n }, "next" === this.method && (this.arg = t), y; } }, e; }
/**
 * Babel helper: collects the own enumerable string keys of `e`, followed by its own symbol keys.
 *
 * @param {object} e - Object whose keys are listed.
 * @param {boolean} [r] - When truthy, non-enumerable symbol keys are left out.
 * @returns {Array<string|symbol>} String keys first, then symbols.
 */
function ownKeys(e, r) {
  var keys = Object.keys(e);
  if (!Object.getOwnPropertySymbols) {
    return keys;
  }
  var symbols = Object.getOwnPropertySymbols(e);
  if (r) {
    symbols = symbols.filter(function (symbol) {
      return Object.getOwnPropertyDescriptor(e, symbol).enumerable;
    });
  }
  keys.push.apply(keys, symbols);
  return keys;
}
/**
 * Babel helper implementing object spread: copies every following argument onto `e`.
 * Arguments at odd positions are copied key by key through _defineProperty; arguments at
 * even positions are copied through their property descriptors.
 *
 * @param {object} e - Target object; mutated and returned.
 * @returns {object} The same target object.
 */
function _objectSpread(e) {
  for (var position = 1; position < arguments.length; position++) {
    var source = null != arguments[position] ? arguments[position] : {};
    if (position % 2) {
      ownKeys(Object(source), !0).forEach(function (key) {
        _defineProperty(e, key, source[key]);
      });
    } else if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(e, Object.getOwnPropertyDescriptors(source));
    } else {
      ownKeys(Object(source)).forEach(function (key) {
        Object.defineProperty(e, key, Object.getOwnPropertyDescriptor(source, key));
      });
    }
  }
  return e;
}
/**
 * Babel helper: sets `e[r] = t`, going through Object.defineProperty (enumerable, configurable,
 * writable) when the normalised key is already present on `e` or its prototype chain.
 *
 * @param {object} e - Target object.
 * @param {*} r - Property key; normalised with _toPropertyKey.
 * @param {*} t - Value to store.
 * @returns {object} The target object.
 */
function _defineProperty(e, r, t) {
  var key = _toPropertyKey(r);
  if (key in e) {
    Object.defineProperty(e, key, {
      value: t,
      enumerable: !0,
      configurable: !0,
      writable: !0
    });
  } else {
    e[key] = t;
  }
  return e;
}
/**
 * Babel helper: converts a value to a property key (symbols are kept, everything else becomes a string).
 *
 * @param {*} t - Candidate key.
 * @returns {string|symbol} The property key.
 */
function _toPropertyKey(t) {
  var primitive = _toPrimitive(t, "string");
  if ("symbol" == _typeof(primitive)) {
    return primitive;
  }
  return primitive + "";
}
/**
 * Babel helper: converts `t` to a primitive, honouring Symbol.toPrimitive when the object defines it.
 *
 * @param {*} t - Value to convert; non-objects and null are returned unchanged.
 * @param {string} [r] - Conversion hint; "string" selects String(), anything else Number().
 * @returns {*} The primitive value.
 * @throws {TypeError} When the object's Symbol.toPrimitive method returns an object.
 */
function _toPrimitive(t, r) {
  if ("object" != _typeof(t) || !t) {
    return t;
  }
  var exoticToPrimitive = t[Symbol.toPrimitive];
  if (void 0 === exoticToPrimitive) {
    return "string" === r ? String(t) : Number(t);
  }
  var converted = exoticToPrimitive.call(t, r || "default");
  if ("object" == _typeof(converted)) {
    throw new TypeError("@@toPrimitive must return a primitive value.");
  }
  return converted;
}
/**
 * Babel helper: advances generator `n` by one step on behalf of _asyncToGenerator.
 *
 * @param {object} n - Generator object being driven.
 * @param {Function} t - Called with the final value once the generator is done.
 * @param {Function} e - Called with the error if stepping the generator throws.
 * @param {Function} r - Continuation used when the yielded value fulfils.
 * @param {Function} o - Continuation used when the yielded value rejects.
 * @param {string} a - Generator method to invoke ("next" or "throw").
 * @param {*} c - Argument passed to that method.
 */
function asyncGeneratorStep(n, t, e, r, o, a, c) {
  var step;
  var value;
  try {
    step = n[a](c);
    value = step.value;
  } catch (error) {
    e(error);
    return;
  }
  if (step.done) {
    t(value);
  } else {
    Promise.resolve(value).then(r, o);
  }
}
/**
 * Babel helper: wraps generator function `n` so that calling the wrapper returns a Promise
 * which settles when the generator finishes or throws.
 *
 * @param {Function} n - Generator function to drive.
 * @returns {Function} Wrapper forwarding `this` and all arguments to `n`.
 */
function _asyncToGenerator(n) {
  return function () {
    var self = this;
    var args = arguments;
    return new Promise(function (resolve, reject) {
      var generator = n.apply(self, args);
      function _next(value) {
        asyncGeneratorStep(generator, resolve, reject, _next, _throw, "next", value);
      }
      function _throw(error) {
        asyncGeneratorStep(generator, resolve, reject, _next, _throw, "throw", error);
      }
      _next(void 0);
    });
  };
}
/**
 * Babel helper behind array spread: returns an array copy of `r`, trying in turn the array,
 * iterable and array-like conversions, and throwing when none applies.
 *
 * @param {*} r - Value being spread.
 * @returns {Array} A new array with the elements of `r`.
 */
function _toConsumableArray(r) {
  var copy = _arrayWithoutHoles(r);
  if (copy) return copy;
  copy = _iterableToArray(r);
  if (copy) return copy;
  copy = _unsupportedIterableToArray(r);
  if (copy) return copy;
  return _nonIterableSpread();
}
/**
 * Babel helper: always throws; reached when a spread operand cannot be converted to an array.
 *
 * @throws {TypeError} Always.
 */
function _nonIterableSpread() {
  var message = "Invalid attempt to spread non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.";
  throw new TypeError(message);
}
/**
 * Babel helper: converts strings, Map/Set instances, `arguments` objects and integer typed
 * arrays to a plain array; returns undefined for anything else.
 *
 * @param {*} r - Value to convert.
 * @param {number} [a] - Optional maximum number of elements, forwarded to _arrayLikeToArray.
 * @returns {Array|undefined} The array copy, or undefined when `r` is not supported.
 */
function _unsupportedIterableToArray(r, a) {
  if (!r) {
    return;
  }
  if ("string" == typeof r) {
    return _arrayLikeToArray(r, a);
  }
  // "[object Xyz]" -> "Xyz"
  var tag = {}.toString.call(r).slice(8, -1);
  if ("Object" === tag && r.constructor) {
    tag = r.constructor.name;
  }
  if ("Map" === tag || "Set" === tag) {
    return Array.from(r);
  }
  if ("Arguments" === tag || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(tag)) {
    return _arrayLikeToArray(r, a);
  }
  return void 0;
}
/**
 * Babel helper: returns `Array.from(r)` when `r` exposes an iterator, otherwise undefined.
 *
 * @param {*} r - Candidate iterable (must not be null/undefined).
 * @returns {Array|undefined} Array copy, or undefined when `r` is not iterable.
 */
function _iterableToArray(r) {
  var hasSymbolIterator = "undefined" != typeof Symbol && null != r[Symbol.iterator];
  if (hasSymbolIterator || null != r["@@iterator"]) {
    return Array.from(r);
  }
}
/**
 * Babel helper: copies a real array through _arrayLikeToArray; returns undefined for non-arrays.
 *
 * @param {*} r - Candidate array.
 * @returns {Array|undefined} The copy, or undefined when `r` is not an array.
 */
function _arrayWithoutHoles(r) {
  if (!Array.isArray(r)) {
    return;
  }
  return _arrayLikeToArray(r);
}
/**
 * Babel helper: copies the first `a` indexed elements of an array-like into a new dense array.
 *
 * @param {ArrayLike} r - Source array-like.
 * @param {number} [a] - Number of elements to copy; defaults to (and is capped at) `r.length`.
 * @returns {Array} The new array.
 */
function _arrayLikeToArray(r, a) {
  var count = null == a || a > r.length ? r.length : a;
  var copy = Array(count);
  for (var index = 0; index < count; index++) {
    copy[index] = r[index];
  }
  return copy;
}
/**
 * Babel helper: `typeof` that also reports "symbol" for polyfilled Symbols.
 * The first call selects the implementation matching the runtime and rebinds `_typeof` to it.
 *
 * @param {*} o - Value to inspect.
 * @returns {string} The type name.
 */
function _typeof(o) {
  "@babel/helpers - typeof";

  var hasNativeSymbols = "function" == typeof Symbol && "symbol" == typeof Symbol.iterator;
  if (hasNativeSymbols) {
    _typeof = function (o) {
      return typeof o;
    };
  } else {
    _typeof = function (o) {
      return o && "function" == typeof Symbol && o.constructor === Symbol && o !== Symbol.prototype ? "symbol" : typeof o;
    };
  }
  return _typeof(o);
}
import fs from 'node:fs';
import { writeFile } from 'node:fs/promises';
import path from 'node:path';
import puppeteer from 'puppeteer';
import { Buffer as Buffer$1 } from 'node:buffer';
import Url, { fileURLToPath } from 'node:url';
import chalk from 'chalk';
import http from 'node:http';
import https from 'node:https';
import querystring from 'node:querystring';
import { HttpsProxyAgent } from 'https-proxy-agent';
import OpenAI from 'openai';
import ora from 'ora';
import { Ollama } from 'ollama'; // Log
// Console output helper and the chalk formatters used for each kind of log message.
const { log } = console;
const {
  blueBright: logStart,
  whiteBright: logStatistics,
  green: logSuccess,
  red: logError,
  yellow: logWarn
} = chalk;
const logNumber = chalk.hex('#a57fff');
const whiteBold = chalk.white.bold;
/** Returns true only when `value` is `undefined`. */
const isUndefined = (value) => value === undefined;
/** Returns true when `value` is a primitive number (NaN included). */
const isNumber = (value) => typeof value === 'number';
/** Returns true when `value` is a primitive string. */
const isString = (value) => typeof value === 'string';
/** Returns true when `value` is a primitive boolean. */
const isBoolean = (value) => typeof value === 'boolean';
/** Returns true for non-null objects that are not arrays. */
const isObject = (value) => {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
};
/** Alias of `Array.isArray`. */
const { isArray } = Array;
/** Returns true when `value` is callable (`typeof value === 'function'`). */
const isFunction = (value) => typeof value === 'function';
/** Returns true for objects that expose both a `then` and a `catch` method. */
const isPromise = (value) => {
  if (!isObject(value)) {
    return false;
  }
  return isFunction(value.then) && isFunction(value["catch"]);
};
/** Alias of Node's `Buffer.isBuffer`. */
const { isBuffer } = Buffer$1;
/**
 * Returns the directory of this module file, or of `dirPath` resolved against this module's URL.
 *
 * @param {string} [dirPath] - Path or URL resolved relative to `import.meta.url`.
 * @returns {string} Directory path on the file system.
 */
const dirname = (dirPath) => {
  const target = isUndefined(dirPath) ? import.meta.url : new URL(dirPath, import.meta.url);
  return path.dirname(fileURLToPath(target));
};
/**
 * Resolves (with no value) after `timeout` milliseconds.
 *
 * @param {number} timeout - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(timeout) {
  return new Promise((resolve) => {
    setTimeout(resolve, timeout);
  });
}
/**
 * Returns a random integer in the half-open range [min, max).
 *
 * The previous implementation drew from [0, max) and retried until the draw was >= min,
 * which never terminated when `max <= min` (for example `random(5, 5)`) and busy-looped
 * when the range was narrow compared to `max`. The value is now computed directly.
 *
 * @param {number} max - Exclusive upper bound.
 * @param {number} [min=0] - Inclusive lower bound. Negative or non-finite values are treated
 *   as 0, because the previous implementation never returned a value below 0.
 * @returns {number} An integer `n` with `min <= n < max`; the lower bound itself when the
 *   range is empty (`max <= min`).
 */
function random(max, min = 0) {
  const lower = Number.isFinite(min) ? Math.max(0, Math.ceil(min)) : 0;
  const span = Math.ceil(max) - lower;
  // Empty or invalid range (also covers NaN): nothing to draw from, return the lower bound.
  if (!(span > 0)) {
    return lower;
  }
  return lower + Math.floor(Math.random() * span);
}
/**
 * Stable ascending merge sort.
 *
 * Elements are compared with `<=`, so numbers work directly and objects are ordered through
 * their `valueOf()`. On ties the element from the left half is taken first, which preserves
 * the relative order of equal elements.
 *
 * The leftover tails are appended with plain loops. The previous `push.apply(...)` passed the
 * whole tail as call arguments (which can exceed the engine's argument limit on large inputs)
 * and used the mutating `splice` where a non-mutating copy was intended.
 *
 * @param {Array} arr - Values to sort; not modified.
 * @returns {Array} A new sorted array, or `arr` itself when it has at most one element.
 */
function mergeSort(arr) {
  if (arr.length <= 1) return arr;

  const mid = Math.floor(arr.length / 2);
  const left = mergeSort(arr.slice(0, mid));
  const right = mergeSort(arr.slice(mid));

  const merged = [];
  let i = 0;
  let j = 0;
  while (i < left.length && j < right.length) {
    merged.push(left[i] <= right[j] ? left[i++] : right[j++]);
  }
  // At most one of the two halves still has elements; append them in order.
  while (i < left.length) merged.push(left[i++]);
  while (j < right.length) merged.push(right[j++]);
  return merged;
}
/**
 * Waits before a target is crawled when an interval is configured, logging the chosen delay.
 * The first target (id 1) never waits.
 *
 * @param {boolean} hasIntervalTime - Whether an interval time is configured.
 * @param {boolean} isNumberIntervalTime - True when `intervalTime` is a plain number of milliseconds.
 * @param {number|{max: number, min?: number}} intervalTime - Fixed delay, or bounds for a random delay.
 * @param {number} id - Id of the target about to be crawled.
 * @param {object} infoConfig - Provides `serialNumber` and `logConfig`.
 * @returns {Promise<void>}
 */
async function useSleepByBatch(hasIntervalTime, isNumberIntervalTime, intervalTime, id, infoConfig) {
  const { serialNumber, logConfig } = infoConfig;

  if (!(hasIntervalTime && id > 1)) {
    if (logConfig.process) {
      log(`${whiteBold(serialNumber)} | Target id: ${logNumber(id)} - Sleep time: ${logNumber('0ms')}`);
    }
    return;
  }

  const timeout = isNumberIntervalTime ? intervalTime : random(intervalTime.max, intervalTime.min);
  if (logConfig.process) {
    log(`${whiteBold(serialNumber)} | Target id: ${logNumber(id)} - Sleep time: ${logNumber(timeout + 'ms')}`);
  }
  await sleep(timeout);
}
/**
 * Starts every device's crawl without waiting for the previous one to finish (only the
 * configured interval is awaited between starts), then waits for all of them.
 *
 * @param {Array<object>} devices - Devices to crawl; each exposes an `id`.
 * @param {object} infoConfig - Crawl settings; `intervalTime` controls the delay between starts.
 * @param {Function} singleCrawlHandle - Called as `singleCrawlHandle(device, infoConfig)`.
 * @returns {Promise<void>}
 */
async function asyncBatchCrawl(devices, infoConfig, singleCrawlHandle) {
  const { intervalTime } = infoConfig;
  const hasIntervalTime = !isUndefined(intervalTime);
  const isNumberIntervalTime = isNumber(intervalTime);

  const crawlPendingQueue = [];
  for (const device of devices) {
    await useSleepByBatch(hasIntervalTime, isNumberIntervalTime, intervalTime, device.id, infoConfig);
    crawlPendingQueue.push(singleCrawlHandle(device, infoConfig));
  }

  await Promise.all(crawlPendingQueue);
}
/**
 * Crawls the devices one after another: each crawl is awaited before the next device's
 * interval delay and crawl begin.
 *
 * @param {Array<object>} devices - Devices to crawl; each exposes an `id`.
 * @param {object} infoConfig - Crawl settings; `intervalTime` controls the delay between crawls.
 * @param {Function} singleCrawlHandle - Called as `singleCrawlHandle(device, infoConfig)`.
 * @returns {Promise<void>}
 */
async function syncBatchCrawl(devices, infoConfig, singleCrawlHandle) {
  const { intervalTime } = infoConfig;
  const hasIntervalTime = !isUndefined(intervalTime);
  const isNumberIntervalTime = isNumber(intervalTime);

  for (const device of devices) {
    await useSleepByBatch(hasIntervalTime, isNumberIntervalTime, intervalTime, device.id, infoConfig);
    await singleCrawlHandle(device, infoConfig);
  }
}
/**
 * Tells whether the HTTP status of a device's latest result is listed in the target's
 * `proxy.switchByHttpStatus` setting.
 *
 * @param {object} device - Provides `detailTargetConfig` and `detailTargetResult`.
 * @returns {boolean} True when a status is available and it is included in `switchByHttpStatus`.
 */
function isCrawlStatusInHttpStatus(device) {
  const { detailTargetConfig, detailTargetResult } = device;

  let status = null;
  if (isObject(detailTargetResult)) {
    const hasPageResponse = Object.hasOwn(detailTargetResult, 'response') && detailTargetResult.response;
    if (hasPageResponse) {
      // crawlPage: the status is read from the response object's status() method
      status = detailTargetResult.response.status();
    } else {
      // crawlData / crawlFile: the status is stored directly on the result
      status = detailTargetResult.statusCode ?? null;
    }
  }

  const switchByHttpStatus = detailTargetConfig.proxy?.switchByHttpStatus;
  return Boolean(status && switchByHttpStatus && switchByHttpStatus.includes(status));
}
/**
 * Runs a crawl over all targets: orders them by priority when priorities differ, wraps each in a
 * device record, crawls them in batches (async or sync mode), retries unhandled devices while they
 * have retry attempts left (rotating proxies when several are configured), and logs a summary.
 *
 * @param {Array<object>} detailTargets - Target configurations (`priority`, `maxRetry`, `proxyDetails`, ...).
 * @param {object} infoConfig - Provides `serialNumber`, `mode` and `logConfig`.
 * @param {Function} singleCrawlHandle - Crawls one device; called by the batch functions.
 * @returns {Promise<Array<object>>} The `result` object of every device, in crawl order.
 */
async function controller(detailTargets, infoConfig, singleCrawlHandle) {
  const { serialNumber, mode, logConfig } = infoConfig;

  // Priority crawling is used only when the targets do not all share the same priority
  const isPriorityCrawl = !detailTargets.every((item) => item.priority === detailTargets[0].priority);
  let detailTargetConfigs = detailTargets;
  if (isPriorityCrawl) {
    // valueOf lets mergeSort compare the copies by priority; reverse() puts the highest first
    const comparableTargets = detailTargets.map((item) => ({
      ...item,
      valueOf() {
        return item.priority;
      }
    }));
    detailTargetConfigs = mergeSort(comparableTargets).reverse();
  }

  // Build one device record per target
  const devices = detailTargetConfigs.map((detailTargetConfig, index) => {
    const id = index + 1;
    const { maxRetry, proxyDetails } = detailTargetConfig;
    const crawlErrorQueue = [];
    return {
      id,
      isHandle: false,
      isSuccess: false,
      isStatusNormal: false,
      detailTargetConfig,
      detailTargetResult: null,
      maxRetry,
      retryCount: 0,
      proxyDetails,
      crawlErrorQueue,
      result: {
        id,
        isSuccess: false,
        maxRetry,
        retryCount: 0,
        proxyDetails,
        crawlErrorQueue,
        data: null
      }
    };
  });

  if (logConfig.start) {
    log(`${whiteBold(serialNumber)} | ${logStart(`Start crawling - mode: ${mode}, total: ${devices.length}`)}`);
  }

  // Select the crawl mode
  const batchCrawl = mode === 'async' ? asyncBatchCrawl : syncBatchCrawl;

  // Decides whether a device goes into the next retry round, rotating its proxy when applicable
  const shouldRetry = (device) => {
    const { isHandle, retryCount, maxRetry, detailTargetConfig, proxyDetails, crawlErrorQueue, isStatusNormal } = device;

    // Retried only when not handled yet and retry attempts remain
    if (isHandle || !(retryCount < maxRetry)) {
      return false;
    }

    // Proxy rotation needs at least two proxies
    if (proxyDetails.length >= 2) {
      const switchByErrorCount = detailTargetConfig.proxy?.switchByErrorCount;
      // NOTE(review): this switches while switchByErrorCount >= number of recorded errors; confirm
      // the comparison direction is intended.
      if (!isStatusNormal || (!isUndefined(switchByErrorCount) && switchByErrorCount >= crawlErrorQueue.length)) {
        // Mark the current proxy URL as unusable.
        // NOTE(review): throws if no entry matches detailTargetConfig.proxyUrl; presumably the
        // loader guarantees a match — verify.
        proxyDetails.find((detail) => detail.url === detailTargetConfig.proxyUrl).state = false;

        // Switch to the first proxy URL that is still usable, if any
        const newProxyUrl = proxyDetails.find((detail) => detail.state)?.url;
        if (!isUndefined(newProxyUrl)) {
          detailTargetConfig.proxyUrl = newProxyUrl;
        }
      }
    }
    return true;
  };

  let retryRound = 0;
  let crawlQueue = devices;
  while (crawlQueue.length) {
    await batchCrawl(crawlQueue, infoConfig, singleCrawlHandle);

    crawlQueue = crawlQueue.filter(shouldRetry);
    if (crawlQueue.length) {
      const retriedIds = crawlQueue.map((item) => {
        item.retryCount++;
        return item.id;
      });
      if (logConfig.process) {
        log(`${whiteBold(serialNumber)} | ${logWarn(`Start retrying - count: ${++retryRound}, targets id: [ ${retriedIds.join(', ')} ]`)}`);
      }
    }
  }

  // Summarise the outcome
  if (logConfig.result) {
    const succssIds = devices.filter((device) => device.isSuccess).map((device) => device.id);
    const errorIds = devices.filter((device) => !device.isSuccess).map((device) => device.id);
    log(`${whiteBold(serialNumber)} | ${logStatistics('Crawl finish:')}\n ${logSuccess(`Success - total: ${succssIds.length}, targets id: [ ${succssIds.join(', ')} ]`)}\n ${logError(`Error - total: ${errorIds.length}, targets id: [ ${errorIds.join(', ')} ]`)}`);
  }

  return devices.map((device) => device.result);
}
/**
 * Builds the outgoing request headers and stores them on `contentConfig.requestConfig.headers`.
 *
 * Defaults: a desktop Chrome `User-Agent`, plus `Content-Type: application/json` and a computed
 * `Content-Length` when a request body is present. A default is applied only when the caller did
 * not supply that header under any letter case. HTTP header names are case-insensitive, and the
 * previous case-sensitive check let the injected `Content-Type` / `Content-Length` shadow a
 * caller's `content-type` / `content-length`.
 *
 * @param {object} rawRequestConfig - Caller configuration; `headers` is optional.
 * @param {object} contentConfig - Provides `requestConfig` (mutated) and the serialised body `data`.
 * @returns {void}
 */
function parseHeaders(rawRequestConfig, contentConfig) {
  const rawHeaders = rawRequestConfig.headers ?? {};
  const { requestConfig, data } = contentConfig;

  // True when the caller supplied `name` (any letter case) with a defined value
  const isSupplied = (name) =>
    Object.keys(rawHeaders).some(
      (key) => key.toLowerCase() === name.toLowerCase() && rawHeaders[key] !== undefined
    );

  const headers = {};
  if (!isSupplied('User-Agent')) {
    headers['User-Agent'] =
      'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36';
  }
  Object.assign(headers, rawHeaders);

  if (data !== undefined) {
    const bodyDefaults = {
      'Content-Type': 'application/json',
      'Content-Length': Buffer.byteLength(data)
    };
    for (const [name, value] of Object.entries(bodyDefaults)) {
      if (!isSupplied(name)) {
        headers[name] = value;
      }
    }
  }

  requestConfig.headers = headers;
}
/**
 * Translates a raw request configuration into the options passed to `http(s).request`,
 * together with the protocol and the serialised request body.
 *
 * The method is upper-cased with `toUpperCase()`. The previous `toLocaleUpperCase()` depends on
 * the process locale and can turn "options" into "OPTİONS" under a Turkish locale.
 *
 * @param {object} rawRequestConfig - `url`, plus optional `data`, `params`, `proxyUrl`, `timeout`,
 *   `method` and `headers`.
 * @returns {{requestConfig: object, protocol: string, data: *}} Request options, URL protocol
 *   (for example "https:"), and the body (plain objects are JSON-stringified, other values pass through).
 * @throws {TypeError} When `url` cannot be parsed by the URL constructor.
 */
function createContentConfig(rawRequestConfig) {
  const { data: rawData, url, params, proxyUrl, timeout, method } = rawRequestConfig;
  const { protocol, hostname, port, pathname, search } = new Url.URL(url);

  // Path = pathname + query already present in the URL + extra `params`
  let path = pathname;
  if (search) {
    path += search;
    if (params) {
      path += `&${querystring.stringify(params)}`;
    }
  } else if (params) {
    path += `?${querystring.stringify(params)}`;
  }

  let agent;
  if (proxyUrl) {
    agent = new HttpsProxyAgent(proxyUrl);
  } else if (protocol === 'http:') {
    agent = new http.Agent();
  } else {
    agent = new https.Agent();
  }

  const contentConfig = {
    requestConfig: {
      agent,
      protocol,
      hostname,
      port,
      path,
      // Locale-independent upper-casing; HTTP method tokens are ASCII
      method: method?.toUpperCase() ?? 'GET',
      headers: {},
      timeout
    },
    protocol,
    data: isObject(rawData) ? JSON.stringify(rawData) : rawData
  };

  parseHeaders(rawRequestConfig, contentConfig);
  return contentConfig;
}
/**
 * Sends an HTTP(S) request and resolves with the status code, the response headers and the
 * whole body as a Buffer.
 *
 * Changes from the previous version:
 *  - On `timeout` the request is destroyed. Node only emits the event and leaves the request
 *    open, so rejecting alone left the socket alive.
 *  - Errors emitted by the response stream reject the promise instead of leaving it pending.
 *
 * @param {object} config - Raw request configuration accepted by createContentConfig.
 * @returns {Promise<{statusCode: number, headers: object, data: Buffer}>}
 */
function request(config) {
  return new Promise((resolve, reject) => {
    const { requestConfig, protocol, data } = createContentConfig(config);

    const handleRes = (res) => {
      const { statusCode, headers } = res;
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('error', reject);
      res.on('end', () => {
        resolve({ statusCode, headers, data: Buffer.concat(chunks) });
      });
    };

    const transport = protocol === 'http:' ? http : https;
    const req = transport.request(requestConfig, handleRes);

    req.on('timeout', () => {
      const timeoutError = new Error(`Timeout ${requestConfig.timeout}ms`);
      reject(timeoutError);
      // Release the socket; the 'error' event this triggers is a no-op on the settled promise
      req.destroy(timeoutError);
    });
    req.on('error', reject);

    // Send the body, if any
    if (data !== undefined) {
      req.write(data);
    }
    req.end();
  });
}
// Built-in fingerprint presets. Each preset carries a platform, a `mobile` setting and a base
// User-Agent string, plus per-product version bounds (`versions`) keyed by product `name`.
const fingerprints = [
  // Chrome on Windows 10
  {
    platform: 'Windows',
    mobile: 'random',
    userAgent: {
      value: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
      versions: [
        { name: 'Chrome', maxMajorVersion: 112, minMajorVersion: 100, maxMinorVersion: 10, maxPatchVersion: 5615 },
        { name: 'Safari', maxMinorVersion: 36, maxPatchVersion: 2333 }
      ]
    }
  },
  // Edge (Chromium) on Windows 10
  {
    platform: 'Windows',
    mobile: 'random',
    userAgent: {
      value: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59',
      versions: [
        { name: 'Chrome', maxMajorVersion: 91, minMajorVersion: 88, maxMinorVersion: 10, maxPatchVersion: 5615 },
        { name: 'Safari', maxMinorVersion: 36, maxPatchVersion: 2333 },
        { name: 'Edg', maxMinorVersion: 10, maxPatchVersion: 864 }
      ]
    }
  },
  // Firefox on Windows 7
  {
    platform: 'Windows',
    mobile: 'random',
    userAgent: {
      value: 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
      versions: [
        { name: 'Firefox', maxMajorVersion: 47, minMajorVersion: 43, maxMinorVersion: 10, maxPatchVersion: 5000 }
      ]
    }
  }
];
/* Function */
/**
 * Normalises the supported cookie inputs into an array of cookie objects bound to `url`.
 *
 * - string: a `name=value; name2=value2` cookie string. Each pair is split at its FIRST `=`, so
 *   values that contain `=` (for example base64 padding) are kept whole; the previous
 *   `split('=')` truncated them. Empty segments are skipped, and `;` with or without a trailing
 *   space is accepted as separator.
 * - array: cookie objects; entries without a `url` get `url` assigned in place.
 * - object: a single cookie object; gets `url` assigned in place when it has none.
 * - anything else: an empty array.
 *
 * @param {string} url - URL attached to cookies that do not carry one.
 * @param {string|Array<object>|object} [cookies] - Cookie input.
 * @returns {Array<object>} Cookie objects (`name`, `value`, `url`, ...).
 */
function parsePageCookies(url, cookies) {
  if (typeof cookies === 'string') {
    return cookies
      .split(/;\s*/)
      .filter((pair) => pair !== '')
      .map((pair) => {
        const separatorIndex = pair.indexOf('=');
        if (separatorIndex === -1) {
          // No "=": keep the historical shape (name only, value undefined)
          return { name: pair, value: undefined, url };
        }
        return {
          name: pair.slice(0, separatorIndex),
          value: pair.slice(separatorIndex + 1),
          url
        };
      });
  }

  if (Array.isArray(cookies)) {
    return cookies.map((cookie) => {
      if (!cookie.url) {
        cookie.url = url;
      }
      return cookie;
    });
  }

  if (typeof cookies === 'object' && cookies) {
    if (!cookies.url) {
      cookies.url = url;
    }
    return [cookies];
  }

  return [];
}
/**
 * Normalises a target configuration into an array of detail-target objects.
 * Plain objects are kept as they are; any other value is wrapped as `{ url: value }`.
 *
 * @param {*} config - A single target or an array of targets.
 * @returns {Array<object>} One detail-target object per input target.
 */
function transformTargetToDetailTargets(config) {
  const toDetailTarget = (target) => (isObject(target) ? target : { url: target });
  if (!isArray(config)) {
    return [toDetailTarget(config)];
  }
  return config.map((item) => toDetailTarget(item));
}
/* Loader config */
/**
 * Applies the common part of a fingerprint to a target by writing request headers onto
 * `detail.headers` (created when missing).
 *
 * Changes from the previous version:
 *  - The platform is written to `sec-ch-ua-platform`, the actual client-hint header name; the
 *    previous key `sec-ch-platform` is not a header that exists.
 *  - A `versions` entry whose `name` does not occur in the User-Agent string is skipped instead
 *    of raising a TypeError.
 *
 * @param {object} detail - Target configuration; its `headers` object is mutated.
 * @param {object} fingerprint - Optional `ua`, `platform`, `platformVersion`, `mobile`
 *   (`'random'` or a literal header value), `acceptLanguage` and `userAgent`
 *   (`{ value, versions }`).
 * @returns {void}
 */
function loaderCommonFingerprintToDetailTarget(detail, fingerprint) {
  const { ua, platform, platformVersion, mobile, acceptLanguage, userAgent } = fingerprint;

  if (!detail.headers) {
    detail.headers = {};
  }
  const headers = detail.headers;

  // Use the fixed value when both bounds agree, otherwise draw one via random(max, min)
  const pickVersionPart = (max, min) => (max === min ? max : random(max, min));

  // 1. sec-ch-ua
  if (ua) {
    headers['sec-ch-ua'] = ua;
  }
  // 2. sec-ch-ua-mobile
  if (mobile) {
    headers['sec-ch-ua-mobile'] = mobile === 'random' ? (random(2) ? '?1' : '?0') : mobile;
  }
  // 3. sec-ch-ua-platform
  if (platform) {
    headers['sec-ch-ua-platform'] = platform;
  }
  // 4. sec-ch-ua-platform-version
  if (platformVersion) {
    headers['sec-ch-ua-platform-version'] = platformVersion;
  }
  // 5. accept-language
  if (acceptLanguage) {
    headers['accept-language'] = acceptLanguage;
  }
  // 6. user-agent
  if (userAgent) {
    let value = userAgent.value;
    for (const version of userAgent.versions ?? []) {
      const {
        name,
        maxMajorVersion,
        minMajorVersion,
        maxMinorVersion,
        minMinorVersion,
        maxPatchVersion,
        minPatchVersion
      } = version;

      // Text following "<name>/" in the User-Agent; undefined when the product is absent
      const afterName = value.split(`${name}/`)[1];
      if (afterName === undefined) {
        continue;
      }

      const versionSplit = afterName.split(' ')[0].split('.');
      const originalVersion = versionSplit.join('.');
      if (maxMajorVersion !== undefined) {
        versionSplit[0] = pickVersionPart(maxMajorVersion, minMajorVersion);
      }
      if (maxMinorVersion !== undefined) {
        versionSplit[1] = pickVersionPart(maxMinorVersion, minMinorVersion);
      }
      if (maxPatchVersion !== undefined) {
        versionSplit[2] = pickVersionPart(maxPatchVersion, minPatchVersion);
      }

      value = value.replace(`${name}/${originalVersion}`, `${name}/${versionSplit.join('.')}`);
    }
    headers['user-agent'] = value;
  }
}
/**
 * Applies the page-level part of a fingerprint (viewport size) to a detail
 * target.
 *
 * The viewport is built on a copy of `detail.viewport`, so a viewport object
 * shared between several targets is never modified. For a target that had no
 * viewport, one is only assigned when both `width` and `height` are known.
 *
 * @param {object} detail - Detail target config; `detail.viewport` may be replaced.
 * @param {object} fingerprint - Reads `maxWidth`, `minWidth`, `maxHeight` and
 *   the minimum height (`minHidth`, falling back to `minHeight`).
 * @returns {void}
 */
function loaderPageFingerprintToDetailTarget(detail, fingerprint) {
  var maxWidth = fingerprint.maxWidth;
  var minWidth = fingerprint.minWidth;
  var maxHeight = fingerprint.maxHeight;
  // `minHidth` (sic) is the key this loader has always read, so it keeps
  // priority; the correctly spelled `minHeight` is accepted as a fallback.
  var minHeight = fingerprint.minHidth !== undefined ? fingerprint.minHidth : fingerprint.minHeight;
  var hadViewport = detail.viewport !== null && detail.viewport !== undefined;
  // Copy instead of editing in place: the existing object may be shared.
  var viewport = Object.assign({}, detail.viewport);
  // 1.width / height
  if (maxWidth) {
    viewport.width = maxWidth === minWidth ? maxWidth : random(maxWidth, minWidth);
  }
  if (maxHeight) {
    viewport.height = maxHeight === minHeight ? maxHeight : random(maxHeight, minHeight);
  }
  var isComplete = Object.hasOwn(viewport, 'width') && Object.hasOwn(viewport, 'height');
  if (hadViewport || isComplete) {
    detail.viewport = viewport;
  }
}
/**
 * Loads the options shared by all crawl kinds into `crawlConfig`.
 *
 * For every detail target the precedence is: detail > advanced > base config.
 * Each target is processed on a shallow copy (headers are copied as well), so
 * the objects supplied by the caller are left untouched and can be re-used.
 *
 * @param {object} crawlBaseConfig - Base config; reads `baseUrl`, `timeout`,
 *   `maxRetry`, `proxy`, `intervalTime` and `enableRandomFingerprint`.
 * @param {object} advancedDetailTargetsConfig - Advanced config carrying
 *   `detailTargets` plus optional shared options.
 * @param {object} crawlConfig - Output config. Receives `detailTargets`,
 *   `intervalTime`, `onCrawlItemComplete`; `selectFingerprintIndexs` must
 *   already be an array.
 * @returns {void}
 */
function loaderCommonConfigToCrawlConfig(crawlBaseConfig, advancedDetailTargetsConfig, crawlConfig) {
  // 1.detailTargets
  crawlConfig.detailTargets = advancedDetailTargetsConfig.detailTargets.map(function (rawDetail, index) {
    // detail > advanced > app
    // Shallow copy: never write into the caller's target object (otherwise a
    // re-used target would e.g. get baseUrl prefixed again on the next call).
    var detail = _objectSpread({}, rawDetail);
    var url = detail.url;
    var timeout = detail.timeout;
    var proxy = detail.proxy;
    var maxRetry = detail.maxRetry;
    var priority = detail.priority;
    var headers = detail.headers;
    var fingerprint = detail.fingerprint;
    // 1.1.baseUrl
    if (crawlBaseConfig.baseUrl) {
      detail.url = crawlBaseConfig.baseUrl + url;
    }
    // 1.2.timeout
    if (isUndefined(timeout)) {
      var advancedTimeout = advancedDetailTargetsConfig.timeout;
      if (!isUndefined(advancedTimeout)) {
        // An explicit null at the advanced level means "no timeout value".
        detail.timeout = advancedTimeout === null ? undefined : advancedTimeout;
      } else {
        detail.timeout = crawlBaseConfig.timeout;
      }
    }
    // 1.3.maxRetry
    if (isUndefined(maxRetry)) {
      var advancedMaxRetry = advancedDetailTargetsConfig.maxRetry;
      if (!isUndefined(advancedMaxRetry)) {
        // An explicit null at the advanced level falls back to 0 retries.
        detail.maxRetry = advancedMaxRetry === null ? 0 : advancedMaxRetry;
      } else {
        detail.maxRetry = crawlBaseConfig.maxRetry;
      }
    }
    // 1.4.proxy
    if (isUndefined(proxy)) {
      if (!isUndefined(advancedDetailTargetsConfig.proxy)) {
        detail.proxy = advancedDetailTargetsConfig.proxy;
      } else if (!isUndefined(crawlBaseConfig.proxy)) {
        detail.proxy = crawlBaseConfig.proxy;
      }
    }
    // 1.5.proxyUrl & proxyDetail
    var resolvedProxy = detail.proxy;
    var hasProxyUrls = resolvedProxy !== null && resolvedProxy !== undefined && !isUndefined(resolvedProxy.urls);
    if (hasProxyUrls) {
      var urls = resolvedProxy.urls;
      // The first URL is the one used initially; every URL starts out usable.
      detail.proxyUrl = urls[0];
      detail.proxyDetails = urls.map(function (proxyUrl) {
        return {
          url: proxyUrl,
          state: true
        };
      });
    } else {
      // Default value
      detail.proxyDetails = [];
    }
    // 1.6.priority
    if (isUndefined(priority)) {
      detail.priority = 0;
    }
    // 1.7.header
    if (isUndefined(headers)) {
      if (advancedDetailTargetsConfig.headers) {
        detail.headers = _objectSpread({}, advancedDetailTargetsConfig.headers);
      }
    } else if (headers) {
      // The fingerprint loader below writes into detail.headers, so the
      // caller's headers object is copied as well.
      detail.headers = _objectSpread({}, headers);
    }
    // 1.8.fingerprint (common part)
    if (fingerprint) {
      // detailTarget
      loaderCommonFingerprintToDetailTarget(detail, fingerprint);
    } else if (isUndefined(fingerprint)) {
      var advancedFingerprints = advancedDetailTargetsConfig.fingerprints;
      if (isArray(advancedFingerprints)) {
        if (advancedFingerprints.length) {
          // advancedConfig
          var selectFingerprintIndex = random(advancedFingerprints.length);
          // Record the selected fingerprint index at this target's own position.
          // Consumers look it up by target index, so a push() would misalign
          // as soon as one target brings its own fingerprint.
          crawlConfig.selectFingerprintIndexs[index] = selectFingerprintIndex;
          loaderCommonFingerprintToDetailTarget(detail, advancedFingerprints[selectFingerprintIndex]);
        }
      } else if (crawlBaseConfig.enableRandomFingerprint) {
        // crawlBaseConfig: draw from the module-level `fingerprints` list
        loaderCommonFingerprintToDetailTarget(detail, fingerprints[random(fingerprints.length)]);
      }
    }
    return detail;
  });
  // 2.intervalTime
  crawlConfig.intervalTime = advancedDetailTargetsConfig.intervalTime;
  if (isUndefined(advancedDetailTargetsConfig.intervalTime) && !isUndefined(crawlBaseConfig.intervalTime)) {
    crawlConfig.intervalTime = crawlBaseConfig.intervalTime;
  }
  // 3.onCrawlItemComplete
  crawlConfig.onCrawlItemComplete = advancedDetailTargetsConfig.onCrawlItemComplete;
}
/* Create config */
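/*
  The return value of every create-config function resembles an advanced config.
  Differences:
    - detailTargets holds the detailed (per-target) configs
    - options that duplicate those of the detailed target config are not kept
  An advancedConfig object is generated and used to load every detailed target config; when an advanced config is passed in, it overrides the generated advancedConfig object
*/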
/**
 * Builds the crawl-page config from the base config and the value passed by
 * the caller (a target, a list of targets, or an advanced config that has a
 * `targets` property).
 *
 * The caller's advanced config is merged into a fresh object, so it is not
 * modified (this matches createCrawlHTMLConfig).
 *
 * @param {object} crawlBaseConfig - Base config forwarded to the common loader.
 * @param {*} originalConfig - Target(s) or advanced page config.
 * @returns {object} Config with `detailTargets`, `intervalTime`,
 *   `selectFingerprintIndexs` and `onCrawlItemComplete`.
 */
function createCrawlPageConfig(crawlBaseConfig, originalConfig) {
  var crawlPageConfig = {
    detailTargets: [],
    intervalTime: undefined,
    selectFingerprintIndexs: [],
    onCrawlItemComplete: undefined
  };
  var advancedDetailTargetsConfig = {
    targets: [],
    detailTargets: []
  };
  if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets')) {
    // CrawlPageAdvancedConfig: copy rather than alias the caller's object,
    // because `detailTargets` is written onto it right below.
    advancedDetailTargetsConfig = _objectSpread(_objectSpread({}, advancedDetailTargetsConfig), originalConfig);
    advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(originalConfig.targets);
  } else {
    // string | CrawlPageDetailTargetConfig | (string | CrawlPageDetailTargetConfig)[]
    advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(originalConfig);
  }
  // Load the common options
  loaderCommonConfigToCrawlConfig(crawlBaseConfig, advancedDetailTargetsConfig, crawlPageConfig);
  // Load the page-specific options
  crawlPageConfig.detailTargets.forEach(function (detail, index) {
    // detail > advanced > xCrawl
    var cookies = detail.cookies;
    var viewport = detail.viewport;
    var fingerprint = detail.fingerprint;
    // 1.cookies
    if (isUndefined(cookies) && advancedDetailTargetsConfig.cookies) {
      detail.cookies = advancedDetailTargetsConfig.cookies;
    }
    // 2.viewport
    if (isUndefined(viewport) && advancedDetailTargetsConfig.viewport) {
      // Each target gets its own copy: the fingerprint step below may change
      // width/height, which must not leak into the other targets.
      detail.viewport = _objectSpread({}, advancedDetailTargetsConfig.viewport);
    }
    // 3.fingerprint
    if (fingerprint) {
      loaderPageFingerprintToDetailTarget(detail, fingerprint);
      return;
    }
    var advancedFingerprints = advancedDetailTargetsConfig.fingerprints;
    var hasAdvancedFingerprints = advancedFingerprints !== null && advancedFingerprints !== undefined && advancedFingerprints.length;
    if (isUndefined(fingerprint) && hasAdvancedFingerprints) {
      // Fetch the fingerprint index recorded for this target
      var selectFingerprintIndex = crawlPageConfig.selectFingerprintIndexs[index];
      var selectedFingerprint = advancedFingerprints[selectFingerprintIndex];
      // No index recorded for this target: skip instead of crashing on undefined.
      if (selectedFingerprint) {
        loaderPageFingerprintToDetailTarget(detail, selectedFingerprint);
      }
    }
  });
  return crawlPageConfig;
}
/**
 * Builds the crawl-HTML config. Accepts either an advanced config (an object
 * owning a `targets` property) or plain target(s); only the common options are
 * loaded for this crawl kind.
 *
 * @param {object} crawlBaseConfig - Base config forwarded to the common loader.
 * @param {*} originalConfig - Target(s) or advanced HTML config.
 * @returns {object} Config with `detailTargets`, `intervalTime`,
 *   `selectFingerprintIndexs` and `onCrawlItemComplete`.
 */
function createCrawlHTMLConfig(crawlBaseConfig, originalConfig) {
  var result = {
    detailTargets: [],
    intervalTime: undefined,
    selectFingerprintIndexs: [],
    onCrawlItemComplete: undefined
  };
  var defaults = {
    targets: [],
    detailTargets: []
  };
  // CrawlHTMLAdvancedConfig when true, otherwise
  // string | CrawlHTMLDetailTargetConfig | (string | CrawlHTMLDetailTargetConfig)[]
  var isAdvancedConfig = isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets');
  var rawTargets = isAdvancedConfig ? originalConfig.targets : originalConfig;
  // The advanced config is merged over the defaults into a new object.
  var advancedDetailTargetsConfig = isAdvancedConfig ? _objectSpread(_objectSpread({}, defaults), originalConfig) : defaults;
  advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(rawTargets);
  loaderCommonConfigToCrawlConfig(crawlBaseConfig, advancedDetailTargetsConfig, result);
  return result;
}
/**
 * Builds the crawl-data config. Accepts either an advanced config (an object
 * owning a `targets` property) or plain target(s); only the common options are
 * loaded for this crawl kind.
 *
 * The caller's advanced config is merged into a fresh object, so it is not
 * modified (this matches createCrawlHTMLConfig).
 *
 * @param {object} crawlBaseConfig - Base config forwarded to the common loader.
 * @param {*} originalConfig - Target(s) or advanced data config.
 * @returns {object} Config with `detailTargets`, `intervalTime`,
 *   `selectFingerprintIndexs` and `onCrawlItemComplete`.
 */
function createCrawlDataConfig(crawlBaseConfig, originalConfig) {
  var crawlDataConfig = {
    detailTargets: [],
    intervalTime: undefined,
    selectFingerprintIndexs: [],
    onCrawlItemComplete: undefined
  };
  var advancedDetailTargetsConfig = {
    targets: [],
    detailTargets: []
  };
  if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets')) {
    // CrawlDataAdvancedConfig: copy rather than alias the caller's object,
    // because `detailTargets` is written onto it right below.
    advancedDetailTargetsConfig = _objectSpread(_objectSpread({}, advancedDetailTargetsConfig), originalConfig);
    advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(originalConfig.targets);
  } else {
    // string | CrawlDataDetailTargetConfig | (string | CrawlDataDetailTargetConfig)[]
    advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(originalConfig);
  }
  loaderCommonConfigToCrawlConfig(crawlBaseConfig, advancedDetailTargetsConfig, crawlDataConfig);
  return crawlDataConfig;
}
/**
 * Builds the crawl-file config. On top of the common options, every detail
 * target that lacks `storeDir`, `extension` or `fileName` inherits it from the
 * advanced config's `storeDirs`, `extensions` and `fileNames`.
 *
 * `storeDirs` and `extensions` may each be a single string (applied to every
 * target) or a list indexed by target position; `fileNames` is always indexed
 * by target position.
 *
 * The caller's advanced config is merged into a fresh object, so it is not
 * modified (this matches createCrawlHTMLConfig).
 *
 * @param {object} crawlBaseConfig - Base config forwarded to the common loader.
 * @param {*} originalConfig - Target(s) or advanced file config.
 * @returns {object} Config with `detailTargets`, `intervalTime`,
 *   `selectFingerprintIndexs`, `onBeforeSaveItemFile` and `onCrawlItemComplete`.
 */
function createCrawlFileConfig(crawlBaseConfig, originalConfig) {
  var crawlFileConfig = {
    detailTargets: [],
    intervalTime: undefined,
    selectFingerprintIndexs: [],
    onBeforeSaveItemFile: undefined,
    onCrawlItemComplete: undefined
  };
  var advancedDetailTargetsConfig = {
    targets: [],
    detailTargets: []
  };
  if (isObject(originalConfig) && Object.hasOwn(originalConfig, 'targets')) {
    // CrawlFileAdvancedConfig: copy rather than alias the caller's object,
    // because `detailTargets` is written onto it right below.
    advancedDetailTargetsConfig = _objectSpread(_objectSpread({}, advancedDetailTargetsConfig), originalConfig);
    advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(originalConfig.targets);
  } else {
    // string | CrawlFileDetailTargetConfig | (string | CrawlFileDetailTargetConfig)[]
    advancedDetailTargetsConfig.detailTargets = transformTargetToDetailTargets(originalConfig);
  }
  loaderCommonConfigToCrawlConfig(crawlBaseConfig, advancedDetailTargetsConfig, crawlFileConfig);

  var storeDirs = advancedDetailTargetsConfig.storeDirs;
  var extensions = advancedDetailTargetsConfig.extensions;
  var fileNames = advancedDetailTargetsConfig.fileNames;
  var haveStoreDirs = !isUndefined(storeDirs);
  var haveExtensions = !isUndefined(extensions);
  var haveFileNames = !isUndefined(fileNames);
  crawlFileConfig.detailTargets.forEach(function (detail, i) {
    // 1.storeDir
    if (isUndefined(detail.storeDir) && haveStoreDirs) {
      detail.storeDir = isString(storeDirs) ? storeDirs : storeDirs[i];
    }
    // 2.extension
    // Decided by the shape of `extensions` itself. It used to follow the shape
    // of `storeDirs`, which picked a single character or the whole list
    // whenever the two options had different shapes.
    if (isUndefined(detail.extension) && haveExtensions) {
      detail.extension = isString(extensions) ? extensions : extensions[i];
    }
    // 3.fileName
    if (isUndefined(detail.fileName) && haveFileNames) {
      detail.fileName = fileNames[i];
    }
  });
  crawlFileConfig.onBeforeSaveItemFile = advancedDetailTargetsConfig.onBeforeSaveItemFile;
  return crawlFileConfig;
}
/* Single crawl handle */
/**
 * Entry point for crawling a single page target. Forwards the receiver and all
 * arguments unchanged to `_pageSingleCrawlHandle`; the two named parameters
 * only exist so the function reports a length of 2.
 */
function pageSingleCrawlHandle(_x15, _x16) {
  var forwardedArgs = arguments;
  return Reflect.apply(_pageSingleCrawlHandle, this, forwardedArgs);
}
function _pageSingleCrawlHandle() {
_pageSingleCrawlHandle = _asyncToGenerator( /*#__PURE__*/_regeneratorRuntime().mark(function _callee18(device, apiConfig) {
var _detailTargetResult$p;
var detailTargetConfig, detailTargetResult, retryCount, maxRetry, crawlErrorQueue, browser, notAllowRetry, page, response, notError, cookies, _cookies, isStatusNormal, isSuccess;
return _regeneratorRuntime().wrap(function _callee18$(_context18) {
while (1) switch (_context18.prev = _context18.next) {
case 0:
detailTargetConfig = device.detailTargetConfig, detailTargetResult = device.detailTargetResult, retryCount = device.retryCount, maxRetry = device.maxRetry, crawlErrorQueue = device.crawlErrorQueue;
browser = apiConfig.browser;
notAllowRetry = retryCount === maxRetry; // 是否创建过 Page
if (!((_detailTargetResult$p = detailTargetResult === null || detailTargetResult === void 0 ? void 0 : detailTargetResult.page) !== null && _detailTargetResult$p !== void 0)) {
_context18.next = 7;
break;
}
_context18.t0 = _detailTargetResult$p;
_context18.next = 10;
break;
case 7:
_context18.next = 9;
return browser.newPage();
case 9:
_context18.t0 = _context18.sent;
case 10:
page = _context18.t0;
if (!detailTargetConfig.viewport) {
_context18.next = 14;
break;
}
_context18.next = 14;
return page.setViewport(detailTargetConfig.viewport);
case 14:
response = null;
notError = true;
_context18.prev = 16;
if (!detailTargetConfig.proxyUrl) {
_context18.next = 22;
break;
}
_context18.next = 20;
return browser.createBrowserContext({
proxyServer: detailTargetConfig.proxyUrl
});
case 20:
_context18.next = 24;
break;
case 22:
_context18.next = 24;
return browser.createBrowserContext({
proxyServer: undefined
});
case 24:
if (!detailTargetConfig.cookies) {
_context18.next = 30;
break