UNPKG

ayakashi

Version:

The next-generation web scraping framework

126 lines (125 loc) 6.27 kB
"use strict"; var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } return new (P || (P = Promise))(function (resolve, reject) { function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } step((generator = generator.apply(thisArg, _arguments || [])).next()); }); }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const core_1 = __importDefault(require("@ayakashi/request/core")); const path_1 = require("path"); const pipeproc_1 = require("pipeproc"); const apiPrelude_1 = require("../prelude/apiPrelude"); const yield_1 = require("../prelude/actions/yield"); const request_1 = require("../prelude/actions/request"); const cookies_1 = require("../prelude/actions/cookies"); const opLog_1 = require("../opLog/opLog"); const client_1 = require("../bridge/client"); const cookies_2 = require("./cookies"); const debug_1 = __importDefault(require("debug")); const d = debug_1.default("ayakashi:apiScraperWrapper"); function apiScraperWrapper(log) { return __awaiter(this, void 0, void 0, function* () { try { const opLog = opLog_1.getOpLog(); opLog.info("running apiScraper", log.body.module); const bridgeClient = client_1.getBridgeClient(log.body.connectionConfig.bridgePort); const ayakashiInstance = apiPrelude_1.apiPrelude(); //user-agent setup const userAgentData = yield bridgeClient.getUserAgentData({ agent: (log.body.config.emulatorOptions && log.body.config.emulatorOptions.userAgent) || undefined, platform: (log.body.config.emulatorOptions && 
log.body.config.emulatorOptions.platform) || undefined, persistentSession: log.body.persistentSession }); if (!userAgentData) { throw new Error("could not generate userAgent"); } const acceptLanguage = (log.body.config.emulatorOptions && log.body.config.emulatorOptions.acceptLanguage) || "en-US"; //get cookie jar const { jar } = yield cookies_2.getCookieJar(log.body.connectionConfig.bridgePort, { persistentSession: log.body.persistentSession }); //attach the request API const myRequest = core_1.default.defaults({ headers: { "User-Agent": userAgentData.userAgent, //tslint:disable max-line-length Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", //tslint:enable max-line-length "accept-language": acceptLanguage, "cache-control": "no-cache", pragma: "no-cache" }, proxy: log.body.proxyUrl || undefined, strictSSL: !log.body.ignoreCertificateErrors, gzipOrBrotli: true, timeout: 10000, jar: jar }); function cookieSync() { return __awaiter(this, void 0, void 0, function* () { //sync request cookies with the persistent store yield cookies_2.updateCookieJar(log.body.connectionConfig.bridgePort, jar, { persistentSession: log.body.persistentSession }); }); } request_1.attachRequest(ayakashiInstance, myRequest, cookieSync); cookies_1.attachCookieActions(ayakashiInstance, jar, null, cookieSync); //connect to pipeproc const pipeprocClient = pipeproc_1.PipeProc(); yield pipeprocClient.connect({ socket: `ipc://${path_1.resolve(log.body.storeProjectFolder, "ipc")}` }); //attach the yield methods const yieldWatcher = { yieldedAtLeastOnce: false }; yield_1.attachYields(ayakashiInstance, pipeprocClient, log.body.saveTopic, log.body.selfTopic, yieldWatcher); let scraperModule; try { if (log.body.config.simple) { scraperModule = require(path_1.resolve(log.body.projectFolder, log.body.module)); } else { scraperModule = require(path_1.resolve(log.body.projectFolder, "scrapers", log.body.module)); } if (typeof 
scraperModule !== "function") { scraperModule = scraperModule.default; } if (typeof scraperModule !== "function") { throw new Error(`Scraper <${log.body.module}> is not a function`); } } catch (e) { opLog.error(e.message); throw e; } //run the scraper let result; try { //@ts-ignore if (log.body.input && log.body.input.value && log.body.input.value.continue === true) delete log.body.input.value.continue; result = yield scraperModule(ayakashiInstance, log.body.input.value || {}, log.body.params || {}); } catch (e) { opLog.error(`There was an error while running scraper <${log.body.module}> -`, e.message, e.stack); throw e; } if (result) { yield ayakashiInstance.yield(result); } if (!result && !yieldWatcher.yieldedAtLeastOnce) { yield ayakashiInstance.yield({ continue: true }); } } catch (e) { d(e); throw e; } }); } exports.default = apiScraperWrapper;