UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

143 lines 4.78 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RequestManagerTandem = void 0;
const log_1 = require("../log");
/**
 * A request manager that combines a RequestList and a RequestQueue.
 *
 * Requests are pulled from the RequestList one at a time and pushed to the
 * front of the RequestQueue. Fetching, marking as handled, reclaiming and
 * adding of requests are all delegated to the RequestQueue.
 */
class RequestManagerTandem {
    /**
     * @param requestList Source list whose requests are moved into the queue.
     * @param requestQueue Queue that serves and tracks all requests.
     */
    constructor(requestList, requestQueue) {
        Object.defineProperty(this, "log", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "requestList", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "requestQueue", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.log = log_1.log.child({ prefix: 'RequestManagerTandem' });
        this.requestList = requestList;
        this.requestQueue = requestQueue;
    }
    /**
     * Moves the next request from the RequestList to the front of the RequestQueue.
     * Despite the name, a single request is transferred per call.
     *
     * On success the request is marked as handled in the list. If adding it to the
     * queue fails, the failure is logged and the request is reclaimed to the list.
     * @private
     */
    async transferNextBatchToQueue() {
        const request = await this.requestList.fetchNextRequest();
        // Nothing to transfer. `undefined` is treated like `null` so that a missing
        // request is never forwarded to the queue.
        if (request == null) {
            return;
        }
        try {
            await this.requestQueue.addRequest(request, { forefront: true });
        }
        catch (error) {
            // If requestQueue.addRequest() fails here then we must reclaim it back to
            // the RequestList because probably it's not yet in the queue!
            // The caught error is included in the log data so the cause is not lost.
            this.log.error('Adding of request from the RequestList to the RequestQueue failed, reclaiming request back to the list.', { request, error });
            await this.requestList.reclaimRequest(request);
            return;
        }
        await this.requestList.markRequestHandled(request);
    }
    /**
     * Fetches the next request from the RequestQueue. If the RequestList is neither
     * empty nor finished, one request is transferred from the list to the queue first.
     * @inheritdoc
     */
    async fetchNextRequest() {
        // First, try to transfer a request from the requestList
        const [listEmpty, listFinished] = await Promise.all([
            this.requestList.isEmpty(),
            this.requestList.isFinished(),
        ]);
        if (!listEmpty && !listFinished) {
            await this.transferNextBatchToQueue();
        }
        // Try to fetch from queue after potential transfer
        return this.requestQueue.fetchNextRequest();
    }
    /**
     * Resolves to `true` only when both the RequestList and the RequestQueue report finished.
     * @inheritdoc
     */
    async isFinished() {
        const storagesFinished = await Promise.all([
            this.requestList.isFinished(),
            this.requestQueue.isFinished(),
        ]);
        return storagesFinished.every(Boolean);
    }
    /**
     * Resolves to `true` only when both the RequestList and the RequestQueue report empty.
     * @inheritdoc
     */
    async isEmpty() {
        const storagesEmpty = await Promise.all([
            this.requestList.isEmpty(),
            this.requestQueue.isEmpty(),
        ]);
        return storagesEmpty.every(Boolean);
    }
    /**
     * Returns the handled count reported by the RequestQueue.
     * @inheritdoc
     */
    async handledCount() {
        // Since one of the stores needs to have priority when both are present, we query
        // the request queue - the request list will first be dumped into the queue and
        // then left empty.
        return await this.requestQueue.handledCount();
    }
    /**
     * Returns the total count reported by the RequestQueue.
     * @inheritdoc
     */
    getTotalCount() {
        return this.requestQueue.getTotalCount();
    }
    /**
     * Returns the queue's pending count plus the list's length minus the list's handled count.
     * @inheritdoc
     */
    getPendingCount() {
        return this.requestQueue.getPendingCount() + this.requestList.length() - this.requestList.handledCount();
    }
    /**
     * Yields requests from `fetchNextRequest()` until it returns a falsy value.
     * @inheritdoc
     */
    async *[Symbol.asyncIterator]() {
        while (true) {
            const req = await this.fetchNextRequest();
            if (!req)
                break;
            yield req;
        }
    }
    /**
     * Delegates to the RequestQueue.
     * @inheritdoc
     */
    async markRequestHandled(request) {
        return this.requestQueue.markRequestHandled(request);
    }
    /**
     * Delegates to the RequestQueue.
     * @inheritdoc
     */
    async reclaimRequest(request, options) {
        return await this.requestQueue.reclaimRequest(request, options);
    }
    /**
     * Delegates to the RequestQueue.
     * @inheritdoc
     */
    async addRequest(requestLike, options) {
        return await this.requestQueue.addRequest(requestLike, options);
    }
    /**
     * Delegates to the RequestQueue.
     * @inheritdoc
     */
    async addRequestsBatched(requests, options) {
        return await this.requestQueue.addRequestsBatched(requests, options);
    }
}
exports.RequestManagerTandem = RequestManagerTandem;
//# sourceMappingURL=request_manager_tandem.js.map