UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

172 lines 6.69 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.RequestHandlerResult = void 0; const storages_1 = require("../storages"); /** * A partial implementation of {@link RestrictedCrawlingContext} that stores parameters of calls to context methods for later inspection. * * @experimental */ class RequestHandlerResult { constructor(config, crawleeStateKey) { Object.defineProperty(this, "config", { enumerable: true, configurable: true, writable: true, value: config }); Object.defineProperty(this, "crawleeStateKey", { enumerable: true, configurable: true, writable: true, value: crawleeStateKey }); Object.defineProperty(this, "_keyValueStoreChanges", { enumerable: true, configurable: true, writable: true, value: {} }); Object.defineProperty(this, "pushDataCalls", { enumerable: true, configurable: true, writable: true, value: [] }); Object.defineProperty(this, "addRequestsCalls", { enumerable: true, configurable: true, writable: true, value: [] }); Object.defineProperty(this, "pushData", { enumerable: true, configurable: true, writable: true, value: async (data, datasetIdOrName) => { this.pushDataCalls.push([data, datasetIdOrName]); } }); Object.defineProperty(this, "addRequests", { enumerable: true, configurable: true, writable: true, value: async (requests, options = {}) => { this.addRequestsCalls.push([requests, options]); } }); Object.defineProperty(this, "useState", { enumerable: true, configurable: true, writable: true, value: async (defaultValue) => { const store = await this.getKeyValueStore(undefined); return await store.getAutoSavedValue(this.crawleeStateKey, defaultValue); } }); Object.defineProperty(this, "getKeyValueStore", { enumerable: true, configurable: true, writable: true, value: async (idOrName) => { const store = await storages_1.KeyValueStore.open(idOrName, { config: this.config }); return { id: this.idOrDefault(idOrName), name: idOrName, getValue: async (key) => this.getKeyValueStoreChangedValue(idOrName, key) ?? (await store.getValue(key)), setValue: async (key, value, options) => { this.setKeyValueStoreChangedValue(idOrName, key, value, options); }, getAutoSavedValue: store.getAutoSavedValue.bind(store), getPublicUrl: store.getPublicUrl.bind(store), }; } }); Object.defineProperty(this, "idOrDefault", { enumerable: true, configurable: true, writable: true, value: (idOrName) => idOrName ?? this.config.get('defaultKeyValueStoreId') }); Object.defineProperty(this, "getKeyValueStoreChangedValue", { enumerable: true, configurable: true, writable: true, value: (idOrName, key) => { var _a; const id = this.idOrDefault(idOrName); (_a = this._keyValueStoreChanges)[id] ?? (_a[id] = {}); return this.keyValueStoreChanges[id][key]?.changedValue ?? null; } }); Object.defineProperty(this, "setKeyValueStoreChangedValue", { enumerable: true, configurable: true, writable: true, value: (idOrName, key, changedValue, options) => { var _a; const id = this.idOrDefault(idOrName); (_a = this._keyValueStoreChanges)[id] ?? (_a[id] = {}); this._keyValueStoreChanges[id][key] = { changedValue, options }; } }); } /** * A record of calls to {@link RestrictedCrawlingContext.pushData}, {@link RestrictedCrawlingContext.addRequests}, {@link RestrictedCrawlingContext.enqueueLinks} made by a request handler. */ get calls() { return { pushData: this.pushDataCalls, addRequests: this.addRequestsCalls, }; } /** * A record of changes made to key-value stores by a request handler. */ get keyValueStoreChanges() { return this._keyValueStoreChanges; } /** * Items added to datasets by a request handler. */ get datasetItems() { return this.pushDataCalls.flatMap(([data, datasetIdOrName]) => (Array.isArray(data) ? data : [data]).map((item) => ({ item, datasetIdOrName }))); } /** * URLs enqueued to the request queue by a request handler, either via {@link RestrictedCrawlingContext.addRequests} or {@link RestrictedCrawlingContext.enqueueLinks} */ get enqueuedUrls() { const result = []; for (const [requests] of this.addRequestsCalls) { for (const request of requests) { if (typeof request === 'object' && (!('requestsFromUrl' in request) || request.requestsFromUrl !== undefined) && request.url !== undefined) { result.push({ url: request.url, label: request.label }); } else if (typeof request === 'string') { result.push({ url: request }); } } } return result; } /** * URL lists enqueued to the request queue by a request handler via {@link RestrictedCrawlingContext.addRequests} using the `requestsFromUrl` option. */ get enqueuedUrlLists() { const result = []; for (const [requests] of this.addRequestsCalls) { for (const request of requests) { if (typeof request === 'object' && 'requestsFromUrl' in request && request.requestsFromUrl !== undefined) { result.push({ listUrl: request.requestsFromUrl, label: request.label }); } } } return result; } } exports.RequestHandlerResult = RequestHandlerResult; //# sourceMappingURL=crawler_commons.js.map