/**
 * @crawlee/core — RequestList (compiled CommonJS output, 739 lines / 31.8 kB).
 * The scalable web crawling and scraping library for JavaScript/Node.js. Enables
 * development of data extraction and web automation jobs (not only) with headless
 * Chrome and Puppeteer.
 */
"use strict"; // Restored strict-mode directive: a bare `;` no-op stood where compiled CJS output declares it.
Object.defineProperty(exports, "__esModule", { value: true });
exports.RequestList = exports.REQUESTS_PERSISTENCE_KEY = exports.STATE_PERSISTENCE_KEY = void 0;
const tslib_1 = require("tslib");
const utils_1 = require("@crawlee/utils");
const ow_1 = tslib_1.__importStar(require("ow"));
const configuration_1 = require("../configuration");
const log_1 = require("../log");
const request_1 = require("../request");
const serialization_1 = require("../serialization");
const key_value_store_1 = require("./key_value_store");
const utils_2 = require("./utils");
/** Default key-value store key suffix under which the crawl-progress state is persisted. @internal */
exports.STATE_PERSISTENCE_KEY = 'REQUEST_LIST_STATE';
/** Default key-value store key suffix under which the serialized request sources are persisted. @internal */
exports.REQUESTS_PERSISTENCE_KEY = 'REQUEST_LIST_REQUESTS';
// Content type used when storing the serialized request array (a binary payload) to the key-value store.
const CONTENT_TYPE_BINARY = 'application/octet-stream';
/**
* Represents a static list of URLs to crawl.
* The URLs can be provided either in code or parsed from a text file hosted on the web.
* `RequestList` is used by {@link BasicCrawler}, {@link CheerioCrawler}, {@link PuppeteerCrawler}
* and {@link PlaywrightCrawler} as a source of URLs to crawl.
*
* Each URL is represented using an instance of the {@link Request} class.
* The list can only contain unique URLs. More precisely, it can only contain `Request` instances
* with distinct `uniqueKey` properties. By default, `uniqueKey` is generated from the URL, but it can also be overridden.
* To add a single URL to the list multiple times, corresponding {@link Request} objects will need to have different
* `uniqueKey` properties. You can use the `keepDuplicateUrls` option to do this for you when initializing the
* `RequestList` from sources.
*
* `RequestList` doesn't have a public constructor, you need to create it with the asynchronous {@link RequestList.open} function. After
* the request list is created, no more URLs can be added to it.
* Unlike {@link RequestQueue}, `RequestList` is static but it can contain even millions of URLs.
* > Note that `RequestList` can be used together with `RequestQueue` by the same crawler.
* > In such cases, each request from `RequestList` is enqueued into `RequestQueue` first and then consumed from the latter.
* > This is necessary to avoid the same URL being processed more than once (from the list first and then possibly from the queue).
* > In practical terms, such a combination can be useful when there is a large number of initial URLs,
* > but more URLs would be added dynamically by the crawler.
*
* `RequestList` has an internal state where it stores information about which requests were already handled,
* which are in progress and which were reclaimed. The state may be automatically persisted to the default
* {@link KeyValueStore} by setting the `persistStateKey` option so that if the Node.js process is restarted,
* the crawling can continue where it left off. The automated persisting is launched upon receiving the `persistState`
* event that is periodically emitted by {@link EventManager}.
*
* The internal state is closely tied to the provided sources (URLs). If the sources change on crawler restart, the state will become corrupted and
* `RequestList` will raise an exception. This typically happens when the sources is a list of URLs downloaded from the web.
* In such case, use the `persistRequestsKey` option in conjunction with `persistStateKey`,
* to make the `RequestList` store the initial sources to the default key-value store and load them after restart,
* which will prevent any issues that a live list of URLs might cause.
*
* **Basic usage:**
* ```javascript
* const requestList = await RequestList.open('my-request-list', [
* 'http://www.example.com/page-1',
* { url: 'http://www.example.com/page-2', method: 'POST', userData: { foo: 'bar' }},
* { requestsFromUrl: 'http://www.example.com/my-url-list.txt', userData: { isFromUrl: true } },
* ]);
* ```
*
* **Advanced usage:**
* ```javascript
* const requestList = await RequestList.open(null, [
* // Separate requests
* { url: 'http://www.example.com/page-1', method: 'GET', headers: { ... } },
* { url: 'http://www.example.com/page-2', userData: { foo: 'bar' }},
*
* // Bulk load of URLs from file `http://www.example.com/my-url-list.txt`
* // Note that all URLs must start with http:// or https://
* { requestsFromUrl: 'http://www.example.com/my-url-list.txt', userData: { isFromUrl: true } },
* ], {
* // Persist the state to avoid re-crawling which can lead to data duplications.
* // Keep in mind that the sources have to be immutable or this will throw an error.
* persistStateKey: 'my-state',
* });
* ```
* @category Sources
*/
class RequestList {
    /**
     * To create new instance of `RequestList` we need to use `RequestList.open()` factory method.
     * @param options All `RequestList` configuration options
     * @internal
     */
    constructor(options = {}) {
        // NOTE: Instance fields are declared via `Object.defineProperty` because this file is
        // compiled output — class fields were lowered to defineProperty calls by the compiler.
        Object.defineProperty(this, "log", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: log_1.log.child({ prefix: 'RequestList' })
        });
        /**
         * Array of all requests from all sources, in the order as they appeared in sources.
         * All requests in the array have distinct uniqueKey!
         * @internal
         */
        Object.defineProperty(this, "requests", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
        /** Index to the next item in requests array to fetch. All previous requests are either handled or in progress. */
        Object.defineProperty(this, "nextIndex", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        /** Dictionary, key is Request.uniqueKey, value is corresponding index in the requests array. */
        Object.defineProperty(this, "uniqueKeyToIndex", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: {}
        });
        /**
         * Set of `uniqueKey`s of requests that were returned by fetchNextRequest().
         * @internal
         */
        Object.defineProperty(this, "inProgress", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: new Set()
        });
        /**
         * Set of `uniqueKey`s of requests for which reclaimRequest() was called.
         * @internal
         */
        Object.defineProperty(this, "reclaimed", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: new Set()
        });
        /**
         * Starts as true because until we handle the first request, the list is effectively persisted by doing nothing.
         * @internal
         */
        Object.defineProperty(this, "isStatePersisted", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: true
        });
        /**
         * Starts as false because we don't know yet and sources might change in the meantime (eg. download from live list).
         * @internal
         */
        Object.defineProperty(this, "areRequestsPersisted", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: false
        });
        // True once initialize() has started loading sources; guards against double initialization.
        Object.defineProperty(this, "isLoading", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: false
        });
        // True once initialize() has completed; checked by _ensureIsInitialized() before any public operation.
        Object.defineProperty(this, "isInitialized", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: false
        });
        // Key under which getState() is persisted (set below, prefixed with `SDK_` when provided).
        Object.defineProperty(this, "persistStateKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Key under which the serialized requests are persisted (set below, prefixed with `SDK_` when provided).
        Object.defineProperty(this, "persistRequestsKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // State object passed via options.state; takes precedence over persisted state on load.
        Object.defineProperty(this, "initialState", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Lazily opened default KeyValueStore shared by all persistence helpers.
        Object.defineProperty(this, "store", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "keepDuplicateUrls", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "sources", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "sourcesFunction", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "proxyConfiguration", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "events", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        const { sources, sourcesFunction, persistStateKey, persistRequestsKey, state, proxyConfiguration, keepDuplicateUrls = false, config = configuration_1.Configuration.getGlobalConfig(), } = options;
        if (!(sources || sourcesFunction)) {
            throw new ow_1.ArgumentError('At least one of "sources" or "sourcesFunction" must be provided.', this.constructor);
        }
        (0, ow_1.default)(options, ow_1.default.object.exactShape({
            sources: ow_1.default.optional.array, // check only for array and not subtypes to avoid iteration over the whole thing
            sourcesFunction: ow_1.default.optional.function,
            persistStateKey: ow_1.default.optional.string,
            persistRequestsKey: ow_1.default.optional.string,
            state: ow_1.default.optional.object.exactShape({
                nextIndex: ow_1.default.number,
                nextUniqueKey: ow_1.default.string,
                inProgress: ow_1.default.object,
            }),
            keepDuplicateUrls: ow_1.default.optional.boolean,
            proxyConfiguration: ow_1.default.optional.object,
        }));
        // Persistence keys are namespaced with an `SDK_` prefix, but only when the caller provided them
        // (a falsy value is passed through unchanged so persistence stays disabled).
        this.persistStateKey = persistStateKey ? `SDK_${persistStateKey}` : persistStateKey;
        this.persistRequestsKey = persistRequestsKey ? `SDK_${persistRequestsKey}` : persistRequestsKey;
        this.initialState = state;
        this.events = config.getEventManager();
        // If this option is set then all requests will get a pre-generated unique ID and duplicate URLs will be kept in the list.
        this.keepDuplicateUrls = keepDuplicateUrls;
        // Will be empty after initialization to save memory.
        this.sources = sources ? [...sources] : [];
        this.sourcesFunction = sourcesFunction;
        // The proxy configuration used for `requestsFromUrl` requests.
        this.proxyConfiguration = proxyConfiguration;
    }
    /**
     * Loads all remote sources of URLs and potentially starts periodic state persistence.
     * This function must be called before you can start using the instance in a meaningful way.
     * @throws {Error} When called a second time (sources already loading or loaded).
     */
    async initialize() {
        if (this.isLoading) {
            throw new Error('RequestList sources are already loading or were loaded.');
        }
        this.isLoading = true;
        await (0, utils_2.purgeDefaultStorages)({ onlyPurgeOnce: true });
        const [state, persistedRequests] = await this._loadStateAndPersistedRequests();
        // Add persisted requests / new sources in a memory efficient way because with very
        // large lists, we were running out of memory.
        if (persistedRequests) {
            await this._addPersistedRequests(persistedRequests);
        }
        else {
            await this._addRequestsFromSources();
        }
        this._restoreState(state);
        this.isInitialized = true;
        // Persist the (now immutable) requests once, unless they were themselves loaded from persistence.
        if (this.persistRequestsKey && !this.areRequestsPersisted)
            await this._persistRequests();
        if (this.persistStateKey) {
            this.events.on("persistState" /* EventType.PERSIST_STATE */, this.persistState.bind(this));
        }
        return this;
    }
    /**
     * Adds previously persisted Requests, as retrieved from the key-value store.
     * This needs to be done in a memory efficient way. We should update the input
     * to a Stream once apify-client supports streams.
     */
    async _addPersistedRequests(persistedRequests) {
        // We don't need the sources so we purge them to
        // prevent them from hanging in memory.
        for (let i = 0; i < this.sources.length; i++) {
            delete this.sources[i];
        }
        this.sources = [];
        this.areRequestsPersisted = true;
        const requestStream = (0, serialization_1.createDeserialize)(persistedRequests);
        for await (const request of requestStream) {
            this._addRequest(request);
        }
    }
    /**
     * Add Requests from both options.sources and options.sourcesFunction.
     * This function is called only when persisted sources were not loaded.
     * We need to avoid keeping both sources and requests in memory
     * to reduce memory footprint with very large sources.
     */
    async _addRequestsFromSources() {
        // We'll load all sources in sequence to ensure that they get loaded in the right order.
        const sourcesCount = this.sources.length;
        for (let i = 0; i < sourcesCount; i++) {
            const source = this.sources[i];
            // Using delete here to drop the original object ASAP to free memory
            // .pop would reverse the array and .shift is SLOW.
            delete this.sources[i];
            if (typeof source === 'object' && source.requestsFromUrl) {
                const fetchedRequests = await this._fetchRequestsFromUrl(source);
                await this._addFetchedRequests(source, fetchedRequests);
            }
            else {
                this._addRequest(source);
            }
        }
        // Drop the original array full of empty indexes.
        this.sources = [];
        if (this.sourcesFunction) {
            try {
                const sourcesFromFunction = await this.sourcesFunction();
                const sourcesFromFunctionCount = sourcesFromFunction.length;
                for (let i = 0; i < sourcesFromFunctionCount; i++) {
                    const source = sourcesFromFunction.shift();
                    this._addRequest(source);
                }
            }
            catch (e) {
                const err = e;
                throw new Error(`Loading requests with sourcesFunction failed.\nCause: ${err.message}`);
            }
        }
    }
    /**
     * @inheritDoc
     */
    async persistState() {
        if (!this.persistStateKey) {
            throw new Error('Cannot persist state. options.persistStateKey is not set.');
        }
        // Skip the store round-trip when nothing changed since the last successful persist.
        if (this.isStatePersisted)
            return;
        try {
            this.store ?? (this.store = await key_value_store_1.KeyValueStore.open());
            await this.store.setValue(this.persistStateKey, this.getState());
            this.isStatePersisted = true;
        }
        catch (e) {
            // Best-effort persistence: log and carry on, the next persistState event will retry.
            const err = e;
            this.log.exception(err, 'Attempted to persist state, but failed.');
        }
    }
    /**
     * Unlike persistState(), this is used only internally, since the sources
     * are automatically persisted at RequestList initialization (if the persistRequestsKey is set),
     * but there's no reason to persist it again afterwards, because RequestList is immutable.
     */
    async _persistRequests() {
        const serializedRequests = await (0, serialization_1.serializeArray)(this.requests);
        this.store ?? (this.store = await key_value_store_1.KeyValueStore.open());
        await this.store.setValue(this.persistRequestsKey, serializedRequests, { contentType: CONTENT_TYPE_BINARY });
        this.areRequestsPersisted = true;
    }
    /**
     * Restores RequestList state from a state object.
     * Validates that the persisted state is consistent with the loaded requests
     * (same length, same order) and throws otherwise.
     */
    _restoreState(state) {
        // If there's no state it means we've not persisted any (yet).
        if (!state)
            return;
        // Restore previous state.
        if (typeof state.nextIndex !== 'number' || state.nextIndex < 0) {
            throw new Error('The state object is invalid: nextIndex must be a non-negative number.');
        }
        if (state.nextIndex > this.requests.length) {
            throw new Error('The state object is not consistent with RequestList, too few requests loaded.');
        }
        if (state.nextIndex < this.requests.length &&
            this.requests[state.nextIndex].uniqueKey !== state.nextUniqueKey) {
            throw new Error('The state object is not consistent with RequestList the order of URLs seems to have changed.');
        }
        const deleteFromInProgress = [];
        state.inProgress.forEach((uniqueKey) => {
            const index = this.uniqueKeyToIndex[uniqueKey];
            if (typeof index !== 'number') {
                throw new Error('The state object is not consistent with RequestList. Unknown uniqueKey is present in the state.');
            }
            if (index >= state.nextIndex) {
                deleteFromInProgress.push(uniqueKey);
            }
        });
        this.nextIndex = state.nextIndex;
        this.inProgress = new Set(state.inProgress);
        // WORKAROUND:
        // It happened to some users that state object contained something like:
        // {
        //   "nextIndex": 11308,
        //   "nextUniqueKey": "https://www.anychart.com",
        //   "inProgress": {
        //     "https://www.ams360.com": true,
        //     ...
        //     "https://www.anychart.com": true,
        // }
        // Which then caused error "The request is not being processed (uniqueKey: https://www.anychart.com)"
        // As a workaround, we just remove all inProgress requests whose index >= nextIndex,
        // since they will be crawled again.
        if (deleteFromInProgress.length) {
            this.log.warning("RequestList's in-progress field is not consistent, skipping invalid in-progress entries", {
                deleteFromInProgress,
            });
            for (const uniqueKey of deleteFromInProgress) {
                this.inProgress.delete(uniqueKey);
            }
        }
        // All in-progress requests need to be re-crawled
        this.reclaimed = new Set(this.inProgress);
    }
    /**
     * Attempts to load state and requests using the `RequestList` configuration
     * and returns a tuple of [state, requests] where each may be null if not loaded.
     */
    async _loadStateAndPersistedRequests() {
        let state;
        let persistedRequests;
        // options.state takes precedence over state persisted in the key-value store.
        if (this.initialState) {
            state = this.initialState;
            this.log.debug('Loaded state from options.state argument.');
        }
        else if (this.persistStateKey) {
            state = await this._getPersistedState(this.persistStateKey);
            if (state)
                this.log.debug('Loaded state from key value store using the persistStateKey.');
        }
        if (this.persistRequestsKey) {
            persistedRequests = await this._getPersistedState(this.persistRequestsKey);
            if (persistedRequests)
                this.log.debug('Loaded requests from key value store using the persistRequestsKey.');
        }
        return [state, persistedRequests];
    }
    /**
     * Returns an object representing the internal state of the `RequestList` instance.
     * Note that the object's fields can change in future releases.
     */
    getState() {
        this._ensureIsInitialized();
        return {
            nextIndex: this.nextIndex,
            nextUniqueKey: this.nextIndex < this.requests.length ? this.requests[this.nextIndex].uniqueKey : null,
            inProgress: [...this.inProgress],
        };
    }
    /**
     * @inheritDoc
     */
    async isEmpty() {
        this._ensureIsInitialized();
        // Empty = nothing reclaimed and nothing left to hand out (in-progress requests don't count).
        return this.reclaimed.size === 0 && this.nextIndex >= this.requests.length;
    }
    /**
     * @inheritDoc
     */
    async isFinished() {
        this._ensureIsInitialized();
        // Finished = everything handed out AND every handed-out request was marked handled.
        return this.inProgress.size === 0 && this.nextIndex >= this.requests.length;
    }
    /**
     * @inheritDoc
     */
    async fetchNextRequest() {
        this._ensureIsInitialized();
        // First return reclaimed requests if any.
        const uniqueKey = this.reclaimed.values().next().value;
        if (uniqueKey) {
            this.reclaimed.delete(uniqueKey);
            const index = this.uniqueKeyToIndex[uniqueKey];
            return this.ensureRequest(this.requests[index], index);
        }
        // Otherwise return next request.
        if (this.nextIndex < this.requests.length) {
            const index = this.nextIndex;
            const request = this.requests[index];
            this.inProgress.add(request.uniqueKey);
            this.nextIndex++;
            this.isStatePersisted = false;
            return this.ensureRequest(request, index);
        }
        // null signals the list is exhausted (apart from in-progress/reclaimed requests).
        return null;
    }
    /**
     * @inheritDoc
     */
    async *[Symbol.asyncIterator]() {
        while (true) {
            const req = await this.fetchNextRequest();
            if (!req)
                break;
            yield req;
        }
    }
    /**
     * Returns the request at `index` as a `Request` instance.
     * Plain source objects are lazily converted to `Request` on first access and the
     * instance is cached back into the requests array, so conversion happens only once.
     */
    ensureRequest(requestLike, index) {
        if (requestLike instanceof request_1.Request) {
            return requestLike;
        }
        this.requests[index] = new request_1.Request(requestLike);
        return this.requests[index];
    }
    /**
     * @inheritDoc
     */
    async markRequestHandled(request) {
        const { uniqueKey } = request;
        this._ensureUniqueKeyValid(uniqueKey);
        this._ensureInProgressAndNotReclaimed(uniqueKey);
        this._ensureIsInitialized();
        this.inProgress.delete(uniqueKey);
        this.isStatePersisted = false;
    }
    /**
     * @inheritDoc
     */
    async reclaimRequest(request) {
        const { uniqueKey } = request;
        this._ensureUniqueKeyValid(uniqueKey);
        this._ensureInProgressAndNotReclaimed(uniqueKey);
        this._ensureIsInitialized();
        // The request stays in inProgress; adding it to reclaimed makes fetchNextRequest() return it again.
        this.reclaimed.add(uniqueKey);
    }
    /**
     * Adds all fetched requests from a URL from a remote resource.
     */
    async _addFetchedRequests(source, fetchedRequests) {
        const { requestsFromUrl, regex } = source;
        const originalLength = this.requests.length;
        fetchedRequests.forEach((request) => this._addRequest(request));
        const fetchedCount = fetchedRequests.length;
        // importedCount < fetchedCount means some fetched URLs were duplicates and got skipped.
        const importedCount = this.requests.length - originalLength;
        this.log.info('Fetched and loaded Requests from a remote resource.', {
            requestsFromUrl,
            regex,
            fetchedCount,
            importedCount,
            duplicateCount: fetchedCount - importedCount,
            sample: JSON.stringify(fetchedRequests.slice(0, 5)),
        });
    }
    /**
     * Reads a value under the given key from the default key-value store,
     * opening the store lazily on first use.
     */
    async _getPersistedState(key) {
        this.store ?? (this.store = await key_value_store_1.KeyValueStore.open());
        const state = await this.store.getValue(key);
        return state;
    }
    /**
     * Fetches URLs from requestsFromUrl and returns them in format of list of requests
     */
    async _fetchRequestsFromUrl(source) {
        const { requestsFromUrl, regex, ...sharedOpts } = source;
        // Download remote resource and parse URLs.
        let urlsArr;
        try {
            urlsArr = await this._downloadListOfUrls({
                url: requestsFromUrl,
                urlRegExp: regex,
                proxyUrl: await this.proxyConfiguration?.newUrl(),
            });
        }
        catch (err) {
            throw new Error(`Cannot fetch a request list from ${requestsFromUrl}: ${err}`);
        }
        // Skip if resource contained no URLs.
        if (!urlsArr.length) {
            this.log.warning('The fetched list contains no valid URLs.', { requestsFromUrl, regex });
            return [];
        }
        // All remaining source options (method, userData, ...) are shared by every resulting request.
        return urlsArr.map((url) => ({ url, ...sharedOpts }));
    }
    /**
     * Adds given request.
     * If the `source` parameter is a string or plain object and not an instance
     * of a `Request`, then the function creates a `Request` instance.
     */
    _addRequest(source) {
        let request;
        const type = typeof source;
        if (type === 'string') {
            request = { url: source };
        }
        else if (source instanceof request_1.Request) {
            request = source;
        }
        else if (source && type === 'object') {
            request = source;
        }
        else {
            throw new Error(`Cannot create Request from type: ${type}`);
        }
        // Remember whether the caller supplied an explicit uniqueKey before we compute a default one.
        const hasUniqueKey = Reflect.has(Object(source), 'uniqueKey');
        request.uniqueKey ?? (request.uniqueKey = request_1.Request.computeUniqueKey(request));
        // Add index to uniqueKey if duplicates are to be kept
        if (this.keepDuplicateUrls && !hasUniqueKey) {
            request.uniqueKey += `-${this.requests.length}`;
        }
        const { uniqueKey } = request;
        this._ensureUniqueKeyValid(uniqueKey);
        // Skip requests with duplicate uniqueKey
        if (!Object.hasOwn(this.uniqueKeyToIndex, uniqueKey)) {
            this.uniqueKeyToIndex[uniqueKey] = this.requests.length;
            this.requests.push(request);
        }
        else if (this.keepDuplicateUrls) {
            this.log.warning(`Duplicate uniqueKey: ${uniqueKey} found while the keepDuplicateUrls option was set. Check your sources' unique keys.`);
        }
    }
    /**
     * Helper function that validates unique key.
     * Throws an error if uniqueKey is not a non-empty string.
     */
    _ensureUniqueKeyValid(uniqueKey) {
        if (typeof uniqueKey !== 'string' || !uniqueKey) {
            throw new Error("Request object's uniqueKey must be a non-empty string");
        }
    }
    /**
     * Checks that request is not reclaimed and throws an error if so.
     */
    _ensureInProgressAndNotReclaimed(uniqueKey) {
        if (!this.inProgress.has(uniqueKey)) {
            throw new Error(`The request is not being processed (uniqueKey: ${uniqueKey})`);
        }
        if (this.reclaimed.has(uniqueKey)) {
            throw new Error(`The request was already reclaimed (uniqueKey: ${uniqueKey})`);
        }
    }
    /**
     * Throws an error if request list wasn't initialized.
     */
    _ensureIsInitialized() {
        if (!this.isInitialized) {
            throw new Error('RequestList is not initialized; you must call "await requestList.initialize()" before using it!');
        }
    }
    /**
     * Returns the total number of unique requests present in the `RequestList`.
     */
    length() {
        this._ensureIsInitialized();
        return this.requests.length;
    }
    /**
     * @inheritDoc
     */
    handledCount() {
        this._ensureIsInitialized();
        // Every request before nextIndex was handed out; those still in progress are not handled yet.
        return this.nextIndex - this.inProgress.size;
    }
    /**
     * Opens a request list and returns a promise resolving to an instance
     * of the {@link RequestList} class that is already initialized.
     *
     * {@link RequestList} represents a list of URLs to crawl, which is always stored in memory.
     * To enable picking up where left off after a process restart, the request list sources
     * are persisted to the key-value store at initialization of the list. Then, while crawling,
     * a small state object is regularly persisted to keep track of the crawling status.
     *
     * For more details and code examples, see the {@link RequestList} class.
     *
     * **Example usage:**
     *
     * ```javascript
     * const sources = [
     *     'https://www.example.com',
     *     'https://www.google.com',
     *     'https://www.bing.com'
     * ];
     *
     * const requestList = await RequestList.open('my-name', sources);
     * ```
     *
     * @param listNameOrOptions
     *   Name of the request list to be opened, or the options object. Setting a name enables the `RequestList`'s
     *   state to be persisted in the key-value store. This is useful in case of a restart or migration. Since `RequestList`
     *   is only stored in memory, a restart or migration wipes it clean. Setting a name will enable the `RequestList`'s
     *   state to survive those situations and continue where it left off.
     *
     *   The name will be used as a prefix in key-value store, producing keys such as `NAME-REQUEST_LIST_STATE`
     *   and `NAME-REQUEST_LIST_SOURCES`.
     *
     *   If `null`, the list will not be persisted and will only be stored in memory. Process restart
     *   will then cause the list to be crawled again from the beginning. We suggest always using a name.
     * @param [sources]
     *  An array of sources of URLs for the {@link RequestList}. It can be either an array of strings,
     *  plain objects that define at least the `url` property, or an array of {@link Request} instances.
     *
     *  **IMPORTANT:** The `sources` array will be consumed (left empty) after {@link RequestList} initializes.
     *  This is a measure to prevent memory leaks in situations when millions of sources are
     *  added.
     *
     *  Additionally, the `requestsFromUrl` property may be used instead of `url`,
     *  which will instruct {@link RequestList} to download the source URLs from a given remote location.
     *  The URLs will be parsed from the received response. In this case you can limit the URLs
     *  using `regex` parameter containing regular expression pattern for URLs to be included.
     *
     *  For details, see the {@link RequestListOptions.sources}
     * @param [options]
     *   The {@link RequestList} options. Note that the `listName` parameter supersedes
     *   the {@link RequestListOptions.persistStateKey} and {@link RequestListOptions.persistRequestsKey}
     *   options and the `sources` parameter supersedes the {@link RequestListOptions.sources} option.
     */
    static async open(listNameOrOptions, sources, options = {}) {
        // Overload: open(options) — a non-null object as the first argument is the full options bag.
        if (listNameOrOptions != null && typeof listNameOrOptions === 'object') {
            options = { ...listNameOrOptions, ...options };
            const rl = new RequestList(options);
            await rl.initialize();
            return rl;
        }
        // Overload: open(listName, sources, options).
        const listName = listNameOrOptions;
        (0, ow_1.default)(listName, ow_1.default.optional.any(ow_1.default.string, ow_1.default.null));
        (0, ow_1.default)(sources, ow_1.default.array);
        (0, ow_1.default)(options, ow_1.default.object.is((v) => !Array.isArray(v)));
        const rl = new RequestList({
            ...options,
            persistStateKey: listName ? `${listName}-${exports.STATE_PERSISTENCE_KEY}` : options.persistStateKey,
            persistRequestsKey: listName ? `${listName}-${exports.REQUESTS_PERSISTENCE_KEY}` : options.persistRequestsKey,
            sources: sources ?? options.sources,
        });
        await rl.initialize();
        return rl;
    }
    /**
     * @internal wraps public utility for mocking purposes
     */
    async _downloadListOfUrls(options) {
        return (0, utils_1.downloadListOfUrls)(options);
    }
}
// Populate the CommonJS export slot that was initialized to `void 0` at the top of the file.
exports.RequestList = RequestList;
//# sourceMappingURL=request_list.js.map