@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
93 lines • 4.41 kB
TypeScript
import type { Dictionary, StorageClient } from '@crawlee/types';
import { Configuration } from '../configuration';
/**
* Options accepted by the options-object overload of `purgeDefaultStorages`.
*/
interface PurgeDefaultStorageOptions {
/**
* If set to `true`, only the first call performs the purge; any subsequent calls have no effect.
*/
onlyPurgeOnce?: boolean;
/**
* Configuration to use for the purge.
* NOTE(review): presumably falls back to a global/default configuration when omitted — confirm against the implementation.
*/
config?: Configuration;
/**
* Storage client whose (optional) `purge` method is invoked.
* NOTE(review): presumably resolved from `config` when omitted — confirm against the implementation.
*/
client?: StorageClient;
}
/**
* Cleans up the local storage folder (defaults to `./storage`) created when running code locally.
* Purging removes all the files in all storages except for `INPUT.json` in the default KV store.
*
* Storages are purged automatically when we run our crawler (or when we open some storage
* explicitly, e.g. via `RequestList.open()`). We can disable that via the `purgeOnStart` {@link Configuration}
* option or by setting the `CRAWLEE_PURGE_ON_START` environment variable to `0` or `false`.
*
* This is a shortcut for running the (optional) `purge` method on the StorageClient interface; in other words,
* it calls the `purge` method of the underlying storage implementation we are currently using. You can
* make sure the storage is purged only once for a given execution context by setting `onlyPurgeOnce` to `true` in
* the `options` object.
*
* @param options Optional settings: `onlyPurgeOnce`, plus a custom `config` and/or `client`.
*/
export declare function purgeDefaultStorages(options?: PurgeDefaultStorageOptions): Promise<void>;
/**
* Cleans up the local storage folder (defaults to `./storage`) created when running code locally.
* Purging removes all the files in all storages except for `INPUT.json` in the default KV store.
*
* Storages are purged automatically when we run our crawler (or when we open some storage
* explicitly, e.g. via `RequestList.open()`). We can disable that via the `purgeOnStart` {@link Configuration}
* option or by setting the `CRAWLEE_PURGE_ON_START` environment variable to `0` or `false`.
*
* This is a shortcut for running the (optional) `purge` method on the StorageClient interface; in other words,
* it calls the `purge` method of the underlying storage implementation we are currently using.
*
* @param config Optional configuration to use for the purge.
* @param client Optional storage client whose `purge` method is invoked.
* NOTE(review): defaults applied when either argument is omitted are not visible in this declaration — confirm against the implementation.
*/
export declare function purgeDefaultStorages(config?: Configuration, client?: StorageClient): Promise<void>;
/**
* Options accepted by `useState`.
*/
export interface UseStateOptions {
/**
* Configuration to use.
* NOTE(review): presumably falls back to a global/default configuration when omitted — confirm against the implementation.
*/
config?: Configuration;
/**
* The name of the key-value store you'd like the state to be stored in.
* If not provided, the default store will be used.
*/
keyValueStoreName?: string | null;
}
/**
* Easily create and manage state values. All state values are automatically persisted.
*
* Values can be modified by simply using the assignment operator.
*
* @param name The name of the state to use. This is separate from `options.keyValueStoreName`, which selects
* the key-value store the state lives in.
* NOTE(review): presumably `name` is the key under which the state is persisted in that store — confirm against the implementation.
* @param defaultValue If the store does not yet have a value in it, the value will be initialized with the `defaultValue` you provide.
* @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
* @returns A promise resolving to the state object.
*/
export declare function useState<State extends Dictionary = Dictionary>(name?: string, defaultValue?: State, options?: UseStateOptions): Promise<State>;
/**
* Helper function that creates a request ID from a `uniqueKey` for local emulation of the request queue.
* It is also used for the local cache of a remote request queue.
*
* This function may not exactly match how the request ID is created server side,
* so an ID created by it is never passed to the server and is used only for the local cache.
*
* @param uniqueKey The unique key of the request to derive the ID from.
* @returns The locally generated request ID.
* @internal
*/
export declare function getRequestId(uniqueKey: string): string;
/**
* When requesting the queue head we always fetch `requestsInProgressCount * QUERY_HEAD_BUFFER` requests.
* NOTE(review): judging by its name, this constant is presumably the minimum number of requests
* fetched by that query — confirm against the request queue implementation.
* @internal
*/
export declare const QUERY_HEAD_MIN_LENGTH = 100;
/**
* Indicates how long (in milliseconds) it usually takes for the underlying storage to propagate all writes
* so that they are available to subsequent reads.
* @internal
*/
export declare const STORAGE_CONSISTENCY_DELAY_MILLIS = 3000;
/**
* Multiplier applied to `requestsInProgressCount` to determine how many requests are fetched
* when requesting the queue head.
* @internal
*/
export declare const QUERY_HEAD_BUFFER = 3;
/**
* If the queue was last modified (request added/updated/deleted) more than
* `API_PROCESSED_REQUESTS_DELAY_MILLIS` milliseconds ago, we assume the get-head operation to be consistent.
* @internal
*/
export declare const API_PROCESSED_REQUESTS_DELAY_MILLIS = 10000;
/**
* How many times we try to get a queue head whose `queueModifiedAt` is older than
* `API_PROCESSED_REQUESTS_DELAY_MILLIS`.
* @internal
*/
export declare const MAX_QUERIES_FOR_CONSISTENCY = 6;
export {};
//# sourceMappingURL=utils.d.ts.map