UNPKG

@crawlee/core

Version:

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

79 lines 3.48 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MAX_QUERIES_FOR_CONSISTENCY = exports.API_PROCESSED_REQUESTS_DELAY_MILLIS = exports.QUERY_HEAD_BUFFER = exports.STORAGE_CONSISTENCY_DELAY_MILLIS = exports.QUERY_HEAD_MIN_LENGTH = void 0;
exports.purgeDefaultStorages = purgeDefaultStorages;
exports.useState = useState;
exports.getRequestId = getRequestId;
const tslib_1 = require("tslib");
const node_crypto_1 = tslib_1.__importDefault(require("node:crypto"));
const configuration_1 = require("../configuration");
const key_value_store_1 = require("./key_value_store");
/**
 * Invokes the optional `purge()` method of the storage client.
 *
 * Two call shapes are supported:
 *  - `purgeDefaultStorages(options)` where `options` may contain `config`, `client` and `onlyPurgeOnce`;
 *  - `purgeDefaultStorages(config, client)` where both arguments are optional.
 *
 * The storage client is resolved in this order: `options.client`, then the positional `client`,
 * then `config.getStorageClient()`. The configuration defaults to the global configuration.
 *
 * @param configOrOptions Either a `Configuration` instance or an options object.
 * @param client Storage client to purge; used whenever the options object does not carry its own `client`.
 */
async function purgeDefaultStorages(configOrOptions, client) {
    const options = configOrOptions instanceof configuration_1.Configuration
        ? { client, config: configOrOptions }
        : (configOrOptions ?? {});
    const { config = configuration_1.Configuration.getGlobalConfig(), onlyPurgeOnce = false } = options;
    // Fall back to the positional `client` before asking the config for one, so that
    // `purgeDefaultStorages(undefined, client)` purges the client the caller handed in
    // instead of silently ignoring it.
    const storageClient = options.client ?? client ?? config.getStorageClient();
    // When `onlyPurgeOnce` is false (the default), purge on every call. When it is true,
    // purge only if the `purgeOnStart` config option is enabled and this client has not
    // been marked as purged yet.
    const shouldPurge = !onlyPurgeOnce || (config.get('purgeOnStart') && !storageClient.__purged);
    if (!shouldPurge) {
        return;
    }
    // Mark the client before awaiting so a concurrent `onlyPurgeOnce` call does not purge again.
    storageClient.__purged = true;
    await storageClient.purge?.();
}
/**
 * Easily create and manage state values. All state values are automatically persisted.
 *
 * Values can be modified by simply using the assignment operator.
 *
 * @param name The name of the store to use. A falsy name falls back to `'CRAWLEE_GLOBAL_STATE'`.
 * @param defaultValue If the store does not yet have a value in it, the value will be initialized with the `defaultValue` you provide.
 * @param options An optional object parameter where a custom `keyValueStoreName` and `config` can be passed in.
 */
async function useState(name, defaultValue = {}, options) {
    const kvStore = await key_value_store_1.KeyValueStore.open(options?.keyValueStoreName, {
        config: options?.config || configuration_1.Configuration.getGlobalConfig(),
    });
    return kvStore.getAutoSavedValue(name || 'CRAWLEE_GLOBAL_STATE', defaultValue);
}
/**
 * Helper function that creates ID from uniqueKey for local emulation of request queue.
 * It's also used for local cache of remote request queue.
 *
 * The ID is the first 15 characters of the base64-encoded SHA-256 digest of `uniqueKey`,
 * with the `+`, `/` and `=` characters removed.
 *
 * This function may not exactly match how requestId is created server side.
 * So we never pass requestId created by this to server and use it only for local cache.
 *
 * @internal
 */
function getRequestId(uniqueKey) {
    const str = node_crypto_1.default
        .createHash('sha256')
        .update(uniqueKey)
        .digest('base64')
        .replace(/[+/=]/g, '');
    return str.slice(0, 15);
}
/**
 * When requesting queue head we always fetch requestsInProgressCount * QUERY_HEAD_BUFFER number of requests.
 * @internal
 */
exports.QUERY_HEAD_MIN_LENGTH = 100;
/**
 * Indicates how long it usually takes for the underlying storage to propagate all writes
 * to be available to subsequent reads.
 * @internal
 */
exports.STORAGE_CONSISTENCY_DELAY_MILLIS = 3000;
/** @internal */
exports.QUERY_HEAD_BUFFER = 3;
/**
 * If the queue was last modified (request added/updated/deleted) more than
 * API_PROCESSED_REQUESTS_DELAY_MILLIS ago, we assume the get head operation to be consistent.
 * @internal
 */
exports.API_PROCESSED_REQUESTS_DELAY_MILLIS = 10000;
/**
 * How many times we try to get queue head with queueModifiedAt older than API_PROCESSED_REQUESTS_DELAY_MILLIS.
 * @internal
 */
exports.MAX_QUERIES_FOR_CONSISTENCY = 6;
//# sourceMappingURL=utils.js.map