@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
79 lines • 3.48 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.MAX_QUERIES_FOR_CONSISTENCY = exports.API_PROCESSED_REQUESTS_DELAY_MILLIS = exports.QUERY_HEAD_BUFFER = exports.STORAGE_CONSISTENCY_DELAY_MILLIS = exports.QUERY_HEAD_MIN_LENGTH = void 0;
exports.purgeDefaultStorages = purgeDefaultStorages;
exports.useState = useState;
exports.getRequestId = getRequestId;
const tslib_1 = require("tslib");
const node_crypto_1 = tslib_1.__importDefault(require("node:crypto"));
const configuration_1 = require("../configuration");
const key_value_store_1 = require("./key_value_store");
/**
 * Purges the storage client's default storages.
 *
 * Accepts either a `Configuration` instance plus an optional storage client,
 * or a single options object with optional `config`, `client` and
 * `onlyPurgeOnce` fields. When the first argument is an options object, the
 * second positional argument is ignored and `options.client` is used instead.
 *
 * @param configOrOptions A `Configuration` instance, an options object, or nothing.
 * @param client Storage client to purge; only honoured together with a `Configuration` first argument.
 * @returns A promise that resolves once the purge (if any) has finished.
 */
async function purgeDefaultStorages(configOrOptions, client) {
    let options;
    if (configOrOptions instanceof configuration_1.Configuration) {
        options = { client, config: configOrOptions };
    } else {
        options = configOrOptions ?? {};
    }
    // Destructuring defaults kick in only for `undefined`, so an explicit `null` is kept as-is.
    const { config = configuration_1.Configuration.getGlobalConfig(), onlyPurgeOnce = false } = options;
    const { client: storageClient = config.getStorageClient() } = options;
    if (onlyPurgeOnce) {
        // With `onlyPurgeOnce` set, purge only when `purgeOnStart` is enabled and this
        // client has not been purged yet. Without it, every call purges.
        const purgeOnStart = config.get('purgeOnStart');
        if (!purgeOnStart || storageClient.__purged) {
            return;
        }
    }
    // The marker is set before awaiting, so it stays set even if `purge()` rejects.
    storageClient.__purged = true;
    await storageClient.purge?.();
}
/**
 * Opens a key-value store and returns the auto-saved value kept under `name`.
 *
 * The store is opened through `KeyValueStore.open()` and the value is obtained
 * from its `getAutoSavedValue()` method, so the result can be modified by
 * simple assignment.
 *
 * @param name Key of the state value; any falsy value falls back to `'CRAWLEE_GLOBAL_STATE'`.
 * @param defaultValue Passed through to `getAutoSavedValue()` as the initial value; defaults to `{}`.
 * @param options Optional object with `keyValueStoreName` (which store to open) and
 *   `config` (falls back to the global configuration when falsy).
 * @returns A promise of whatever `getAutoSavedValue()` resolves to.
 */
async function useState(name, defaultValue = {}, options) {
    const { keyValueStoreName, config } = options ?? {};
    const store = await key_value_store_1.KeyValueStore.open(keyValueStoreName, {
        config: config || configuration_1.Configuration.getGlobalConfig(),
    });
    const stateKey = name || 'CRAWLEE_GLOBAL_STATE';
    return store.getAutoSavedValue(stateKey, defaultValue);
}
/**
 * Derives a short request ID from a request's `uniqueKey`. Used for the local
 * emulation of the request queue and for the local cache of a remote request queue.
 *
 * The ID is the first 15 characters of the base64-encoded SHA-256 digest of
 * `uniqueKey`, after the `+`, `/` and `=` characters have been stripped.
 *
 * The result may not match the request ID generated server side, so it is
 * never sent to the server and only serves as a local cache key.
 *
 * @param uniqueKey Value to hash.
 * @returns The derived ID (at most 15 characters).
 * @internal
 */
function getRequestId(uniqueKey) {
    const hasher = node_crypto_1.default.createHash('sha256');
    hasher.update(uniqueKey);
    const encoded = hasher.digest('base64');
    // Drop the non-alphanumeric base64 symbols before truncating.
    const stripped = encoded.replace(/[+/=]/g, '');
    return stripped.slice(0, 15);
}
/**
 * When requesting the queue head, we always fetch
 * `requestsInProgressCount * QUERY_HEAD_BUFFER` requests.
 * NOTE(review): presumably this constant is the lower bound applied to that
 * fetch size - confirm against the request queue implementation.
 * @internal
 */
exports.QUERY_HEAD_MIN_LENGTH = 100;
/**
 * Indicates how long (in milliseconds) it usually takes for the underlying storage
 * to propagate all writes so that they are available to subsequent reads.
 * @internal
 */
exports.STORAGE_CONSISTENCY_DELAY_MILLIS = 3000;
/**
 * Multiplier used when sizing queue head requests.
 * NOTE(review): inferred from the `QUERY_HEAD_MIN_LENGTH` comment
 * (`requestsInProgressCount * QUERY_HEAD_BUFFER`) - confirm against the caller.
 * @internal
 */
exports.QUERY_HEAD_BUFFER = 3;
/**
 * If the queue was last modified (request added/updated/deleted) more than
 * API_PROCESSED_REQUESTS_DELAY_MILLIS milliseconds ago, we assume the
 * get-head operation to be consistent.
 * @internal
 */
exports.API_PROCESSED_REQUESTS_DELAY_MILLIS = 10000;
/**
 * How many times we try to get the queue head with a `queueModifiedAt` older
 * than API_PROCESSED_REQUESTS_DELAY_MILLIS.
 * @internal
 */
exports.MAX_QUERIES_FOR_CONSISTENCY = 6;
//# sourceMappingURL=utils.js.map