@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.
414 lines • 18.3 kB
TypeScript
import type { Dictionary, StorageClient } from '@crawlee/types';
import { Configuration } from '../configuration';
import type { Awaitable } from '../typedefs';
import type { StorageManagerOptions } from './storage_manager';
/**
* Helper function to possibly stringify value if options.contentType is not set.
*
* @ignore
*/
export declare const maybeStringify: <T>(value: T, options: {
contentType?: string;
}) => T;
/**
* The `KeyValueStore` class represents a key-value store, a simple data storage that is used
* for saving and reading data records or files. Each data record is
* represented by a unique key and associated with a MIME content type. Key-value stores are ideal
* for saving screenshots, crawler inputs and outputs, web pages, PDFs or to persist the state of crawlers.
*
* Do not instantiate this class directly, use the
* {@link KeyValueStore.open} function instead.
*
* Each crawler run is associated with a default key-value store, which is created exclusively
* for the run. By convention, the crawler input and output are stored into the
* default key-value store under the `INPUT` and `OUTPUT` key, respectively.
* Typically, input and output are JSON files, although it can be any other format.
* To access the default key-value store directly, you can use the
* {@link KeyValueStore.getValue} and {@link KeyValueStore.setValue} convenience functions.
*
* To access the input, you can also use the {@link KeyValueStore.getInput} convenience function.
*
* `KeyValueStore` stores its data on a local disk.
*
* If the `CRAWLEE_STORAGE_DIR` environment variable is set, the data is stored in
* the local directory in the following files:
* ```
* {CRAWLEE_STORAGE_DIR}/key_value_stores/{STORE_ID}/{INDEX}.{EXT}
* ```
* Note that `{STORE_ID}` is the name or ID of the key-value store. The default key-value store has ID: `default`,
* unless you override it by setting the `CRAWLEE_DEFAULT_KEY_VALUE_STORE_ID` environment variable.
* The `{KEY}` is the key of the record and `{EXT}` corresponds to the MIME content type of the data value.
*
* **Example usage:**
*
* ```javascript
* // Get crawler input from the default key-value store.
* const input = await KeyValueStore.getInput();
* // Get some value from the default key-value store.
* const otherValue = await KeyValueStore.getValue('my-key');
*
* // Write crawler output to the default key-value store.
* await KeyValueStore.setValue('OUTPUT', { myResult: 123 });
*
* // Open a named key-value store
* const store = await KeyValueStore.open('some-name');
*
* // Write a record. JavaScript object is automatically converted to JSON,
* // strings and binary buffers are stored as they are
* await store.setValue('some-key', { foo: 'bar' });
*
* // Read a record. Note that JSON is automatically parsed to a JavaScript object,
* // text data returned as a string and other data is returned as binary buffer
* const value = await store.getValue('some-key');
*
* // Drop (delete) the store
* await store.drop();
* ```
* @category Result Stores
*/
export declare class KeyValueStore {
readonly config: Configuration;
readonly id: string;
readonly name?: string;
readonly storageObject?: Record<string, unknown>;
private readonly client;
private persistStateEventStarted;
/** Cache for persistent (auto-saved) values. When we try to set such value, the cache will be updated automatically. */
private readonly cache;
/**
* @internal
*/
constructor(options: KeyValueStoreOptions, config?: Configuration);
/**
* Gets a value from the key-value store.
*
* The function returns a `Promise` that resolves to the record value,
* whose JavaScript type depends on the MIME content type of the record.
* Records with the `application/json`
* content type are automatically parsed and returned as a JavaScript object.
* Similarly, records with `text/plain` content types are returned as a string.
* For all other content types, the value is returned as a raw
* [`Buffer`](https://nodejs.org/api/buffer.html) instance.
*
* If the record does not exist, the function resolves to `null`.
*
* To save or delete a value in the key-value store, use the
* {@link KeyValueStore.setValue} function.
*
* **Example usage:**
*
* ```javascript
* const store = await KeyValueStore.open();
* const buffer = await store.getValue('screenshot1.png');
* ```
* @param key
* Unique key of the record. It can be at most 256 characters long and only consist
* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()`
* @returns
* Returns a promise that resolves to an object, string
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
* on the MIME content type of the record.
*/
getValue<T = unknown>(key: string): Promise<T | null>;
/**
* Gets a value from the key-value store.
*
* The function returns a `Promise` that resolves to the record value,
* whose JavaScript type depends on the MIME content type of the record.
* Records with the `application/json`
* content type are automatically parsed and returned as a JavaScript object.
* Similarly, records with `text/plain` content types are returned as a string.
* For all other content types, the value is returned as a raw
* [`Buffer`](https://nodejs.org/api/buffer.html) instance.
*
* If the record does not exist, the function resolves to `null`.
*
* To save or delete a value in the key-value store, use the
* {@link KeyValueStore.setValue} function.
*
* **Example usage:**
*
* ```javascript
* const store = await KeyValueStore.open();
* const buffer = await store.getValue('screenshot1.png');
* ```
* @param key
* Unique key of the record. It can be at most 256 characters long and only consist
* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()`
* @param defaultValue
* Fallback that will be returned if no value if present in the storage.
* @returns
* Returns a promise that resolves to an object, string
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
* on the MIME content type of the record, or the default value if the key is missing from the store.
*/
getValue<T = unknown>(key: string, defaultValue: T): Promise<T>;
/**
* Tests whether a record with the given key exists in the key-value store without retrieving its value.
*
* @param key The queried record key.
* @returns `true` if the record exists, `false` if it does not.
*/
recordExists(key: string): Promise<boolean>;
getAutoSavedValue<T extends Dictionary = Dictionary>(key: string, defaultValue?: T): Promise<T>;
private ensurePersistStateEvent;
/**
* Saves or deletes a record in the key-value store.
* The function returns a promise that resolves once the record has been saved or deleted.
*
* **Example usage:**
*
* ```javascript
* const store = await KeyValueStore.open();
* await store.setValue('OUTPUT', { foo: 'bar' });
* ```
*
* Beware that the key can be at most 256 characters long and only contain the following characters: `a-zA-Z0-9!-_.'()`
*
* By default, `value` is converted to JSON and stored with the
* `application/json; charset=utf-8` MIME content type.
* To store the value with another content type, pass it in the options as follows:
* ```javascript
* const store = await KeyValueStore.open('my-text-store');
* await store.setValue('RESULTS', 'my text data', { contentType: 'text/plain' });
* ```
* If you set custom content type, `value` must be either a string or
* [`Buffer`](https://nodejs.org/api/buffer.html), otherwise an error will be thrown.
*
* If `value` is `null`, the record is deleted instead. Note that the `setValue()` function succeeds
* regardless whether the record existed or not.
*
* To retrieve a value from the key-value store, use the
* {@link KeyValueStore.getValue} function.
*
* **IMPORTANT:** Always make sure to use the `await` keyword when calling `setValue()`,
* otherwise the crawler process might finish before the value is stored!
*
* @param key
* Unique key of the record. It can be at most 256 characters long and only consist
* of the following characters: `a`-`z`, `A`-`Z`, `0`-`9` and `!-_.'()`
* @param value
* Record data, which can be one of the following values:
* - If `null`, the record in the key-value store is deleted.
* - If no `options.contentType` is specified, `value` can be any JavaScript object and it will be stringified to JSON.
* - If `options.contentType` is set, `value` is taken as is and it must be a `String` or [`Buffer`](https://nodejs.org/api/buffer.html).
* For any other value an error will be thrown.
* @param [options] Record options.
*/
setValue<T>(key: string, value: T | null, options?: RecordOptions): Promise<void>;
/**
* Removes the key-value store either from the Apify cloud storage or from the local directory,
* depending on the mode of operation.
*/
drop(): Promise<void>;
/** @internal */
clearCache(): void;
/**
* Iterates over key-value store keys, yielding each in turn to an `iteratee` function.
* Each invocation of `iteratee` is called with three arguments: `(key, index, info)`, where `key`
* is the record key, `index` is a zero-based index of the key in the current iteration
* (regardless of `options.exclusiveStartKey`) and `info` is an object that contains a single property `size`
* indicating size of the record in bytes.
*
* If the `iteratee` function returns a Promise then it is awaited before the next call.
* If it throws an error, the iteration is aborted and the `forEachKey` function throws the error.
*
* **Example usage**
* ```javascript
* const keyValueStore = await KeyValueStore.open();
* await keyValueStore.forEachKey(async (key, index, info) => {
* console.log(`Key at ${index}: ${key} has size ${info.size}`);
* });
* ```
*
* @param iteratee A function that is called for every key in the key-value store.
* @param [options] All `forEachKey()` parameters.
*/
forEachKey(iteratee: KeyConsumer, options?: KeyValueStoreIteratorOptions): Promise<void>;
private _forEachKey;
/**
* Returns a file URL for the given key.
*/
getPublicUrl(key: string): string;
/**
* Opens a key-value store and returns a promise resolving to an instance of the {@link KeyValueStore} class.
*
* Key-value stores are used to store records or files, along with their MIME content type.
* The records are stored and retrieved using a unique key.
* The actual data is stored either on a local filesystem or in the Apify cloud.
*
* For more details and code examples, see the {@link KeyValueStore} class.
*
* @param [storeIdOrName]
* ID or name of the key-value store to be opened. If `null` or `undefined`,
* the function returns the default key-value store associated with the crawler run.
* @param [options] Storage manager options.
*/
static open(storeIdOrName?: string | null, options?: StorageManagerOptions): Promise<KeyValueStore>;
/**
* Gets a value from the default {@link KeyValueStore} associated with the current crawler run.
*
* This is just a convenient shortcut for {@link KeyValueStore.getValue}.
* For example, calling the following code:
* ```javascript
* const value = await KeyValueStore.getValue('my-key');
* ```
*
* is equivalent to:
* ```javascript
* const store = await KeyValueStore.open();
* const value = await store.getValue('my-key');
* ```
*
* To store the value to the default key-value store, you can use the {@link KeyValueStore.setValue} function.
*
* For more information, see {@link KeyValueStore.open}
* and {@link KeyValueStore.getValue}.
*
* @param key Unique record key.
* @returns
* Returns a promise that resolves to an object, string
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
* on the MIME content type of the record, or `null`
* if the record is missing.
* @ignore
*/
static getValue<T = unknown>(key: string): Promise<T | null>;
/**
* Gets a value from the default {@link KeyValueStore} associated with the current crawler run.
*
* This is just a convenient shortcut for {@link KeyValueStore.getValue}.
* For example, calling the following code:
* ```javascript
* const value = await KeyValueStore.getValue('my-key');
* ```
*
* is equivalent to:
* ```javascript
* const store = await KeyValueStore.open();
* const value = await store.getValue('my-key');
* ```
*
* To store the value to the default key-value store, you can use the {@link KeyValueStore.setValue} function.
*
* For more information, see {@link KeyValueStore.open}
* and {@link KeyValueStore.getValue}.
*
* @param key Unique record key.
* @param defaultValue Fallback that will be returned if no value if present in the storage.
* @returns
* Returns a promise that resolves to an object, string
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
* on the MIME content type of the record, or the provided default value.
* @ignore
*/
static getValue<T = unknown>(key: string, defaultValue: T): Promise<T>;
/**
* Tests whether a record with the given key exists in the default {@link KeyValueStore} associated with the current crawler run.
* @param key The queried record key.
* @returns `true` if the record exists, `false` if it does not.
*/
static recordExists(key: string): Promise<boolean>;
static getAutoSavedValue<T extends Dictionary = Dictionary>(key: string, defaultValue?: T): Promise<T>;
/**
* Stores or deletes a value in the default {@link KeyValueStore} associated with the current crawler run.
*
* This is just a convenient shortcut for {@link KeyValueStore.setValue}.
* For example, calling the following code:
* ```javascript
* await KeyValueStore.setValue('OUTPUT', { foo: "bar" });
* ```
*
* is equivalent to:
* ```javascript
* const store = await KeyValueStore.open();
* await store.setValue('OUTPUT', { foo: "bar" });
* ```
*
* To get a value from the default key-value store, you can use the {@link KeyValueStore.getValue} function.
*
* For more information, see {@link KeyValueStore.open}
* and {@link KeyValueStore.getValue}.
*
* @param key
* Unique record key.
* @param value
* Record data, which can be one of the following values:
* - If `null`, the record in the key-value store is deleted.
* - If no `options.contentType` is specified, `value` can be any JavaScript object, and it will be stringified to JSON.
* - If `options.contentType` is set, `value` is taken as is, and it must be a `String` or [`Buffer`](https://nodejs.org/api/buffer.html).
* For any other value an error will be thrown.
* @param [options]
* @ignore
*/
static setValue<T>(key: string, value: T | null, options?: RecordOptions): Promise<void>;
/**
* Gets the crawler input value from the default {@link KeyValueStore} associated with the current crawler run.
* By default, it will try to find root input files (either extension-less, `.json` or `.txt`),
* or alternatively read the input from the default {@link KeyValueStore}.
*
* Note that the `getInput()` function does not cache the value read from the key-value store.
* If you need to use the input multiple times in your crawler,
* it is far more efficient to read it once and store it locally.
*
* For more information, see {@link KeyValueStore.open}
* and {@link KeyValueStore.getValue}.
*
* @returns
* Returns a promise that resolves to an object, string
* or [`Buffer`](https://nodejs.org/api/buffer.html), depending
* on the MIME content type of the record, or `null`
* if the record is missing.
* @ignore
*/
static getInput<T = Dictionary | string | Buffer>(): Promise<T | null>;
}
/**
* User-function used in the {@link KeyValueStore.forEachKey} method.
*/
export interface KeyConsumer {
/**
* @param key Current {@link KeyValueStore} key being processed.
* @param index Position of the current key in {@link KeyValueStore}.
* @param info Information about the current {@link KeyValueStore} entry.
* @param info.size Size of the value associated with the current key in bytes.
*/
(key: string, index: number, info: {
size: number;
}): Awaitable<void>;
}
export interface KeyValueStoreOptions {
id: string;
name?: string;
client: StorageClient;
storageObject?: Record<string, unknown>;
}
export interface RecordOptions {
/**
* Specifies a custom MIME content type of the record.
*/
contentType?: string;
/**
* Specifies a custom timeout for the `set-record` API call, in seconds.
*/
timeoutSecs?: number;
/**
* If set to `true`, the `set-record` API call will not be retried if it times out.
*/
doNotRetryTimeouts?: boolean;
}
export interface KeyValueStoreIteratorOptions {
/**
* All keys up to this one (including) are skipped from the result.
*/
exclusiveStartKey?: string;
/**
* If set, only keys that start with this prefix are returned.
*/
prefix?: string;
/**
* Collection name to use for listing keys.
*/
collection?: string;
}
//# sourceMappingURL=key_value_store.d.ts.map