@crawlee/core
Version:
The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs with (but not limited to) headless Chrome and Puppeteer.
200 lines • 7.61 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RecoverableState = void 0;
const tslib_1 = require("tslib");
const core_1 = require("@crawlee/core");
const log_1 = tslib_1.__importDefault(require("@apify/log"));
/**
 * A class for managing persistent recoverable state using a plain JavaScript object.
 *
 * This class facilitates state persistence to a `KeyValueStore`, allowing data to be saved and retrieved
 * across migrations or restarts. It manages the loading, saving, and resetting of state data,
 * with optional persistence capabilities.
 *
 * The state is a value that can be round-tripped through the configured `serialize`/`deserialize`
 * pair (`JSON.stringify`/`JSON.parse` by default). When persistence is enabled, `initialize()`
 * subscribes the instance to `persistState` events so the state is saved whenever one is emitted.
 */
class RecoverableState {
    /**
     * Initialize a new recoverable state object.
     *
     * @param options Configuration options for the recoverable state
     * @param options.defaultState Value the state starts from (and is reset to); it is always copied
     *   via `deserialize(serialize(...))`, never handed out directly
     * @param options.persistStateKey Key under which the serialized state is stored
     * @param [options.persistenceEnabled] Whether the state is persisted at all (defaults to `false`)
     * @param [options.persistStateKvsName] Name of the key-value store to open (takes precedence over the id)
     * @param [options.persistStateKvsId] Id of the key-value store to open
     * @param [options.logger] Logger to use; defaults to a child of the `@apify/log` default logger
     * @param [options.config] Configuration to use; defaults to the global configuration
     * @param [options.serialize] State -> string function (defaults to `JSON.stringify`)
     * @param [options.deserialize] String -> state function (defaults to `JSON.parse`)
     */
    constructor(options) {
        // Declare every instance field up front, in a fixed order and with explicit descriptors.
        // `state` and `keyValueStore` start as `null`, which the rest of the class treats as
        // "not initialized yet"; everything else is assigned from `options` right below.
        const initialFields = [
            ['defaultState', undefined],
            ['state', null],
            ['persistenceEnabled', undefined],
            ['persistStateKey', undefined],
            ['persistStateKvsName', undefined],
            ['persistStateKvsId', undefined],
            ['keyValueStore', null],
            ['log', undefined],
            ['config', undefined],
            ['serialize', undefined],
            ['deserialize', undefined],
        ];
        for (const [name, value] of initialFields) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
        this.defaultState = options.defaultState;
        this.persistStateKey = options.persistStateKey;
        this.persistenceEnabled = options.persistenceEnabled ?? false;
        this.persistStateKvsName = options.persistStateKvsName;
        this.persistStateKvsId = options.persistStateKvsId;
        this.log = options.logger ?? log_1.default.child({ prefix: 'RecoverableState' });
        this.config = options.config ?? core_1.Configuration.getGlobalConfig();
        this.serialize = options.serialize ?? JSON.stringify;
        this.deserialize = options.deserialize ?? JSON.parse;
        // Bind once so the very same function reference can be passed to both `on()` and `off()`.
        this.persistState = this.persistState.bind(this);
    }
    /**
     * Initialize the recoverable state.
     *
     * This method must be called before using the recoverable state. It loads the saved state
     * if persistence is enabled and registers the object to listen for PERSIST_STATE events.
     * Calling it again once a state is loaded simply returns the current state.
     *
     * @returns The loaded state object
     */
    async initialize() {
        if (this.state !== null && this.state !== undefined) {
            return this.currentValue;
        }
        if (!this.persistenceEnabled) {
            // No storage involved: start from a fresh copy of the default state.
            this.state = this.deserialize(this.serialize(this.defaultState));
            return this.currentValue;
        }
        this.keyValueStore = await core_1.KeyValueStore.open(this.persistStateKvsName ?? this.persistStateKvsId, {
            config: this.config,
        });
        await this.loadSavedState();
        // Register for persist state events
        const eventManager = this.config.getEventManager();
        eventManager.on("persistState" /* EventType.PERSIST_STATE */, this.persistState);
        return this.currentValue;
    }
    /**
     * Clean up resources used by the recoverable state.
     *
     * If persistence is enabled, this method deregisters the object from PERSIST_STATE events
     * and persists the current state one last time. It does nothing when persistence is disabled.
     */
    async teardown() {
        if (!this.persistenceEnabled) {
            return;
        }
        const eventManager = this.config.getEventManager();
        eventManager.off("persistState" /* EventType.PERSIST_STATE */, this.persistState);
        await this.persistState();
    }
    /**
     * Get the current state.
     *
     * @throws {Error} If no state has been loaded yet (the state is still `null`)
     */
    get currentValue() {
        if (this.state === null) {
            throw new Error('Recoverable state has not yet been loaded');
        }
        return this.state;
    }
    /**
     * Reset the state to the default values and clear any persisted state.
     *
     * Resets the current state to a fresh copy of the default state and, if persistence is enabled,
     * clears the persisted state from the KeyValueStore by storing `null` under the persistence key.
     *
     * @throws {Error} If persistence is enabled but the key-value store has not been opened yet;
     *   in that case the in-memory state is left untouched
     */
    async reset() {
        // Validate before mutating: assigning `state` first and throwing afterwards would leave the
        // instance looking loaded, so a later `initialize()` would return early and never open the
        // store or subscribe to persist events.
        if (this.persistenceEnabled && this.keyValueStore === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        this.state = this.deserialize(this.serialize(this.defaultState));
        if (this.persistenceEnabled) {
            await this.keyValueStore.setValue(this.persistStateKey, null);
        }
    }
    /**
     * Persist the current state to the KeyValueStore.
     *
     * This method is typically called in response to a PERSIST_STATE event, but can also be called
     * directly when needed. When persistence is disabled there is no store to write to, so the call
     * only emits the debug log line and returns.
     *
     * @param eventData Optional data associated with a PERSIST_STATE event
     * @throws {Error} If persistence is enabled but the state or the store is not initialized yet
     */
    async persistState(eventData) {
        this.log.debug(`Persisting state of the RecoverableState (eventData=${JSON.stringify(eventData)}).`);
        if (!this.persistenceEnabled) {
            // The store is never opened in this mode, so there is nothing to persist.
            return;
        }
        if (this.keyValueStore === null || this.state === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        await this.keyValueStore.setValue(this.persistStateKey, this.serialize(this.state), {
            contentType: 'text/plain', // HACK - the result is expected to be JSON, but we do this to avoid the implicit JSON.parse in `KeyValueStore.getValue`
        });
    }
    /**
     * Load the saved state from the KeyValueStore.
     *
     * Falls back to a fresh copy of the default state when nothing is stored under the key.
     *
     * @throws {Error} If the key-value store has not been opened yet
     */
    async loadSavedState() {
        if (this.keyValueStore === null) {
            throw new Error('Recoverable state has not yet been initialized');
        }
        const storedState = await this.keyValueStore.getValue(this.persistStateKey);
        const hasStoredState = storedState !== null && storedState !== undefined;
        this.state = hasStoredState
            ? this.deserialize(storedState)
            : this.deserialize(this.serialize(this.defaultState));
    }
}
exports.RecoverableState = RecoverableState;
//# sourceMappingURL=recoverable_state.js.map