UNPKG

@lodestar/beacon-node

Version:

A TypeScript implementation of the beacon chain

github.com/ChainSafe/lodestar

ChainSafe/lodestar

801 lines • 41.6 kB

JavaScript

// Runtime helpers emitted by the TypeScript compiler for `using` declarations (explicit resource management).
// `__addDisposableResource` registers a disposable value on `env.stack`; `__disposeResources` disposes the
// registered values in reverse order and surfaces any errors.
var __addDisposableResource = (this && this.__addDisposableResource) || function (env, value, async) {
  if (value !== null && value !== void 0) {
    if (typeof value !== "object" && typeof value !== "function") throw new TypeError("Object expected.");
    var dispose, inner;
    if (async) {
      if (!Symbol.asyncDispose) throw new TypeError("Symbol.asyncDispose is not defined.");
      dispose = value[Symbol.asyncDispose];
    }
    if (dispose === void 0) {
      if (!Symbol.dispose) throw new TypeError("Symbol.dispose is not defined.");
      dispose = value[Symbol.dispose];
      if (async) inner = dispose;
    }
    if (typeof dispose !== "function") throw new TypeError("Object not disposable.");
    if (inner) dispose = function() { try { inner.call(this); } catch (e) { return Promise.reject(e); } };
    env.stack.push({ value: value, dispose: dispose, async: async });
  }
  else if (async) {
    env.stack.push({ async: true });
  }
  return value;
};
var __disposeResources = (this && this.__disposeResources) || (function (SuppressedError) {
  return function (env) {
    function fail(e) {
      env.error = env.hasError ? new SuppressedError(e, env.error, "An error was suppressed during disposal.") : e;
      env.hasError = true;
    }
    var r, s = 0;
    function next() {
      while (r = env.stack.pop()) {
        try {
          if (!r.async && s === 1) return s = 0, env.stack.push(r), Promise.resolve().then(next);
          if (r.dispose) {
            var result = r.dispose.call(r.value);
            if (r.async) return s |= 2, Promise.resolve(result).then(next, function(e) { fail(e); return next(); });
          }
          else s |= 1;
        }
        catch (e) {
          fail(e);
        }
      }
      if (s === 1) return env.hasError ? Promise.reject(env.error) : Promise.resolve();
      if (env.hasError) throw env.error;
    }
    return next();
  };
})(typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
  var e = new Error(message);
  return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
});
import { computeStartSlotAtEpoch } from "@lodestar/state-transition";
import { MapDef, fromHex, sleep, toHex, toRootHex } from "@lodestar/utils";
import { AllocSource } from "../../util/bufferPool.js";
import { serializeState } from "../serializeState.js";
import { MapTracker } from "./mapMetrics.js";
import { CacheItemType } from "./types.js";
/**
 * Before n-historical states, lodestar keeps all checkpoint states since finalized
 * Since Sep 2024, lodestar stores 3 most recent checkpoint states in memory and the rest on disk. The finalized state
 * may not be available in memory, and stay on disk instead.
 */
export const DEFAULT_MAX_CP_STATE_EPOCHS_IN_MEMORY = 3;
/**
 * By default we don't prune any persistent checkpoint states as it's not safe to delete them during
 * long non-finality as we don't know the state of the chain and there could be a deep (hundreds of epochs) reorg
 * if there are two competing chains with similar weight but we wouldn't have a close enough state to pivot to this chain
 * and instead require a resync from last finalized checkpoint state which could be very far in the past.
 */
export const DEFAULT_MAX_CP_STATE_ON_DISK = Infinity;
/**
 * Point within a slot at which `processState()` starts persisting checkpoint states.
 * Passed to `config.getSlotComponentDurationMs()`; `processState()` describes it as ~67% of the slot.
 */
// TODO GLOAS: re-evaluate this timing
const PROCESS_CHECKPOINT_STATES_BPS = 6667;
/**
 * An implementation of CheckpointStateCache that keep up to n epoch checkpoint states in memory and persist the rest to disk
 * - If it's more than `maxEpochsInMemory` epochs old, it will persist n last epochs to disk based on the view of the block
 * - Once a chain gets finalized we'll prune all states from memory and disk for epochs < finalizedEpoch
 * - In get*() apis if shouldReload is true, it will reload from disk. The reload() api is expensive and should only be called in some important flows:
 *   - Get state for block processing
 *   - updateHeadState
 *   - as with any cache, the state could be evicted from memory at any time, so we should always check if the state is in memory or not
 * - Each time we process a state, we only persist exactly 1 checkpoint state per epoch based on the view of block and prune all others. The persisted
 * checkpoint state could be finalized and used later in archive task, it's also used to regen states.
 * - When we process multiple states in the same epoch, we could persist different checkpoint states of the same epoch because each block could have its
 * own view. See unit test of this file `packages/beacon-node/test/unit/chain/stateCache/persistentCheckpointsCache.test.ts` for more details.
 *
 * The below diagram shows Previous Root Checkpoint State is persisted for epoch (n-2) and Current Root Checkpoint State is persisted for epoch (n-1)
 * while at epoch (n) and (n+1) we have both of them in memory
 *
 *   ╔════════════════════════════════════╗═══════════════╗
 *   ║      persisted to db or fs         ║   in memory   ║
 *   ║        reload if needed            ║               ║
 *   ║ -----------------------------------║---------------║
 *   ║        epoch:       (n-2)   (n-1)  ║  n     (n+1)  ║
 *   ║               |-------|-------|----║--|-------|----║
 *   ║                      ^        ^    ║  ^       ^    ║
 *   ║                                    ║   ^       ^   ║
 *   ╚════════════════════════════════════╝═══════════════╝
 *
 * The "in memory" checkpoint states are similar to the old implementation: we have both Previous Root Checkpoint State and Current Root Checkpoint State per epoch.
 * However in the "persisted to db or fs" part
 *   - if there is no reorg, we only store 1 checkpoint state per epoch, the one that could potentially be justified/finalized later based on the view of the state
 *   - if there is reorg, we may store >=2 checkpoint states per epoch, including any checkpoints with unknown roots to the processed state
 *   - the goal is to make sure we can regen any states later if needed, and we have the checkpoint state that could be justified/finalized later
 */
export class PersistentCheckpointStateCache {
  /** Cache key (see `toCacheKey`) -> in-memory or persisted cache item */
  cache;
  /** Epoch -> Set<blockRoot> */
  epochIndex = new MapDef(() => new Set());
  config;
  metrics;
  logger;
  clock;
  signal;
  /** Cache key of the current precomputed checkpoint, see `updatePreComputedCheckpoint()` */
  preComputedCheckpoint = null;
  /** Number of `get()` hits on `preComputedCheckpoint` since it was last updated */
  preComputedCheckpointHits = null;
  maxEpochsInMemory;
  maxEpochsOnDisk;
  datastore;
  blockStateCache;
  bufferPool;
  /**
   * @param modules - collaborators: `config`, optional `metrics`, `logger`, `clock`, `signal`, `datastore`,
   *   `blockStateCache` and `bufferPool`
   * @param opts - `maxCPStateEpochsInMemory` and `maxCPStateEpochsOnDisk`, each falling back to its
   *   `DEFAULT_MAX_CP_STATE_*` constant when undefined
   * @throws {Error} if either option is negative
   */
  constructor({ config, metrics, logger, clock, signal, datastore, blockStateCache, bufferPool, }, opts) {
    this.cache = new MapTracker(metrics?.cpStateCache);
    this.config = config;
    if (metrics) {
      this.metrics = metrics;
      // report item counts and distinct epoch counts, split by persisted vs in-memory, at collect time
      metrics.cpStateCache.size.addCollect(() => {
        let persistCount = 0;
        let inMemoryCount = 0;
        const memoryEpochs = new Set();
        const persistentEpochs = new Set();
        for (const [key, cacheItem] of this.cache.entries()) {
          const { epoch } = fromCacheKey(key);
          if (isPersistedCacheItem(cacheItem)) {
            persistCount++;
            persistentEpochs.add(epoch);
          } else {
            inMemoryCount++;
            memoryEpochs.add(epoch);
          }
        }
        metrics.cpStateCache.size.set({ type: CacheItemType.persisted }, persistCount);
        metrics.cpStateCache.size.set({ type: CacheItemType.inMemory }, inMemoryCount);
        metrics.cpStateCache.epochSize.set({ type: CacheItemType.persisted }, persistentEpochs.size);
        metrics.cpStateCache.epochSize.set({ type: CacheItemType.inMemory }, memoryEpochs.size);
      });
    }
    this.logger = logger;
    this.clock = clock;
    this.signal = signal;
    if (opts.maxCPStateEpochsInMemory !== undefined && opts.maxCPStateEpochsInMemory < 0) {
      throw new Error("maxEpochsInMemory must be >= 0");
    }
    if (opts.maxCPStateEpochsOnDisk !== undefined && opts.maxCPStateEpochsOnDisk < 0) {
      throw new Error("maxCPStateEpochsOnDisk must be >= 0");
    }
    this.maxEpochsInMemory = opts.maxCPStateEpochsInMemory ?? DEFAULT_MAX_CP_STATE_EPOCHS_IN_MEMORY;
    this.maxEpochsOnDisk = opts.maxCPStateEpochsOnDisk ?? DEFAULT_MAX_CP_STATE_ON_DISK;
    // Specify different datastore for testing
    this.datastore = datastore;
    this.blockStateCache = blockStateCache;
    this.bufferPool = bufferPool;
  }
  /**
   * Reload checkpoint state keys from the last run.
   */
  async init() {
    if (this.datastore?.init) {
      await this.datastore.init();
    }
    const persistedKeys = await this.datastore.readKeys();
    // all checkpoint states from the last run are not trusted, remove them
    // otherwise if we have a bad checkpoint state from the last run, the node gets stuck
    // this was found during mekong devnet, see https://github.com/ChainSafe/lodestar/pull/7255
    await Promise.all(persistedKeys.map((key) => this.datastore.remove(key)));
    this.logger.info("Removed persisted checkpoint states from the last run", {
      count: persistedKeys.length,
      maxEpochsInMemory: this.maxEpochsInMemory,
    });
  }
  /**
   * Get a state from cache, it may reload from disk.
   * This is an expensive api, should only be called in some important flows:
   * - Validate a gossip block
   * - Get block for processing
   * - Regen head state
   *
   * @param cp - checkpoint with `rootHex` and `epoch`
   * @returns the state, or null if it is unknown to this cache or could not be loaded
   */
  async getOrReload(cp) {
    const stateOrStateBytesData = await this.getStateOrLoadDb(cp);
    if (stateOrStateBytesData === null || isBeaconStateView(stateOrStateBytesData)) {
      return stateOrStateBytesData ?? null;
    }
    const { persistedKey, stateBytes } = stateOrStateBytesData;
    const logMeta = { persistedKey: toHex(persistedKey) };
    this.logger.debug("Reload: read state successful", logMeta);
    this.metrics?.cpStateCache.stateReloadSecFromSlot.observe(this.clock?.secFromSlot(this.clock?.currentSlot ?? 0) ?? 0);
    const seedState = this.findSeedStateToReload(cp);
    this.metrics?.cpStateCache.stateReloadEpochDiff.observe(Math.abs(seedState.epoch - cp.epoch));
    this.logger.debug("Reload: found seed state", { ...logMeta, seedSlot: seedState.slot });
    try {
      // compiled form of a `using` declaration: env_1 tracks the resources to dispose when the scope exits
      const env_1 = { stack: [], error: void 0, hasError: false };
      try {
        // 80% of validators serialization time comes from memory allocation, this is to avoid it
        const sszTimer = this.metrics?.cpStateCache.stateReloadValidatorsSerializeDuration.startTimer();
        // automatically free the buffer pool after this scope
        const validatorsBytesWithKey = __addDisposableResource(env_1, this.serializeStateValidators(seedState), false);
        let validatorsBytes = validatorsBytesWithKey?.buffer;
        if (validatorsBytes == null) {
          // fallback logic in case we can't use the buffer pool
          this.metrics?.cpStateCache.stateReloadValidatorsSerializeAllocCount.inc();
          validatorsBytes = seedState.serializeValidators();
        }
        sszTimer?.();
        const timer = this.metrics?.cpStateCache.stateReloadDuration.startTimer();
        // preload validators and balances for faster state transition
        const newCachedState = seedState.loadOtherState(stateBytes, validatorsBytes, {
          preloadValidatorsAndBalances: true,
        });
        // hashTreeRoot() calls the commit() inside
        // there is no modification inside the state, it's just that we want to compute and cache all roots
        const stateRoot = toRootHex(newCachedState.hashTreeRoot());
        timer?.();
        this.logger.debug("Reload: cached state load successful", {
          ...logMeta,
          stateSlot: newCachedState.slot,
          stateRoot,
          seedSlot: seedState.slot,
        });
        // only remove persisted state once we reload successfully
        const cpKey = toCacheKey(cp);
        this.cache.set(cpKey, { type: CacheItemType.inMemory, state: newCachedState, persistedKey });
        this.epochIndex.getOrDefault(cp.epoch).add(cp.rootHex);
        // don't prune from memory here, call it at the last 1/3 of slot 0 of an epoch
        return newCachedState;
      } catch (e_1) {
        env_1.error = e_1;
        env_1.hasError = true;
      } finally {
        // rethrows env_1.error (if any) after disposal, which is handled by the outer catch
        __disposeResources(env_1);
      }
    } catch (e) {
      this.logger.debug("Reload: error loading cached state", logMeta, e);
      return null;
    }
  }
  /**
   * Return either state or state bytes loaded from db.
   */
  async getStateOrBytes(cp) {
    const stateOrLoadedState = await this.getStateOrLoadDb(cp);
    if (stateOrLoadedState === null || isBeaconStateView(stateOrLoadedState)) {
      return stateOrLoadedState;
    }
    return stateOrLoadedState.stateBytes;
  }
  /**
   * Return either state or state bytes with persisted key loaded from db.
   *
   * @returns the in-memory state, `{ persistedKey, stateBytes }` read from the datastore, or null if the
   *   checkpoint is not tracked or the datastore has no bytes for it
   * @throws {Error} if the cache item is in-memory although `get()` returned no state
   */
  async getStateOrLoadDb(cp) {
    const cpKey = toCacheKey(cp);
    const inMemoryState = this.get(cpKey);
    if (inMemoryState) {
      return inMemoryState;
    }
    const cacheItem = this.cache.get(cpKey);
    if (cacheItem === undefined) {
      return null;
    }
    if (isInMemoryCacheItem(cacheItem)) {
      // should not happen, in-memory state is handled above
      throw new Error("Expected persistent key");
    }
    const persistedKey = cacheItem.value;
    const dbReadTimer = this.metrics?.cpStateCache.stateReloadDbReadTime.startTimer();
    const stateBytes = await this.datastore.read(persistedKey);
    dbReadTimer?.();
    if (stateBytes === null) {
      return null;
    }
    return { persistedKey, stateBytes };
  }
  /**
   * Similar to get() api without reloading from disk
   *
   * @param cpOrKey - either a cache key string or a checkpoint with `rootHex` and `epoch`
   * @returns the in-memory state, or null if the item is unknown or only persisted
   */
  get(cpOrKey) {
    this.metrics?.cpStateCache.lookups.inc();
    const cpKey = typeof cpOrKey === "string" ? cpOrKey : toCacheKey(cpOrKey);
    const cacheItem = this.cache.get(cpKey);
    if (cacheItem === undefined) {
      return null;
    }
    // a persisted-only item still counts as a hit here
    this.metrics?.cpStateCache.hits.inc();
    if (cpKey === this.preComputedCheckpoint) {
      this.preComputedCheckpointHits = (this.preComputedCheckpointHits ?? 0) + 1;
    }
    if (isInMemoryCacheItem(cacheItem)) {
      const { state } = cacheItem;
      this.metrics?.cpStateCache.stateClonedCount.observe(state.clonedCount);
      return state;
    }
    return null;
  }
  /**
   * Add a state of a checkpoint to this cache, prune from memory if necessary.
   */
  add(cp, state) {
    const cpHex = toCheckpointHex(cp);
    const key = toCacheKey(cpHex);
    const cacheItem = this.cache.get(key);
    this.metrics?.cpStateCache.adds.inc();
    if (cacheItem !== undefined && isPersistedCacheItem(cacheItem)) {
      const persistedKey = cacheItem.value;
      // was persisted to disk, set back to memory
      this.cache.set(key, { type: CacheItemType.inMemory, state, persistedKey });
      this.logger.verbose("Added checkpoint state to memory but a persisted key existed", {
        epoch: cp.epoch,
        rootHex: cpHex.rootHex,
        persistedKey: toHex(persistedKey),
      });
    } else {
      this.cache.set(key, { type: CacheItemType.inMemory, state });
      this.logger.verbose("Added checkpoint state to memory", { epoch: cp.epoch, rootHex: cpHex.rootHex });
    }
    this.epochIndex.getOrDefault(cp.epoch).add(cpHex.rootHex);
    this.prunePersistedStates();
  }
  /**
   * Searches in-memory state for the latest cached state with a `root` without reload, starting with `maxEpoch` and descending
   */
  getLatest(rootHex, maxEpoch) {
    // only consider epochs lte `maxEpoch`, sorted in descending order
    const epochs = Array.from(this.epochIndex.keys())
      .filter((e) => e <= maxEpoch)
      .sort((a, b) => b - a);
    for (const epoch of epochs) {
      if (this.epochIndex.get(epoch)?.has(rootHex)) {
        const inMemoryClonedState = this.get({ rootHex, epoch });
        if (inMemoryClonedState) {
          return inMemoryClonedState;
        }
      }
    }
    return null;
  }
  /**
   * Searches state for the latest cached state with a `root`, reload if needed, starting with `maxEpoch` and descending
   * This is expensive api, should only be called in some important flows:
   * - Validate a gossip block
   * - Get block for processing
   * - Regen head state
   */
  async getOrReloadLatest(rootHex, maxEpoch) {
    // only consider epochs lte `maxEpoch`, sorted in descending order
    const epochs = Array.from(this.epochIndex.keys())
      .filter((e) => e <= maxEpoch)
      .sort((a, b) => b - a);
    for (const epoch of epochs) {
      if (this.epochIndex.get(epoch)?.has(rootHex)) {
        try {
          const state = await this.getOrReload({ rootHex, epoch });
          if (state) {
            return state;
          }
        } catch (e) {
          // keep descending to older epochs on failure
          this.logger.debug("Error get or reload state", { epoch, rootHex }, e);
        }
      }
    }
    return null;
  }
  /**
   * Update the precomputed checkpoint and return the number of hits for the
   * previous one (if any).
   */
  updatePreComputedCheckpoint(rootHex, epoch) {
    const previousHits = this.preComputedCheckpointHits;
    this.preComputedCheckpoint = toCacheKey({ rootHex, epoch });
    this.preComputedCheckpointHits = 0;
    return previousHits;
  }
  /**
   * This is just to conform to the old implementation
   */
  prune() {
    // do nothing
  }
  /**
   * Prune all checkpoint states before the provided finalized epoch.
   */
  pruneFinalized(finalizedEpoch) {
    for (const epoch of this.epochIndex.keys()) {
      if (epoch < finalizedEpoch) {
        // not awaited: failures are only logged
        this.deleteAllEpochItems(epoch).catch((e) => this.logger.debug("Error delete all epoch items", { epoch, finalizedEpoch }, e));
      }
    }
  }
  /**
   * After processing a block, prune from memory based on the view of that block.
   * This likely persists 1 state per epoch, at the last 1/3 of slot 0 of an epoch although it'll be called on every last 1/3 of slot.
   * Given the following block b was processed with b2, b1, b0 are ancestors in epoch (n-2), (n-1), n respectively
   *
   *       epoch:       (n-2)       (n-1)         n         (n+1)
   *              |-----------|-----------|-----------|-----------|
   *                         ^           ^           ^  ^
   *                         |           |           |  |
   * block chain:           b2---------->b1--------->b0-->b
   *
   * After processing block b, if maxEpochsInMemory is:
   * - 2 then we'll persist {root: b2, epoch n-2} checkpoint state to disk
   * - 1 then we'll persist {root: b2, epoch n-2} and {root: b1, epoch n-1} checkpoint state to disk
   * - 0 then we'll persist {root: b2, epoch n-2} and {root: b1, epoch n-1} and {root: b0, epoch n} checkpoint state to disk
   *   - if any old epochs checkpoint states are persisted, no need to do it again
   *
   * Note that for each epoch there could be multiple checkpoint states, usually 2, one for Previous Root Checkpoint State and one for Current Root Checkpoint State.
   * We normally only persist 1 checkpoint state per epoch, the one that could potentially be justified/finalized later based on the view of the block.
   * Other checkpoint states are pruned from memory.
   *
   * This design also covers the reorg scenario. Given block c in the same epoch n where c.slot > b.slot, c is not descendant of b, and c is built on top of c0
   * instead of b0 (epoch (n - 1))
   *
   *       epoch:       (n-2)       (n-1)         n         (n+1)
   *              |-----------|-----------|-----------|-----------|
   *                         ^           ^    ^      ^  ^    ^
   *                         |           |    |      |  |    |
   * block chain:           b2---------->b1----->c0->b0-->b  |
   *                                          ║              |
   *                                          ╚═══════════>c (reorg)
   *
   * After processing block c, if maxEpochsInMemory is:
   * - 0 then we'll persist {root: c0, epoch: n} checkpoint state to disk. Note that regen should populate {root: c0, epoch: n} checkpoint state before.
   *
   *       epoch:                  (n-1)                                    n                                     (n+1)
   *              |-------------------------------------------------------------|-------------------------------------------------------------|
   *                                        ^                        ^                        ^                         ^
   *   _______                              |                        |                        |                         |
   *  |       |                             |                        |                        |                         |
   *  |  db   |====== reload ======> {root: b1, epoch: n-1} cp state ======> c0 block state ======> {root: c0, epoch: n} cp state =====> c block state
   *  |_______|
   *
   *
   *
   * - 1 then we'll persist {root: b1, epoch n-1} checkpoint state to disk. Note that at epoch n there is both {root: b0, epoch: n} and {root: c0, epoch: n} checkpoint states in memory
   * - 2 then we'll persist {root: b2, epoch n-2} checkpoint state to disk, there are also 2 checkpoint states in memory at epoch n, same to the above (maxEpochsInMemory=1)
   *
   * As of Mar 2024, it takes <=350ms to persist a holesky state on fast server
   *
   * @returns the number of checkpoint states persisted by this call
   */
  async processState(blockRootHex, state) {
    let persistCount = 0;
    // it's important to sort the epochs in ascending order, in case of big reorg we always want to keep the most recent checkpoint states
    const sortedEpochs = Array.from(this.epochIndex.keys()).sort((a, b) => a - b);
    if (sortedEpochs.length <= this.maxEpochsInMemory) {
      return 0;
    }
    const blockSlot = state.slot;
    const processCPStatesTimeMs = this.config.getSlotComponentDurationMs(PROCESS_CHECKPOINT_STATES_BPS);
    // we always have clock in production, fallback value is only for test
    const msFromSlot = this.clock?.msFromSlot(blockSlot) ?? processCPStatesTimeMs;
    const msToProcessCPStates = processCPStatesTimeMs - msFromSlot;
    if (msToProcessCPStates > 0) {
      // At ~67% of slot is the most free time of every slot, take that chance to persist checkpoint states
      // normally it should only persist checkpoint states at ~67% of slot 0 of epoch
      await sleep(msToProcessCPStates, this.signal);
    }
    // at syncing time, it's critical to persist checkpoint states as soon as possible to avoid OOM during unfinality time
    // if node is synced this is not a hot time because block comes late, we'll likely miss attestation already, or the block is orphaned
    const persistEpochs = sortedEpochs.slice(0, sortedEpochs.length - this.maxEpochsInMemory);
    for (const lowestEpoch of persistEpochs) {
      try {
        // getBlockRootAtSlot() may fail, see https://github.com/ChainSafe/lodestar/issues/7495
        if (state.slot < computeStartSlotAtEpoch(lowestEpoch)) {
          // there is no checkpoint states of epochs newer than this state
          break;
        }
        // usually there is only 0 or 1 epoch to persist in this loop
        persistCount += await this.processPastEpoch(blockRootHex, state, lowestEpoch);
        this.logger.verbose("Processed past epoch", { epoch: lowestEpoch, slot: blockSlot, root: blockRootHex });
      } catch (e) {
        this.logger.debug("Error processing past epoch", { epoch: lowestEpoch, slot: blockSlot, root: blockRootHex }, e);
      }
    }
    if (persistCount > 0) {
      this.logger.verbose("Persisted checkpoint states", {
        slot: blockSlot,
        root: blockRootHex,
        persistCount,
        persistEpochs: persistEpochs.length,
      });
    }
    return persistCount;
  }
  /**
   * Find a seed state to reload the state of provided checkpoint. Based on the design of n-historical state:
   *
   *   ╔════════════════════════════════════╗═══════════════╗
   *   ║      persisted to db or fs         ║   in memory   ║
   *   ║        reload if needed            ║               ║
   *   ║ -----------------------------------║---------------║
   *   ║        epoch:       (n-2)   (n-1)  ║  n     (n+1)  ║
   *   ║               |-------|-------|----║--|-------|----║
   *   ║                      ^        ^    ║  ^       ^    ║
   *   ║                                    ║   ^       ^   ║
   *   ╚════════════════════════════════════╝═══════════════╝
   *
   * we always reload an epoch in the past. We'll start with epoch n then (n+1) prioritizing ones with the same view of `reloadedCp`.
   *
   * Use seed state from the block cache if cannot find any seed states within this cache.
   */
  findSeedStateToReload(reloadedCp) {
    const trackedEpochs = Array.from(this.epochIndex.keys());
    // With no tracked epochs these are -Infinity / Infinity, which makes the scan below a no-op
    const maxEpoch = Math.max(...trackedEpochs);
    const minEpoch = Math.min(...trackedEpochs);
    const reloadedCpSlot = computeStartSlotAtEpoch(reloadedCp.epoch);
    let firstState = null;
    const logCtx = { reloadedCpEpoch: reloadedCp.epoch, reloadedCpRoot: reloadedCp.rootHex };
    // no need to check epochs before `maxEpoch - this.maxEpochsInMemory + 1` because they are all persisted.
    // Clamp to the lowest tracked epoch: lower epochs have no index entries, and an unclamped non-finite
    // start (empty index or non-finite maxEpochsInMemory) would never advance with `epoch++`.
    const startEpoch = Math.max(minEpoch, maxEpoch - this.maxEpochsInMemory + 1);
    for (let epoch = startEpoch; epoch <= maxEpoch; epoch++) {
      // if there's at least 1 state in memory in an epoch, just return the 1st one
      // NOTE(review): this check runs at the start of the NEXT iteration, so a non-matching state found in
      // the last scanned epoch is not returned and the block state cache seed is used instead - confirm intent
      if (firstState !== null) {
        return firstState;
      }
      for (const rootHex of this.epochIndex.get(epoch) || []) {
        const cpKey = toCacheKey({ rootHex, epoch });
        const cacheItem = this.cache.get(cpKey);
        if (cacheItem === undefined) {
          continue;
        }
        if (isInMemoryCacheItem(cacheItem)) {
          const { state } = cacheItem;
          if (firstState === null) {
            firstState = state;
          }
          const cpLog = { cpEpoch: epoch, cpRoot: rootHex };
          try {
            // amongst states of the same epoch, choose the one with the same view of reloadedCp
            if (reloadedCpSlot < state.slot &&
              toRootHex(state.getBlockRootAtSlot(reloadedCpSlot)) === reloadedCp.rootHex) {
              this.logger.verbose("Reload: use checkpoint state as seed state", { ...cpLog, ...logCtx });
              return state;
            }
          } catch (e) {
            // getBlockRootAtSlot may throw error
            this.logger.debug("Error finding checkpoint state to reload", { ...cpLog, ...logCtx }, e);
          }
        }
      }
    }
    // fallback to using the default seed state from block state cache
    const seedBlockState = this.blockStateCache.getSeedState();
    this.logger.verbose("Reload: use default block state as seed state", { stateSlot: seedBlockState.slot, ...logCtx });
    return seedBlockState;
  }
  /**
   * Drop every cache item and the epoch index. Nothing is removed from the datastore here.
   */
  clear() {
    this.cache.clear();
    this.epochIndex.clear();
  }
  /** ONLY FOR DEBUGGING PURPOSES. For lodestar debug API */
  dumpSummary() {
    return Array.from(this.cache.keys()).map((key) => {
      const cp = fromCacheKey(key);
      // TODO: add checkpoint key and persistent key to the summary
      return {
        slot: computeStartSlotAtEpoch(cp.epoch),
        root: cp.rootHex,
        reads: this.cache.readCount.get(key) ?? 0,
        lastRead: this.cache.lastRead.get(key) ?? 0,
        checkpointState: true,
      };
    });
  }
  /**
   * @returns an iterator over the states of all in-memory cache items (persisted-only items are skipped)
   */
  getStates() {
    const items = Array.from(this.cache.values())
      .filter(isInMemoryCacheItem)
      .map((item) => item.state);
    return items.values();
  }
  /** ONLY FOR DEBUGGING PURPOSES. For spec tests on error */
  dumpCheckpointKeys() {
    return Array.from(this.cache.keys());
  }
  /**
   * Prune or persist checkpoint states in an epoch
   * 1) If there is 1 checkpoint state with known root, persist it. This is when there is skipped slot at block 0 of epoch
   *   slot:                       n
   *         |-----------------------|-----------------------|
   *         PRCS root               |
   *
   * 2) If there are 2 checkpoint states, PRCS and CRCS and both roots are known to this state, persist CRCS. If the block is reorged,
   *    PRCS is regen and populated to this cache again.
   *   slot:                       n
   *         |-----------------------|-----------------------|
   *         PRCS root - prune       |
   *         CRCS root - persist     |
   *
   * 3) If there are any roots that unknown to this state, persist their cp state. This is to handle the current block is reorged later
   *
   * 4) (derived from above) If there are 2 checkpoint states, PRCS and an unknown root, persist both.
   *    - In the example below block slot (n + 1) reorged n
   *    - If we process state n + 1, CRCS is unknown to it
   *    - we need to also store CRCS to handle the case (n+2) switches to n again
   *
   *           PRCS - persist
   *             |  processState()
   *             |       |
   *        -------------n+1
   *       /     |
   *     n-1 ------n------------n+2
   *             |
   *           CRCS - persist
   *
   *   - PRCS is the checkpoint state that could be justified/finalized later based on the view of the state
   *   - unknown root checkpoint state is persisted to handle the reorg back to that branch later
   *
   * Performance note:
   *   - In normal condition, we persist 1 checkpoint state per epoch.
   *   - In reorged condition, we may persist multiple (most likely 2) checkpoint states per epoch.
   *
   * @returns the number of checkpoint states written to the datastore
   */
  async processPastEpoch(blockRootHex, state, epoch) {
    let persistCount = 0;
    const epochBoundarySlot = computeStartSlotAtEpoch(epoch);
    const epochBoundaryRoot = epochBoundarySlot === state.slot ? fromHex(blockRootHex) : state.getBlockRootAtSlot(epochBoundarySlot);
    const epochBoundaryHex = toRootHex(epochBoundaryRoot);
    const prevEpochRoot = toRootHex(state.getBlockRootAtSlot(epochBoundarySlot - 1));
    // for each epoch, usually there are 2 rootHexes respective to the 2 checkpoint states: Previous Root Checkpoint State and Current Root Checkpoint State
    const cpRootHexes = this.epochIndex.get(epoch) ?? [];
    const persistedRootHexes = new Set();
    // 1) if there is no CRCS, persist PRCS (block 0 of epoch is skipped). In this case prevEpochRoot === epochBoundaryHex
    // 2) if there are PRCS and CRCS, persist CRCS => persist CRCS
    // => this is simplified to always persist epochBoundaryHex
    persistedRootHexes.add(epochBoundaryHex);
    // 3) persist any states with unknown roots to this state
    for (const rootHex of cpRootHexes) {
      if (rootHex !== epochBoundaryHex && rootHex !== prevEpochRoot) {
        persistedRootHexes.add(rootHex);
      }
    }
    for (const rootHex of cpRootHexes) {
      const cpKey = toCacheKey({ epoch: epoch, rootHex });
      const cacheItem = this.cache.get(cpKey);
      if (cacheItem !== undefined && isInMemoryCacheItem(cacheItem)) {
        let { persistedKey } = cacheItem;
        // the checkpoint state held by the cache item; shadows the `state` parameter within this branch
        const { state } = cacheItem;
        const logMeta = {
          stateSlot: state.slot,
          rootHex,
          epochBoundaryHex,
          persistedKey: persistedKey ? toHex(persistedKey) : "",
        };
        if (persistedRootHexes.has(rootHex)) {
          if (persistedKey) {
            // we don't care if the checkpoint state is already persisted
            this.logger.verbose("Pruned checkpoint state from memory but no need to persist", logMeta);
          } else {
            // persist and do not update epochIndex
            this.metrics?.cpStateCache.statePersistSecFromSlot.observe(this.clock?.secFromSlot(this.clock?.currentSlot ?? 0) ?? 0);
            const cpPersist = { epoch: epoch, root: fromHex(rootHex) };
            // It's not sustainable to allocate ~240MB for each state every epoch, so we use buffer pool to reuse the memory.
            // As monitored on holesky as of Jan 2024:
            //   - This does not increase heap allocation while gc time is the same
            //   - It helps stabilize persist time and save ~300ms in average (1.5s vs 1.2s)
            //   - It also helps the state reload to save ~500ms in average (4.3s vs 3.8s)
            //   - Also `serializeState.test.ts` perf test shows a lot of differences allocating ~240MB once vs per state serialization
            const timer = this.metrics?.stateSerializeDuration.startTimer({
              source: AllocSource.PERSISTENT_CHECKPOINTS_CACHE_STATE,
            });
            persistedKey = await serializeState(state, AllocSource.PERSISTENT_CHECKPOINTS_CACHE_STATE, (stateBytes) => {
              timer?.();
              return this.datastore.write(cpPersist, stateBytes);
            }, this.bufferPool);
            persistCount++;
            this.logger.verbose("Pruned checkpoint state from memory and persisted to disk", {
              ...logMeta,
              persistedKey: toHex(persistedKey),
            });
          }
          // overwrite cpKey, this means the state is deleted from memory
          this.cache.set(cpKey, { type: CacheItemType.persisted, value: persistedKey });
        } else {
          if (persistedKey) {
            // persisted file will be eventually deleted by the archive task
            // this also means the state is deleted from memory
            this.cache.set(cpKey, { type: CacheItemType.persisted, value: persistedKey });
            // do not update epochIndex
          } else {
            // delete the state from memory
            this.cache.delete(cpKey);
            const rootSet = this.epochIndex.get(epoch);
            if (rootSet) {
              rootSet.delete(rootHex);
              if (rootSet.size === 0) {
                this.epochIndex.delete(epoch);
              }
            }
          }
          this.metrics?.cpStateCache.statePruneFromMemoryCount.inc();
          this.logger.verbose("Pruned checkpoint state from memory", logMeta);
        }
      }
    }
    return persistCount;
  }
  /**
   * Delete all items of an epoch from disk and memory
   */
  async deleteAllEpochItems(epoch) {
    // counts the persisted states removed from the datastore
    let persistCount = 0;
    const rootHexes = this.epochIndex.get(epoch) || [];
    for (const rootHex of rootHexes) {
      const key = toCacheKey({ rootHex, epoch });
      const cacheItem = this.cache.get(key);
      if (cacheItem) {
        // an in-memory item may also carry the key of an earlier persisted copy
        const persistedKey = isPersistedCacheItem(cacheItem) ? cacheItem.value : cacheItem.persistedKey;
        if (persistedKey) {
          await this.datastore.remove(persistedKey);
          persistCount++;
          this.metrics?.cpStateCache.persistedStateRemoveCount.inc();
        }
      }
      this.cache.delete(key);
    }
    this.epochIndex.delete(epoch);
    this.logger.verbose("Pruned checkpoint states for epoch", {
      epoch,
      persistCount,
      rootHexes: Array.from(rootHexes).join(","),
    });
  }
  /**
   * Prune persisted checkpoint states from disk.
   * Note that this should handle all possible errors and not throw.
   */
  prunePersistedStates() {
    //                       epochsOnDisk                               epochsInMemory
    // |----------------------------------------------------------|----------------------|
    const maxTrackedEpochs = this.maxEpochsOnDisk + this.maxEpochsInMemory;
    if (this.epochIndex.size <= maxTrackedEpochs) {
      return;
    }
    const sortedEpochs = Array.from(this.epochIndex.keys()).sort((a, b) => a - b);
    const pruneEpochs = sortedEpochs.slice(0, sortedEpochs.length - maxTrackedEpochs);
    for (const epoch of pruneEpochs) {
      // not awaited: failures are only logged
      this.deleteAllEpochItems(epoch).catch((e) => this.logger.debug("Error delete all epoch items", { epoch, maxEpochsOnDisk: this.maxEpochsOnDisk, maxEpochsInMemory: this.maxEpochsInMemory }, e));
    }
  }
  /**
   * Serialize validators to bytes leveraging the buffer pool to save memory allocation.
   * - As monitored on holesky as of Jan 2024, it helps save ~500ms state reload time (4.3s vs 3.8s)
   * - Also `serializeState.test.ts` perf test shows a lot of differences allocating validators bytes once vs every time,
   * This is 2x - 3x faster than allocating memory every time.
   *
   * @returns the buffer-pool allocation holding the serialized validators, or null when there is no buffer pool
   *   or the pool could not provide a buffer
   */
  serializeStateValidators(state) {
    const size = state.serializedValidatorsSize();
    if (this.bufferPool) {
      const bufferWithKey = this.bufferPool.alloc(size, AllocSource.PERSISTENT_CHECKPOINTS_CACHE_VALIDATORS);
      if (bufferWithKey) {
        const validatorsBytes = bufferWithKey.buffer;
        const dataView = new DataView(validatorsBytes.buffer, validatorsBytes.byteOffset, validatorsBytes.byteLength);
        state.serializeValidatorsToBytes({ uint8Array: validatorsBytes, dataView }, 0);
        return bufferWithKey;
      }
    }
    return null;
  }
}
/**
 * Convert a checkpoint with a `root` into its hex form `{ epoch, rootHex }`.
 */
export function toCheckpointHex(checkpoint) {
  return {
    epoch: checkpoint.epoch,
    rootHex: toRootHex(checkpoint.root),
  };
}
/**
 * Format a hex checkpoint as `<rootHex>:<epoch>`.
 * Note this differs from the internal cache key, which uses `_` as separator.
 */
export function toCheckpointKey(cp) {
  return `${cp.rootHex}:${cp.epoch}`;
}
/**
 * Build the internal cache key `<rootHex>_<epoch>` of a hex checkpoint.
 */
function toCacheKey(cp) {
  return `${cp.rootHex}_${cp.epoch}`;
}
/**
 * Inverse of `toCacheKey()`: split a cache key back into `{ rootHex, epoch }`, with `epoch` as a number.
 */
function fromCacheKey(key) {
  const [rootHex, epoch] = key.split("_");
  return {
    rootHex,
    epoch: Number(epoch),
  };
}
/**
 * Tell a state apart from a `{ persistedKey, stateBytes }` result by the presence of a `slot` property.
 */
function isBeaconStateView(stateOrBytes) {
  return stateOrBytes.slot !== undefined;
}
/** True if the cache item holds a state in memory. */
function isInMemoryCacheItem(cacheItem) {
  return cacheItem.type === CacheItemType.inMemory;
}
/** True if the cache item only references a persisted state. */
function isPersistedCacheItem(cacheItem) {
  return cacheItem.type === CacheItemType.persisted;
}
//# sourceMappingURL=persistentCheckpointsCache.js.map