UNPKG

@ethereumjs/binarytree

Version:
739 lines (660 loc) 27.1 kB
import {
  EthereumJSErrorWithoutCode,
  Lock,
  bitsToBytes,
  bytesToBits,
  bytesToHex,
  concatBytes,
  equalsBits,
  equalsBytes,
  matchingBitsLength,
  setLengthRight,
} from '@ethereumjs/util'
import debug from 'debug'

import { CheckpointDB } from './db/index.ts'
import { InternalBinaryNode } from './node/internalNode.ts'
import { StemBinaryNode } from './node/stemNode.ts'
import { decodeBinaryNode, isInternalBinaryNode, isStemBinaryNode } from './node/util.ts'
import { type BinaryTreeOpts, ROOT_DB_KEY } from './types.ts'

import type { PutBatch } from '@ethereumjs/util'
import type { Debugger } from 'debug'
import type { BinaryNode } from './node/types.ts'

/**
 * Result of a {@link BinaryTree.findPath} lookup.
 */
interface Path {
  /** The node stored at the sought key, or `null` if no such node was found */
  node: BinaryNode | null
  /** The bits of the key that were not matched while walking the tree */
  remaining: number[]
  /** The nodes encountered on the way down, each paired with the bit path recorded for it */
  stack: Array<[BinaryNode, number[]]>
}

/**
 * The basic binary tree interface, use with `import { BinaryTree } from '@ethereumjs/binarytree'`.
 */
export class BinaryTree {
  /** The options for instantiating the binary tree */
  protected _opts: BinaryTreeOpts

  /** The root for an empty tree */
  EMPTY_TREE_ROOT: Uint8Array

  /** Checkpointing wrapper around the database passed in via `opts.db` */
  protected _db!: CheckpointDB
  /** Required byte length of a root passed to {@link BinaryTree.root} (set to 32 in the constructor) */
  protected _hashLen: number
  /** Lock held by `commit` and `revert` while they run */
  protected _lock = new Lock()
  /** The current root of the tree */
  protected _root: Uint8Array

  /** Whether debug logging is enabled (decided once, in the constructor) */
  protected DEBUG: boolean
  protected _debug: Debugger = debug('binarytree:#')
  /** Logs a message under the given namespaces when `DEBUG` is set; otherwise a no-op */
  protected debug: (...args: any) => void

  /**
   * Creates a new binary tree.
   * @param opts Options for instantiating the binary tree
   * @throws If `opts.db` is already a `CheckpointDB`, or if `opts.root` has an invalid length
   *
   * Note: in most cases, the static {@link createBinaryTree} constructor should be used. It uses the same API but provides sensible defaults
   */
  constructor(opts: BinaryTreeOpts) {
    this._opts = opts

    // The tree wraps the given db in its own CheckpointDB, so an already wrapped db is rejected.
    if (opts.db instanceof CheckpointDB) {
      throw EthereumJSErrorWithoutCode('Cannot pass in an instance of CheckpointDB')
    }
    this._db = new CheckpointDB({ db: opts.db, cacheSize: opts.cacheSize })

    this.EMPTY_TREE_ROOT = new Uint8Array(32)
    this._hashLen = 32
    this._root = this.EMPTY_TREE_ROOT

    if (opts?.root) {
      this.root(opts.root)
    }

    // Debug logging is only considered when no `window` global exists, and is then
    // switched on if the DEBUG environment variable contains 'ethjs'.
    this.DEBUG =
      typeof window === 'undefined' ? (process?.env?.DEBUG?.includes('ethjs') ?? false) : false
    this.debug = this.DEBUG
      ? (message: string, namespaces: string[] = []) => {
          let log = this._debug
          for (const name of namespaces) {
            log = log.extend(name)
          }
          log(message)
        }
      : (..._: any) => {}

    this.DEBUG &&
      this.debug(
        `Trie created: || Root: ${bytesToHex(this._root)} || Persistent: ${this._opts.useRootPersistence} || CacheSize: ${this._opts.cacheSize} || ----------------`,
      )
  }

  /**
   * Gets and/or Sets the current root of the `tree`
   * @param value - if given, the new root; `null` resets the tree to {@link BinaryTree.EMPTY_TREE_ROOT}
   * @returns The (possibly just updated) current root
   * @throws If `value` is not `_hashLen` bytes long
   */
  root(value?: Uint8Array | null): Uint8Array {
    if (value !== undefined) {
      if (value === null) {
        value = this.EMPTY_TREE_ROOT
      }

      if (value.length !== this._hashLen) {
        throw EthereumJSErrorWithoutCode(`Invalid root length. Roots are ${this._hashLen} bytes`)
      }

      this._root = value
    }

    return this._root
  }

  /**
   * Checks if a given root exists.
   * @param root - the root hash to look up in the database
   * @returns `true` if the database returns a value for `root`. If the lookup fails with
   * 'Missing node in DB', returns whether `root` equals the empty tree root.
   * @throws Any other error raised by the database lookup
   */
  async checkRoot(root: Uint8Array): Promise<boolean> {
    try {
      const value = await this._db.get(root)
      return value !== undefined
    } catch (error: any) {
      if (error.message === 'Missing node in DB') {
        return equalsBytes(root, this.EMPTY_TREE_ROOT)
      } else {
        throw error
      }
    }
  }

  /**
   * Gets values at a given binary tree `stem` and set of suffixes
   * @param stem - the stem of the stem node where we're seeking values
   * @param suffixes - an array of suffixes corresponding to the values desired
   * @returns A Promise that resolves to an array of `Uint8Array`s or `null` depending on if values were found.
   * If the stem is not found, will return an empty array.
   * @throws If `stem` is not 31 bytes long
   */
  async get(stem: Uint8Array, suffixes: number[]): Promise<(Uint8Array | null)[]> {
    if (stem.length !== 31)
      throw EthereumJSErrorWithoutCode(`expected stem with length 31; got ${stem.length}`)
    this.DEBUG && this.debug(`Stem: ${bytesToHex(stem)}; Suffix: ${suffixes}`, ['get'])
    const stemPath = await this.findPath(stem)
    if (stemPath.node instanceof StemBinaryNode) {
      // The retrieved stem node contains an array of 256 possible values.
      // We read all the suffixes to get the desired values
      const values = []
      for (const suffix of suffixes) {
        const value = stemPath.node.getValue(suffix)
        this.DEBUG &&
          this.debug(`Suffix: ${suffix}; Value: ${value === null ? 'null' : bytesToHex(value)}`, [
            'get',
          ])
        values.push(value)
      }
      return values
    }

    return []
  }

  /**
   * Stores a given `value` at the given `key` or performs a deletion if `value` is null.
   * @param stem - the stem (must be 31 bytes) to store the value at.
   * @param suffixes - array of suffixes at which to store individual values.
   * @param values - the value(s) to store (or null for deletion).
   * @returns A Promise that resolves once the value is stored.
   * @throws If `stem` is not 31 bytes long, if a non-empty `values` differs in length from
   * `suffixes`, or if the tree is non-empty but no root node can be found
   */
  async put(stem: Uint8Array, suffixes: number[], values: (Uint8Array | null)[]): Promise<void> {
    if (stem.length !== 31)
      throw EthereumJSErrorWithoutCode(`expected stem with length 31, got ${stem.length}`)
    // NOTE(review): an empty `values` array passes this check even when `suffixes` is
    // non-empty, in which case `values[i]` below is `undefined` - confirm this is intended.
    if (values.length > 0 && values.length !== suffixes.length)
      throw EthereumJSErrorWithoutCode(
        `expected number of values (${values.length}) to equal number of suffixes (${suffixes.length})`,
      )
    this.DEBUG && this.debug(`Stem: ${bytesToHex(stem)}`, ['put'])

    // A stack of updated nodes starting with the stem node being updated/created to be saved to the DB
    const putStack: [Uint8Array, BinaryNode | null][] = []

    // If the tree is empty, initialize it.
    if (equalsBytes(this.root(), this.EMPTY_TREE_ROOT)) {
      await this._createInitialNode(stem, suffixes, values)
      return
    }

    // Find the path to the node (or the nearest node) for the given stem.
    const foundPath = await this.findPath(stem)

    // We should always at least get the root node back
    if (foundPath.stack.length === 0) throw EthereumJSErrorWithoutCode(`Root node not found in trie`)

    // Step 1) Create or update the stem node
    let stemNode: StemBinaryNode
    // If we found a stem node with the same stem, we'll update it.
    if (
      foundPath.node &&
      isStemBinaryNode(foundPath.node) &&
      equalsBytes(foundPath.node.stem, stem)
    ) {
      stemNode = foundPath.node
    } else {
      // Otherwise, we'll create a new stem node.
      stemNode = StemBinaryNode.create(stem)
      this.DEBUG && this.debug(`Creating new stem node for stem: ${bytesToHex(stem)}`, ['put'])
    }

    // Update the values in the stem node
    for (let i = 0; i < suffixes.length; i++) {
      const suffix = suffixes[i]
      const value = values[i]
      stemNode.setValue(suffix, value)
      this.DEBUG &&
        this.debug(
          `Setting value for suffix: ${suffix} to value: ${value instanceof Uint8Array ? bytesToHex(value) : value} at stem node with stem: ${bytesToHex(stem)}`,
          ['put'],
        )
    }

    // If all values are null then we treat this as a deletion.
    if (stemNode.values.every((val) => val === null)) {
      if (foundPath.node !== null) {
        this.DEBUG && this.debug(`Deleting stem node for stem: ${bytesToHex(stem)}`, ['put'])
        putStack.push([this.merkelize(stemNode), null])
      } else {
        return // nothing to delete
      }
    } else {
      // Otherwise, we add the new or updated stemNode to the putStack
      putStack.push([this.merkelize(stemNode), stemNode])
    }

    // Get the bit representation of the stem.
    const stemBits = bytesToBits(stemNode.stem)
    // We keep a reference to the current "parent" node path as we update up the tree.
    let lastUpdatedParentPath: number[] = []

    // Step 2: Add any needed new internal nodes if inserting a new stem.
    // If updating an existing stem, just update the parent internal node reference
    if (foundPath.stack.length > 1) {
      // Pop the nearest node on the path.
      const [nearestNode, nearestNodePath] = foundPath.stack.pop()!
      const parentPath = foundPath.stack[foundPath.stack.length - 1]?.[1] ?? []
      this.DEBUG && this.debug(`Adding necessary internal nodes.`, ['put'])
      // Update the parent branch if necessary.
      // If an update was necessary, updateBranch returns a stack of internal nodes
      // that connect the new stem node to the previous parent inner node
      const updated = this.updateBranch(stemNode, nearestNode, nearestNodePath, parentPath)
      if (updated !== undefined) {
        for (const update of updated) {
          putStack.push([this.merkelize(update.node), update.node])
          lastUpdatedParentPath = update.parentPath
        }
      }
    }

    // Step 3: Update remaining parent node hashes
    while (foundPath.stack.length > 1) {
      const [node, path] = foundPath.stack.pop()!
      if (isInternalBinaryNode(node)) {
        // Set child pointer to the last internal node in the putStack (last updated internal node)
        node.setChild(lastUpdatedParentPath[lastUpdatedParentPath.length - 1], {
          hash: putStack[putStack.length - 1][0], // Reuse hash already computed above
          path: lastUpdatedParentPath,
        })
        putStack.push([this.merkelize(node), node]) // Update node hash and add to putStack
        lastUpdatedParentPath = path
        this.DEBUG &&
          this.debug(`Updated parent internal node hash for path ${path.join(',')}`, ['put'])
      } else {
        throw EthereumJSErrorWithoutCode(
          `Expected internal node at path ${path.join(',')}, got ${node}`,
        )
      }
    }

    // Step 4: Update the root node.
    let rootNode = foundPath.stack.pop()![0] // The root node.
    const childReference = putStack[putStack.length - 1][1]
    if (isStemBinaryNode(rootNode)) {
      // If the root is a stem node but its stem differs from the one we're updating,
      // then we need to split the root. Per the spec, when two stems share a common prefix,
      // we create one internal node per bit in that common prefix, and then at the first
      // divergence, an internal node that points to both stem nodes.
      if (!equalsBytes(rootNode.stem, stem)) {
        this.DEBUG && this.debug(`Root stem differs from new stem. Splitting root.`, ['put'])
        const rootBits = bytesToBits(rootNode.stem)
        const commonPrefixLength = matchingBitsLength(rootBits, stemBits)
        // Create the split node at the divergence bit.
        const splitNode = InternalBinaryNode.create()
        const branchForNew = stemBits[commonPrefixLength]
        const branchForExisting = rootBits[commonPrefixLength]
        splitNode.setChild(branchForNew, {
          hash: this.merkelize(stemNode),
          path: stemBits,
        })
        splitNode.setChild(branchForExisting, {
          hash: this.merkelize(rootNode),
          path: rootBits,
        })
        let newRoot = splitNode
        // If there is a common prefix (i.e. commonPrefixLength > 0), we build a chain
        // of internal nodes representing that prefix.
        for (let depth = commonPrefixLength - 1; depth >= 0; depth--) {
          this.DEBUG && this.debug(`Creating internal node at depth ${depth}`, ['put'])
          putStack.push([this.merkelize(newRoot), newRoot])
          const parent = InternalBinaryNode.create()
          // At each level, the branch is determined by the bit of the new stem at position i.
          parent.setChild(stemBits[depth], {
            hash: this.merkelize(newRoot),
            path: stemBits.slice(0, depth + 1),
          })
          newRoot = parent
        }
        // Now newRoot is an internal node chain that represents the entire common prefix,
        // ending in a split node that distinguishes the two different stems.
        rootNode = newRoot
      }
    } else if (childReference !== null) {
      // For an internal root node, we assign the last update child reference to the root.
      // NOTE(review): when the last putStack entry is a deletion (`null`), the root's child
      // pointer is left untouched - confirm that this is the intended deletion behaviour.
      rootNode.setChild(stemBits[0], {
        hash: this.merkelize(childReference),
        path: isStemBinaryNode(childReference) ? stemBits : lastUpdatedParentPath,
      })
    }

    this.root(this.merkelize(rootNode))
    putStack.push([this._root, rootNode])
    this.DEBUG && this.debug(`Updated root hash to ${bytesToHex(this._root)}`, ['put'])
    await this.saveStack(putStack)
  }

  /**
   * Helper method for updating or creating the parent internal node for a given stem node.
   * If the nearest node is a stem node with a different stem, a new internal node is created
   * to branch at the first differing bit.
   * If the nearest node is an internal node, its child reference is updated.
   *
   * @param stemNode - The child stem node that will be referenced by the new/updated internal node.
   * @param nearestNode - The nearest node to the new stem node.
   * @param pathToNode - The path (in bits) to `nearestNode` as known from the trie.
   * @param pathToParent - The path (in bits) to the parent of `nearestNode`; the number of bits it
   * shares with `pathToNode` bounds how far up new internal nodes are created.
   * @returns An array of nodes and their partial paths from the new stem node to the branch parent node
   * or `undefined` if no changes were made.
   */
  updateBranch(
    stemNode: StemBinaryNode,
    nearestNode: BinaryNode,
    pathToNode: number[],
    pathToParent: number[],
  ): { node: BinaryNode; parentPath: number[] }[] | undefined {
    const stemBits = bytesToBits(stemNode.stem)
    if (isStemBinaryNode(nearestNode)) {
      // For two different stems, find the first differing bit.
      const nearestNodeStemBits = bytesToBits(nearestNode.stem)
      const diffIndex = matchingBitsLength(stemBits, nearestNodeStemBits)
      const parentDiffIndex = matchingBitsLength(pathToNode, pathToParent)

      const newInternal = InternalBinaryNode.create()
      // Set the child pointer for the new stem node using the bit at diffIndex.
      newInternal.setChild(stemBits[diffIndex], {
        hash: this.merkelize(stemNode),
        path: stemBits,
      })
      // Set the child pointer for the existing stem node.
      newInternal.setChild(nearestNodeStemBits[diffIndex], {
        hash: this.merkelize(nearestNode),
        path: nearestNodeStemBits,
      })

      const putStack = [{ node: newInternal, parentPath: stemBits.slice(0, diffIndex) }]
      let parent = newInternal
      for (let depth = diffIndex - 1; depth > parentDiffIndex; depth--) {
        this.DEBUG && this.debug(`Creating internal node at depth ${depth}`, ['put'])
        const newParent = InternalBinaryNode.create()
        // At each level, the branch is determined by the bit of the new stem at position i.
        newParent.setChild(stemBits[depth], {
          hash: this.merkelize(parent),
          path: stemBits.slice(0, depth + 1),
        })
        putStack.push({ node: newParent, parentPath: stemBits.slice(0, depth) })
        parent = newParent
      }
      // Return the stack of new internal nodes that connect the new stem node to the previous parent inner node
      return putStack
    } else if (isInternalBinaryNode(nearestNode)) {
      // For an internal node, determine the branch index using the parent's known path length.
      const branchIndex = stemBits[pathToNode.length]
      nearestNode.setChild(branchIndex, {
        hash: this.merkelize(stemNode),
        path: stemBits,
      })
      return [{ node: nearestNode, parentPath: pathToNode }]
    }
    return undefined
  }

  /**
   * Tries to find a path to the node for the given key.
   * It returns a `Path` object containing:
   *   - `node`: the found node (if any),
   *   - `stack`: an array of tuples [node, path] representing the nodes encountered,
   *   - `remaining`: the bits of the key that were not matched.
   *
   * @param keyInBytes - the search key as a byte array.
   * @returns A Promise that resolves to a Path object.
   * @throws If the root node, or a child node referenced on the path, is missing from the database
   */
  async findPath(keyInBytes: Uint8Array): Promise<Path> {
    const keyInBits = bytesToBits(keyInBytes)
    this.DEBUG && this.debug(`Searching for key: ${bytesToHex(keyInBytes)}`, ['find_path'])
    const result: Path = {
      node: null,
      stack: [],
      remaining: keyInBits,
    }

    // If tree is empty, return empty path.
    if (equalsBytes(this.root(), this.EMPTY_TREE_ROOT)) return result

    // Get the root node.
    let rawNode = await this._db.get(this.root())
    if (rawNode === undefined) throw EthereumJSErrorWithoutCode('root node should exist')
    const rootNode = decodeBinaryNode(rawNode)

    this.DEBUG && this.debug(`Starting with Root Node: [${bytesToHex(this.root())}]`, ['find_path'])
    // Treat the root as being at an empty path.
    result.stack.push([rootNode, []])

    // If the root node is a stem node, we're done.
    if (isStemBinaryNode(rootNode)) {
      this.DEBUG && this.debug(`Found stem node at root.`, ['find_path'])
      if (equalsBytes(keyInBytes, rootNode.stem)) {
        result.node = rootNode
        result.remaining = []
      }
      return result
    }

    // The root is an internal node. Determine the branch to follow using the first bit of the key
    let childNode = rootNode.getChild(keyInBits[0])
    let finished = false
    while (!finished) {
      if (childNode === null) break

      // Look up child node by its node hash.
      rawNode = await this._db.get(childNode.hash)
      if (rawNode === undefined)
        throw EthereumJSErrorWithoutCode(`missing node at ${childNode.path}`)
      const decodedNode = decodeBinaryNode(rawNode)

      // Determine how many bits match between keyInBits and the stored path in childNode.
      const matchingKeyLength = matchingBitsLength(keyInBits, childNode.path)

      // If we have an exact match (i.e. the stored path equals a prefix of the key)
      // and either the key is fully consumed or we have reached a stem node, we stop.
      if (
        matchingKeyLength === childNode.path.length &&
        (matchingKeyLength === keyInBits.length || isStemBinaryNode(decodedNode))
      ) {
        finished = true
        if (
          matchingKeyLength === keyInBits.length &&
          equalsBits(keyInBits, childNode.path) === true
        ) {
          // We found the sought node
          this.DEBUG &&
            this.debug(
              `Path ${bytesToHex(keyInBytes)} - found full path to node ${bytesToHex(
                this.merkelize(decodedNode),
              )}.`,
              ['find_path'],
            )
          result.node = decodedNode
          result.remaining = []
          return result
        }
        // We didn't find the sought node so record the unmatched tail of the key.
        result.remaining = keyInBits.slice(matchingKeyLength)
        result.stack.push([decodedNode, childNode.path])
        return result
      }

      // Otherwise, push this internal node and continue.
      result.stack.push([decodedNode, keyInBits.slice(0, matchingKeyLength)])
      this.DEBUG &&
        this.debug(
          `Partial Path ${keyInBits.slice(0, matchingKeyLength)} - found next node in path ${bytesToHex(
            this.merkelize(decodedNode),
          )}.`,
          ['find_path'],
        )

      // If the decoded node is not internal, then we cannot traverse further.
      if (!isInternalBinaryNode(decodedNode)) {
        result.remaining = keyInBits.slice(matchingKeyLength)
        finished = true
        break
      }

      // The next branch is determined by the next bit after the matched prefix.
      const childIndex = keyInBits[matchingKeyLength]
      childNode = decodedNode.getChild(childIndex)
      if (childNode === null) {
        result.remaining = keyInBits.slice(matchingKeyLength)
        finished = true
      }
    }

    // NOTE(review): the 256 offset below assumes a 256-bit key, while get/put pass 31-byte
    // stems (presumably 248 bits) - confirm which slice this debug message should show.
    this.DEBUG &&
      this.debug(
        `Found partial path ${bytesToHex(bitsToBytes(keyInBits.slice(256 - result.remaining.length)))} but sought node is not present in trie.`,
        ['find_path'],
      )
    return result
  }

  /**
   * Deletes a given `key` from the tree.
   * Implemented as a `put` of `null` for every given suffix.
   * @param stem - the stem of the stem node to delete from
   * @param suffixes - the suffixes to delete
   * @returns A Promise that resolves once the key is deleted.
   */
  async del(stem: Uint8Array, suffixes: number[]): Promise<void> {
    this.DEBUG && this.debug(`Stem: ${bytesToHex(stem)}; Suffix(es): ${suffixes}`, ['del'])
    await this.put(stem, suffixes, new Array(suffixes.length).fill(null))
  }

  /**
   * Create empty root node for initializing an empty tree.
   * Sets the root to the merkelization of `null` (32 zero bytes) and passes a
   * `null` node for that key to `saveStack`.
   */
  async createRootNode(): Promise<void> {
    const rootNode = null

    this.DEBUG && this.debug(`No root node. Creating new root node`, ['initialize'])
    this.root(this.merkelize(rootNode))
    await this.saveStack([[this.root(), rootNode]])
    return
  }

  /**
   * Creates the initial node from an empty tree.
   * Builds a stem node holding the given values, makes its hash the tree root,
   * stores the serialized node under that hash and persists the root.
   * @param stem - the stem of the stem node to create
   * @param indexes - the suffix positions to set within the stem node
   * @param values - the values to set, matched to `indexes` by position
   * @private
   */
  protected async _createInitialNode(
    stem: Uint8Array,
    indexes: number[],
    values: (Uint8Array | null)[],
  ): Promise<void> {
    const initialNode = StemBinaryNode.create(stem)
    for (let i = 0; i < indexes.length; i++) {
      initialNode.setValue(indexes[i], values[i])
    }
    this.root(this.merkelize(initialNode))
    await this._db.put(this.root(), initialNode.serialize())
    await this.persistRoot()
  }

  /**
   * Saves a stack of nodes to the database.
   *
   * @param putStack - an array of tuples of keys (the callers in this class pass node hashes) and nodes (BinaryNodes).
   * A `null` node results in a `del` operation for its key, any other node in a `put` of its serialization.
   */
  async saveStack(putStack: [Uint8Array, BinaryNode | null][]): Promise<void> {
    const opStack = putStack.map(([key, node]) => {
      return {
        type: node !== null ? 'put' : 'del',
        key,
        value: node !== null ? node.serialize() : null,
      } as PutBatch
    })
    await this._db.batch(opStack)
  }

  /**
   * Creates a proof from a tree and key that can be verified using {@link BinaryTree.verifyBinaryProof}.
   * @param key a 32 byte binary tree key (31 byte stem + 1 byte suffix)
   * @returns The serialized nodes on the path to the stem, followed by the found node (if any)
   */
  async createBinaryProof(key: Uint8Array): Promise<Uint8Array[]> {
    this.DEBUG && this.debug(`creating proof for ${bytesToHex(key)}`, ['create_proof'])
    // We only use the stem (i.e. the first 31 bytes) to find the path to the node
    const { node, stack } = await this.findPath(key.slice(0, 31))
    const proof = stack.map(([node, _]) => node.serialize())
    if (node !== null) {
      // If node is found, add node to proof
      proof.push(node.serialize())
    }

    return proof
  }

  /**
   * The `data` event is given an `Object` that has two properties; the `key` and the `value`. Both should be Uint8Arrays.
   * @return Returns a [stream](https://nodejs.org/dist/latest-v12.x/docs/api/stream.html#stream_class_stream_readable) of the contents of the `tree`
   * @throws Always, since this method is not implemented
   */
  createReadStream(): any {
    throw EthereumJSErrorWithoutCode('Not implemented')
  }

  /**
   * Returns a copy of the underlying tree.
   *
   * Note on db: the copy will create a reference to the
   * same underlying database.
   *
   * Note on cache: for memory reasons a copy will not
   * recreate a new LRU cache but initialize with cache
   * being deactivated.
   *
   * @param includeCheckpoints - If true and during a checkpoint, the copy will contain the checkpointing metadata and will use the same scratch as underlying db.
   */
  shallowCopy(includeCheckpoints = true): BinaryTree {
    const tree = new BinaryTree({
      ...this._opts,
      db: this._db.db.shallowCopy(),
      root: this.root(),
      cacheSize: 0,
    })
    if (includeCheckpoints && this.hasCheckpoints()) {
      tree._db.setCheckpoints(this._db.checkpoints)
    }
    return tree
  }

  /**
   * Persists the root hash in the underlying database
   * (only when the `useRootPersistence` option is `true`)
   */
  async persistRoot() {
    if (this._opts.useRootPersistence === true) {
      await this._db.put(ROOT_DB_KEY, this.root())
    }
  }

  /**
   * Is the tree during a checkpoint phase?
   */
  hasCheckpoints() {
    return this._db.hasCheckpoints()
  }

  /**
   * Creates a checkpoint that can later be reverted to or committed.
   * After this is called, all changes can be reverted until `commit` is called.
   */
  checkpoint() {
    this._db.checkpoint(this.root())
  }

  /**
   * Commits a checkpoint to disk, if current checkpoint is not nested.
   * If nested, only sets the parent checkpoint as current checkpoint.
   * @throws If not during a checkpoint phase
   */
  async commit(): Promise<void> {
    if (!this.hasCheckpoints()) {
      throw EthereumJSErrorWithoutCode('trying to commit when not checkpointed')
    }

    await this._lock.acquire()
    try {
      await this._db.commit()
      await this.persistRoot()
    } finally {
      // Release even if the commit fails, so a failure cannot leave the lock held.
      this._lock.release()
    }
  }

  /**
   * Reverts the tree to the state it was at when `checkpoint` was first called.
   * If during a nested checkpoint, sets root to most recent checkpoint, and sets
   * parent checkpoint as current.
   * @throws If not during a checkpoint phase
   */
  async revert(): Promise<void> {
    if (!this.hasCheckpoints()) {
      throw EthereumJSErrorWithoutCode('trying to revert when not checkpointed')
    }

    await this._lock.acquire()
    try {
      this.root(await this._db.revert())
      await this.persistRoot()
    } finally {
      // Release even if the revert fails, so a failure cannot leave the lock held.
      this._lock.release()
    }
  }

  /**
   * Flushes all checkpoints, restoring the initial checkpoint state.
   */
  flushCheckpoints() {
    this._db.checkpoints = []
  }

  /**
   * Hashes a 32 or 64 byte message with the configured `hashFunction`.
   * @param msg - the data to hash, or `null`
   * @returns 32 zero bytes for `null` or 64 zero bytes, otherwise the hash function output
   * @throws If `msg` is neither 32 nor 64 bytes long
   */
  protected hash(msg: Uint8Array | null): Uint8Array {
    // As per spec, if value is null or a 64-byte array of 0s, hash(msg) is a 32-byte array of 0s
    if (msg === null || (msg.length === 64 && msg.every((byte) => byte === 0))) {
      return new Uint8Array(32)
    }

    if (msg.length !== 32 && msg.length !== 64) {
      throw EthereumJSErrorWithoutCode('Data must be 32 or 64 bytes')
    }

    return Uint8Array.from(this._opts.hashFunction.call(undefined, msg))
  }

  /**
   * Computes the hash that identifies a node.
   * @param node - the node to hash, or `null`
   * @returns 32 zero bytes for `null`; for an internal node the hash of its two child hashes;
   * for a stem node the hash of the padded stem and the root of its pairwise-hashed values
   */
  protected merkelize(node: BinaryNode | null): Uint8Array {
    if (node === null) {
      return new Uint8Array(32)
    }

    if (isInternalBinaryNode(node)) {
      const [leftChild, rightChild] = node.children
      return this.hash(
        concatBytes(
          leftChild === null ? this.hash(null) : leftChild.hash,
          rightChild === null ? this.hash(null) : rightChild.hash,
        ),
      )
    }

    // Otherwise, it's a stem node.
    // Map each value in node.values through the hash function.
    let currentLayerHashes = node.values.map((value) => this.hash(value))

    // While there is more than one hash at the current layer, combine them pairwise.
    while (currentLayerHashes.length > 1) {
      const newLayerHashes = []
      for (let i = 0; i < currentLayerHashes.length; i += 2) {
        newLayerHashes.push(
          this.hash(concatBytes(currentLayerHashes[i], currentLayerHashes[i + 1])),
        )
      }
      currentLayerHashes = newLayerHashes
    }

    // Return the hash of the concatenation of node.stem appended with 00 and the final level hash.
    return this.hash(concatBytes(setLengthRight(node.stem, 32), currentLayerHashes[0]))
  }
}