UNPKG

@atproto/repo

Version:

atproto repo and MST implementation

865 lines 31.4 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.Leaf = exports.MST = exports.nodeDataDef = void 0; const zod_1 = require("zod"); const common_1 = require("@atproto/common"); const block_map_1 = require("../block-map"); const cid_set_1 = require("../cid-set"); const error_1 = require("../error"); const parse = __importStar(require("../parse")); const util = __importStar(require("./util")); /** * This is an implementation of a Merkle Search Tree (MST) * The data structure is described here: https://hal.inria.fr/hal-02303490/document * The MST is an ordered, insert-order-independent, deterministic tree. * Keys are laid out in alphabetic order. * The key insight of an MST is that each key is hashed and starting 0s are counted * to determine which layer it falls on (5 zeros for ~32 fanout). * This is a merkle tree, so each subtree is referred to by it's hash (CID). * When a leaf is changed, ever tree on the path to that leaf is changed as well, * thereby updating the root hash. * * For atproto, we use SHA-256 as the key hashing algorithm, and ~4 fanout * (2-bits of zero per layer). */ /** * A couple notes on CBOR encoding: * * There are never two neighboring subtrees. * Therefore, we can represent a node as an array of * leaves & pointers to their right neighbor (possibly null), * along with a pointer to the left-most subtree (also possibly null). * * Most keys in a subtree will have overlap. * We do compression on prefixes by describing keys as: * - the length of the prefix that it shares in common with the preceding key * - the rest of the string * * For example: * If the first leaf in a tree is `bsky/posts/abcdefg` and the second is `bsky/posts/abcdehi` * Then the first will be described as `prefix: 0, key: 'bsky/posts/abcdefg'`, * and the second will be described as `prefix: 16, key: 'hi'.` */ const subTreePointer = zod_1.z.nullable(common_1.schema.cid); const treeEntry = zod_1.z.object({ p: zod_1.z.number(), // prefix count of ascii chars that this key shares with the prev key k: common_1.schema.bytes, // the rest of the key outside the shared prefix v: common_1.schema.cid, // value t: subTreePointer, // next subtree (to the right of leaf) }); const nodeData = zod_1.z.object({ l: subTreePointer, // left-most subtree e: zod_1.z.array(treeEntry), //entries }); exports.nodeDataDef = { name: 'mst node', schema: nodeData, }; class MST { constructor(storage, pointer, entries, layer) { Object.defineProperty(this, "storage", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "entries", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "layer", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "pointer", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "outdatedPointer", { enumerable: true, configurable: true, writable: true, value: false }); this.storage = storage; this.entries = entries; this.layer = layer; this.pointer = pointer; } static async create(storage, entries = [], opts) { const pointer = await util.cidForEntries(entries); const { layer = null } = opts || {}; return new MST(storage, pointer, entries, layer); } static async fromData(storage, data, opts) { const { layer = null } = opts || {}; const entries = await util.deserializeNodeData(storage, data, opts); const pointer = await (0, common_1.cidForCbor)(data); return new MST(storage, pointer, entries, layer); } // this is really a *lazy* load, doesn't actually touch storage static load(storage, cid, opts) { const { layer = null } = opts || {}; return new MST(storage, cid, null, layer); } // Immutability // ------------------- // We never mutate an MST, we just return a new MST with updated values async newTree(entries) { const mst = new MST(this.storage, this.pointer, entries, this.layer); mst.outdatedPointer = true; return mst; } // Getters (lazy load) // ------------------- // We don't want to load entries of every subtree, just the ones we need async getEntries() { if (this.entries) return [...this.entries]; if (this.pointer) { const data = await this.storage.readObj(this.pointer, exports.nodeDataDef); const firstLeaf = data.e[0]; const layer = firstLeaf !== undefined ? await util.leadingZerosOnHash(firstLeaf.k) : undefined; this.entries = await util.deserializeNodeData(this.storage, data, { layer, }); return this.entries; } throw new Error('No entries or CID provided'); } // We don't hash the node on every mutation for performance reasons // Instead we keep track of whether the pointer is outdated and only (recursively) calculate when needed async getPointer() { if (!this.outdatedPointer) return this.pointer; const { cid } = await this.serialize(); this.pointer = cid; this.outdatedPointer = false; return this.pointer; } async serialize() { let entries = await this.getEntries(); const outdated = entries.filter((e) => e.isTree() && e.outdatedPointer); if (outdated.length > 0) { await Promise.all(outdated.map((e) => e.getPointer())); entries = await this.getEntries(); } const data = util.serializeNodeData(entries); const block = await (0, common_1.dataToCborBlock)(data); return { cid: block.cid, bytes: block.bytes, }; } // In most cases, we get the layer of a node from a hint on creation // In the case of the topmost node in the tree, we look for a key in the node & determine the layer // In the case where we don't find one, we recurse down until we do. // If we still can't find one, then we have an empty tree and the node is layer 0 async getLayer() { this.layer = await this.attemptGetLayer(); if (this.layer === null) this.layer = 0; return this.layer; } async attemptGetLayer() { if (this.layer !== null) return this.layer; const entries = await this.getEntries(); let layer = await util.layerForEntries(entries); if (layer === null) { for (const entry of entries) { if (entry.isTree()) { const childLayer = await entry.attemptGetLayer(); if (childLayer !== null) { layer = childLayer + 1; break; } } } } if (layer !== null) this.layer = layer; return layer; } // Core functionality // ------------------- // Return the necessary blocks to persist the MST to repo storage async getUnstoredBlocks() { const blocks = new block_map_1.BlockMap(); const pointer = await this.getPointer(); const alreadyHas = await this.storage.has(pointer); if (alreadyHas) return { root: pointer, blocks }; const entries = await this.getEntries(); const data = util.serializeNodeData(entries); await blocks.add(data); for (const entry of entries) { if (entry.isTree()) { const subtree = await entry.getUnstoredBlocks(); blocks.addMap(subtree.blocks); } } return { root: pointer, blocks: blocks }; } // Adds a new leaf for the given key/value pair // Throws if a leaf with that key already exists async add(key, value, knownZeros) { util.ensureValidMstKey(key); const keyZeros = knownZeros ?? (await util.leadingZerosOnHash(key)); const layer = await this.getLayer(); const newLeaf = new Leaf(key, value); if (keyZeros === layer) { // it belongs in this layer const index = await this.findGtOrEqualLeafIndex(key); const found = await this.atIndex(index); if (found?.isLeaf() && found.key === key) { throw new Error(`There is already a value at key: ${key}`); } const prevNode = await this.atIndex(index - 1); if (!prevNode || prevNode.isLeaf()) { // if entry before is a leaf, (or we're on far left) we can just splice in return this.spliceIn(newLeaf, index); } else { // else we try to split the subtree around the key const splitSubTree = await prevNode.splitAround(key); return this.replaceWithSplit(index - 1, splitSubTree[0], newLeaf, splitSubTree[1]); } } else if (keyZeros < layer) { // it belongs on a lower layer const index = await this.findGtOrEqualLeafIndex(key); const prevNode = await this.atIndex(index - 1); if (prevNode && prevNode.isTree()) { // if entry before is a tree, we add it to that tree const newSubtree = await prevNode.add(key, value, keyZeros); return this.updateEntry(index - 1, newSubtree); } else { const subTree = await this.createChild(); const newSubTree = await subTree.add(key, value, keyZeros); return this.spliceIn(newSubTree, index); } } else { // it belongs on a higher layer & we must push the rest of the tree down const split = await this.splitAround(key); // if the newly added key has >=2 more leading zeros than the current highest layer // then we need to add in structural nodes in between as well let left = split[0]; let right = split[1]; const layer = await this.getLayer(); const extraLayersToAdd = keyZeros - layer; // intentionally starting at 1, since first layer is taken care of by split for (let i = 1; i < extraLayersToAdd; i++) { if (left !== null) { left = await left.createParent(); } if (right !== null) { right = await right.createParent(); } } const updated = []; if (left) updated.push(left); updated.push(new Leaf(key, value)); if (right) updated.push(right); const newRoot = await MST.create(this.storage, updated, { layer: keyZeros, }); newRoot.outdatedPointer = true; return newRoot; } } // Gets the value at the given key async get(key) { const index = await this.findGtOrEqualLeafIndex(key); const found = await this.atIndex(index); if (found && found.isLeaf() && found.key === key) { return found.value; } const prev = await this.atIndex(index - 1); if (prev && prev.isTree()) { return prev.get(key); } return null; } // Edits the value at the given key // Throws if the given key does not exist async update(key, value) { util.ensureValidMstKey(key); const index = await this.findGtOrEqualLeafIndex(key); const found = await this.atIndex(index); if (found && found.isLeaf() && found.key === key) { return this.updateEntry(index, new Leaf(key, value)); } const prev = await this.atIndex(index - 1); if (prev && prev.isTree()) { const updatedTree = await prev.update(key, value); return this.updateEntry(index - 1, updatedTree); } throw new Error(`Could not find a record with key: ${key}`); } // Deletes the value at the given key async delete(key) { const altered = await this.deleteRecurse(key); return altered.trimTop(); } async deleteRecurse(key) { const index = await this.findGtOrEqualLeafIndex(key); const found = await this.atIndex(index); // if found, remove it on this level if (found?.isLeaf() && found.key === key) { const prev = await this.atIndex(index - 1); const next = await this.atIndex(index + 1); if (prev?.isTree() && next?.isTree()) { const merged = await prev.appendMerge(next); return this.newTree([ ...(await this.slice(0, index - 1)), merged, ...(await this.slice(index + 2)), ]); } else { return this.removeEntry(index); } } // else recurse down to find it const prev = await this.atIndex(index - 1); if (prev?.isTree()) { const subtree = await prev.deleteRecurse(key); const subTreeEntries = await subtree.getEntries(); if (subTreeEntries.length === 0) { return this.removeEntry(index - 1); } else { return this.updateEntry(index - 1, subtree); } } else { throw new Error(`Could not find a record with key: ${key}`); } } // Simple Operations // ------------------- // update entry in place async updateEntry(index, entry) { const update = [ ...(await this.slice(0, index)), entry, ...(await this.slice(index + 1)), ]; return this.newTree(update); } // remove entry at index async removeEntry(index) { const updated = [ ...(await this.slice(0, index)), ...(await this.slice(index + 1)), ]; return this.newTree(updated); } // append entry to end of the node async append(entry) { const entries = await this.getEntries(); return this.newTree([...entries, entry]); } // prepend entry to start of the node async prepend(entry) { const entries = await this.getEntries(); return this.newTree([entry, ...entries]); } // returns entry at index async atIndex(index) { const entries = await this.getEntries(); return entries[index] ?? null; } // returns a slice of the node (like array.slice) async slice(start, end) { const entries = await this.getEntries(); return entries.slice(start, end); } // inserts entry at index async spliceIn(entry, index) { const update = [ ...(await this.slice(0, index)), entry, ...(await this.slice(index)), ]; return this.newTree(update); } // replaces an entry with [ Maybe(tree), Leaf, Maybe(tree) ] async replaceWithSplit(index, left, leaf, right) { const update = await this.slice(0, index); if (left) update.push(left); update.push(leaf); if (right) update.push(right); update.push(...(await this.slice(index + 1))); return this.newTree(update); } // if the topmost node in the tree only points to another tree, trim the top and return the subtree async trimTop() { let entries; try { entries = await this.getEntries(); } catch (err) { if (err instanceof error_1.MissingBlockError) { return this; } else { throw err; } } if (entries.length === 1 && entries[0].isTree()) { return entries[0].trimTop(); } else { return this; } } // Subtree & Splits // ------------------- // Recursively splits a sub tree around a given key async splitAround(key) { const index = await this.findGtOrEqualLeafIndex(key); // split tree around key const leftData = await this.slice(0, index); const rightData = await this.slice(index); let left = await this.newTree(leftData); let right = await this.newTree(rightData); // if the far right of the left side is a subtree, // we need to split it on the key as well const lastInLeft = leftData[leftData.length - 1]; if (lastInLeft?.isTree()) { left = await left.removeEntry(leftData.length - 1); const split = await lastInLeft.splitAround(key); if (split[0]) { left = await left.append(split[0]); } if (split[1]) { right = await right.prepend(split[1]); } } return [ (await left.getEntries()).length > 0 ? left : null, (await right.getEntries()).length > 0 ? right : null, ]; } // The simple merge case where every key in the right tree is greater than every key in the left tree // (used primarily for deletes) async appendMerge(toMerge) { if ((await this.getLayer()) !== (await toMerge.getLayer())) { throw new Error('Trying to merge two nodes from different layers of the MST'); } const thisEntries = await this.getEntries(); const toMergeEntries = await toMerge.getEntries(); const lastInLeft = thisEntries[thisEntries.length - 1]; const firstInRight = toMergeEntries[0]; if (lastInLeft?.isTree() && firstInRight?.isTree()) { const merged = await lastInLeft.appendMerge(firstInRight); return this.newTree([ ...thisEntries.slice(0, thisEntries.length - 1), merged, ...toMergeEntries.slice(1), ]); } else { return this.newTree([...thisEntries, ...toMergeEntries]); } } // Create relatives // ------------------- async createChild() { const layer = await this.getLayer(); return MST.create(this.storage, [], { layer: layer - 1, }); } async createParent() { const layer = await this.getLayer(); const parent = await MST.create(this.storage, [this], { layer: layer + 1, }); parent.outdatedPointer = true; return parent; } // Finding insertion points // ------------------- // finds index of first leaf node that is greater than or equal to the value async findGtOrEqualLeafIndex(key) { const entries = await this.getEntries(); const maybeIndex = entries.findIndex((entry) => entry.isLeaf() && entry.key >= key); // if we can't find, we're on the end return maybeIndex >= 0 ? maybeIndex : entries.length; } // List operations (partial tree traversal) // ------------------- // @TODO write tests for these // Walk tree starting at key async *walkFrom(key) { yield this; const index = await this.findGtOrEqualLeafIndex(key); const entries = await this.getEntries(); const found = entries[index]; if (found && found.isLeaf() && found.key === key) { yield found; } else { const prev = entries[index - 1]; if (prev) { if (prev.isLeaf() && prev.key === key) { yield prev; } else if (prev.isTree()) { yield* prev.walkFrom(key); } } } for (let i = index; i < entries.length; i++) { const entry = entries[i]; if (entry.isLeaf()) { yield entry; } else { yield* entry.walkFrom(key); } } } async *walkLeavesFrom(key) { for await (const node of this.walkFrom(key)) { if (node.isLeaf()) { yield node; } } } async list(count = Number.MAX_SAFE_INTEGER, after, before) { const vals = []; for await (const leaf of this.walkLeavesFrom(after || '')) { if (leaf.key === after) continue; if (vals.length >= count) break; if (before && leaf.key >= before) break; vals.push(leaf); } return vals; } async listWithPrefix(prefix, count = Number.MAX_SAFE_INTEGER) { const vals = []; for await (const leaf of this.walkLeavesFrom(prefix)) { if (vals.length >= count || !leaf.key.startsWith(prefix)) break; vals.push(leaf); } return vals; } // Full tree traversal // ------------------- // Walk full tree & emit nodes, consumer can bail at any point by returning false async *walk() { yield this; const entries = await this.getEntries(); for (const entry of entries) { if (entry.isTree()) { for await (const e of entry.walk()) { yield e; } } else { yield entry; } } } // Walk full tree & emit nodes, consumer can bail at any point by returning false async paths() { const entries = await this.getEntries(); let paths = []; for (const entry of entries) { if (entry.isLeaf()) { paths.push([entry]); } if (entry.isTree()) { const subPaths = await entry.paths(); paths = [...paths, ...subPaths.map((p) => [entry, ...p])]; } } return paths; } // Walks tree & returns all nodes async allNodes() { const nodes = []; for await (const entry of this.walk()) { nodes.push(entry); } return nodes; } // Walks tree & returns all cids async allCids() { const cids = new cid_set_1.CidSet(); const entries = await this.getEntries(); for (const entry of entries) { if (entry.isLeaf()) { cids.add(entry.value); } else { const subtreeCids = await entry.allCids(); cids.addSet(subtreeCids); } } cids.add(await this.getPointer()); return cids; } // Walks tree & returns all leaves async leaves() { const leaves = []; for await (const entry of this.walk()) { if (entry.isLeaf()) leaves.push(entry); } return leaves; } // Returns total leaf count async leafCount() { const leaves = await this.leaves(); return leaves.length; } // Reachable tree traversal // ------------------- // Walk reachable branches of tree & emit nodes, consumer can bail at any point by returning false async *walkReachable() { yield this; const entries = await this.getEntries(); for (const entry of entries) { if (entry.isTree()) { try { for await (const e of entry.walkReachable()) { yield e; } } catch (err) { if (err instanceof error_1.MissingBlockError) { continue; } else { throw err; } } } else { yield entry; } } } async reachableLeaves() { const leaves = []; for await (const entry of this.walkReachable()) { if (entry.isLeaf()) leaves.push(entry); } return leaves; } // Sync Protocol async *carBlockStream() { const leaves = new cid_set_1.CidSet(); let toFetch = new cid_set_1.CidSet(); toFetch.add(await this.getPointer()); while (toFetch.size() > 0) { const nextLayer = new cid_set_1.CidSet(); const fetched = await this.storage.getBlocks(toFetch.toList()); if (fetched.missing.length > 0) { throw new error_1.MissingBlocksError('mst node', fetched.missing); } for (const cid of toFetch.toList()) { const found = await parse.getAndParseByDef(fetched.blocks, cid, exports.nodeDataDef); yield { cid, bytes: found.bytes }; const entries = await util.deserializeNodeData(this.storage, found.obj); for (const entry of entries) { if (entry.isLeaf()) { leaves.add(entry.value); } else { nextLayer.add(await entry.getPointer()); } } } toFetch = nextLayer; } const leafData = await this.storage.getBlocks(leaves.toList()); if (leafData.missing.length > 0) { throw new error_1.MissingBlocksError('mst leaf', leafData.missing); } for (const leaf of leafData.blocks.entries()) { yield leaf; } } async cidsForPath(key) { const cids = [await this.getPointer()]; const index = await this.findGtOrEqualLeafIndex(key); const found = await this.atIndex(index); if (found && found.isLeaf() && found.key === key) { return [...cids, found.value]; } const prev = await this.atIndex(index - 1); if (prev && prev.isTree()) { return [...cids, ...(await prev.cidsForPath(key))]; } return cids; } // A covering proof is all MST nodes (leaves excluded) needed to prove the value of a given leaf // and its siblings to its immediate right and left (if applicable) // We simply find the immediately preceeding node and then walk from that node until we reach the // first key that is greater than the requested key (the right sibling) async getCoveringProof(key) { const [self, left, right] = await Promise.all([ this.proofForKey(key), this.proofForLeftSib(key), this.proofForRightSib(key), ]); return self.addMap(left).addMap(right); } async proofForKey(key) { const index = await this.findGtOrEqualLeafIndex(key); const found = await this.atIndex(index); let blocks; if (found && found.isLeaf() && found.key === key) { blocks = new block_map_1.BlockMap(); } else { const prev = await this.atIndex(index - 1); if (!prev || prev.isLeaf()) { return new block_map_1.BlockMap(); } else { blocks = await prev.proofForKey(key); } } const serialized = await this.serialize(); return blocks.set(serialized.cid, serialized.bytes); } async proofForLeftSib(key) { const index = await this.findGtOrEqualLeafIndex(key); const prev = await this.atIndex(index - 1); let blocks; if (!prev || prev.isLeaf()) { blocks = new block_map_1.BlockMap(); } else { blocks = await prev.proofForLeftSib(key); } const serialized = await this.serialize(); return blocks.set(serialized.cid, serialized.bytes); } async proofForRightSib(key) { const index = await this.findGtOrEqualLeafIndex(key); let found = await this.atIndex(index); if (!found) { found = await this.atIndex(index - 1); } let blocks; if (!found) { // shouldn't ever hit, null case blocks = new block_map_1.BlockMap(); } else if (found.isTree()) { blocks = await found.proofForRightSib(key); // recurse down } else { const node = found.key === key ? await this.atIndex(index + 1) : await this.atIndex(index - 1); if (!node || node.isLeaf()) { blocks = new block_map_1.BlockMap(); } else { blocks = await node.proofForRightSib(key); } } const serialized = await this.serialize(); return blocks.set(serialized.cid, serialized.bytes); } // Matching Leaf interface // ------------------- isTree() { return true; } isLeaf() { return false; } async equals(other) { if (other.isLeaf()) return false; const thisPointer = await this.getPointer(); const otherPointer = await other.getPointer(); return thisPointer.equals(otherPointer); } } exports.MST = MST; class Leaf { constructor(key, value) { Object.defineProperty(this, "key", { enumerable: true, configurable: true, writable: true, value: key }); Object.defineProperty(this, "value", { enumerable: true, configurable: true, writable: true, value: value }); } isTree() { return false; } isLeaf() { return true; } equals(entry) { if (entry.isLeaf()) { return this.key === entry.key && this.value.equals(entry.value); } else { return false; } } } exports.Leaf = Leaf; //# sourceMappingURL=mst.js.map