@atproto/repo
Version:
atproto repo and MST implementation
865 lines • 31.4 kB
JavaScript
"use strict";
// TypeScript interop helper: re-exports property `key` of `source` on `target`
// (optionally under the name `alias`). An already-installed helper on `this`
// wins; otherwise the implementation is chosen once, based on Object.create support.
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: a plain snapshot copy is the best we can do.
        return function (target, source, key, alias) {
            target[alias === undefined ? key : alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsLiveGetter;
        if (!descriptor) {
            needsLiveGetter = true;
        }
        else if ("get" in descriptor) {
            // An accessor on a real ES module is already live and can be reused as-is.
            needsLiveGetter = !source.__esModule;
        }
        else {
            // A mutable data property must be read through a getter to stay in sync.
            needsLiveGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsLiveGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    };
})();
// TypeScript interop helper: attaches `value` as the `default` export of `target`.
// An already-installed helper on `this` wins; otherwise the implementation is
// chosen once, based on Object.create support.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (target, value) {
            Object.defineProperty(target, "default", { enumerable: true, value: value });
        };
    }
    return function (target, value) {
        target["default"] = value;
    };
})();
// TypeScript interop helper for `import * as ns from "..."`.
// Real ES modules are returned untouched; anything else is wrapped in a fresh
// namespace object that re-exports every own, non-"default" property and
// exposes the original module as `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var name in mod) {
            if (name === "default") {
                continue;
            }
            if (!Object.prototype.hasOwnProperty.call(mod, name)) {
                continue;
            }
            __createBinding(namespace, mod, name);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Leaf = exports.MST = exports.nodeDataDef = void 0;
const zod_1 = require("zod");
const common_1 = require("@atproto/common");
const block_map_1 = require("../block-map");
const cid_set_1 = require("../cid-set");
const error_1 = require("../error");
const parse = __importStar(require("../parse"));
const util = __importStar(require("./util"));
/**
* This is an implementation of a Merkle Search Tree (MST)
* The data structure is described here: https://hal.inria.fr/hal-02303490/document
* The MST is an ordered, insert-order-independent, deterministic tree.
* Keys are laid out in alphabetic order.
* The key insight of an MST is that each key is hashed and starting 0s are counted
* to determine which layer it falls on (5 zeros for ~32 fanout).
* This is a merkle tree, so each subtree is referred to by its hash (CID).
* When a leaf is changed, every tree on the path to that leaf is changed as well,
* thereby updating the root hash.
*
* For atproto, we use SHA-256 as the key hashing algorithm, and ~4 fanout
* (2-bits of zero per layer).
*/
/**
* A couple notes on CBOR encoding:
*
* There are never two neighboring subtrees.
* Therefore, we can represent a node as an array of
* leaves & pointers to their right neighbor (possibly null),
* along with a pointer to the left-most subtree (also possibly null).
*
* Most keys in a subtree will have overlap.
* We do compression on prefixes by describing keys as:
* - the length of the prefix that it shares in common with the preceding key
* - the rest of the string
*
* For example:
* If the first leaf in a tree is `bsky/posts/abcdefg` and the second is `bsky/posts/abcdehi`
* Then the first will be described as `prefix: 0, key: 'bsky/posts/abcdefg'`,
* and the second will be described as `prefix: 16, key: 'hi'`.
*/
// Schemas for the serialized form of an MST node.
// A pointer to a subtree: either a CID or null when there is no subtree.
const subTreePointer = zod_1.z.nullable(common_1.schema.cid);
// One entry of a serialized node: a leaf plus the (optional) subtree to its right.
const treeEntry = zod_1.z.object({
p: zod_1.z.number(), // number of leading ascii chars this key shares with the previous key
k: common_1.schema.bytes, // the remainder of the key after the shared prefix
v: common_1.schema.cid, // CID of the value stored at this key
t: subTreePointer, // next subtree (to the right of this leaf), or null
});
// A serialized node: the left-most subtree pointer followed by its entries.
const nodeData = zod_1.z.object({
l: subTreePointer, // left-most subtree, or null
e: zod_1.z.array(treeEntry), // entries, each a leaf with its right-hand subtree pointer
});
// Named schema definition; within this file it is passed to `storage.readObj`
// and `parse.getAndParseByDef` when reading MST node blocks.
exports.nodeDataDef = {
name: 'mst node',
schema: nodeData,
};
/**
 * A single node of the Merkle Search Tree.
 *
 * A node holds an ordered list of entries, each of which is either a Leaf
 * (key/value pair) or another MST (a subtree). Nodes are treated as immutable:
 * every mutating operation returns a new MST and leaves the receiver untouched.
 * Entries are loaded lazily from `storage`, and the node's CID (`pointer`) is
 * recomputed lazily once `outdatedPointer` has been set.
 */
class MST {
    /**
     * @param storage block storage; this class calls `readObj`, `has` and `getBlocks` on it
     * @param pointer CID of this node (stale while `outdatedPointer` is true)
     * @param entries ordered entries, or null when they have not been loaded yet
     * @param layer layer of this node within the tree, or null when not yet known
     */
    constructor(storage, pointer, entries, layer) {
        Object.defineProperty(this, "storage", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "entries", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "layer", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "pointer", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "outdatedPointer", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: false
        });
        this.storage = storage;
        this.entries = entries;
        this.layer = layer;
        this.pointer = pointer;
    }
    /**
     * Builds a node from in-memory entries, computing its CID up front.
     * @param storage block storage
     * @param entries entries of the node (defaults to an empty node)
     * @param opts optional `{ layer }` hint
     */
    static async create(storage, entries = [], opts) {
        const pointer = await util.cidForEntries(entries);
        const { layer = null } = opts || {};
        return new MST(storage, pointer, entries, layer);
    }
    /**
     * Builds a node from already-parsed node data (the `nodeData` schema shape).
     * @param storage block storage
     * @param data parsed node data
     * @param opts optional `{ layer }` hint, also forwarded to the deserializer
     */
    static async fromData(storage, data, opts) {
        const { layer = null } = opts || {};
        const entries = await util.deserializeNodeData(storage, data, opts);
        const pointer = await (0, common_1.cidForCbor)(data);
        return new MST(storage, pointer, entries, layer);
    }
    // this is really a *lazy* load, doesn't actually touch storage
    static load(storage, cid, opts) {
        const { layer = null } = opts || {};
        return new MST(storage, cid, null, layer);
    }
    // Immutability
    // -------------------
    // We never mutate an MST, we just return a new MST with updated values.
    // The copy keeps the old pointer but is flagged as outdated so that the CID
    // is recomputed the next time it is requested.
    async newTree(entries) {
        const mst = new MST(this.storage, this.pointer, entries, this.layer);
        mst.outdatedPointer = true;
        return mst;
    }
    // Getters (lazy load)
    // -------------------
    /**
     * Returns the entries of this node, reading them from storage on first use.
     * We don't want to load entries of every subtree, just the ones we need.
     * The returned array is always a copy, so callers may mutate it freely
     * without affecting the node.
     * @throws Error when the node has neither entries nor a pointer
     */
    async getEntries() {
        if (this.entries)
            return [...this.entries];
        if (this.pointer) {
            const data = await this.storage.readObj(this.pointer, exports.nodeDataDef);
            const firstLeaf = data.e[0];
            // layer hint handed to the deserializer, derived from the first leaf's key
            const layer = firstLeaf !== undefined
                ? await util.leadingZerosOnHash(firstLeaf.k)
                : undefined;
            this.entries = await util.deserializeNodeData(this.storage, data, {
                layer,
            });
            // hand out a copy here as well, matching the cached path above
            return [...this.entries];
        }
        throw new Error('No entries or CID provided');
    }
    // We don't hash the node on every mutation for performance reasons
    // Instead we keep track of whether the pointer is outdated and only (recursively) calculate when needed
    async getPointer() {
        if (!this.outdatedPointer)
            return this.pointer;
        const { cid } = await this.serialize();
        this.pointer = cid;
        this.outdatedPointer = false;
        return this.pointer;
    }
    /**
     * Serializes this node to a CBOR block.
     * Subtrees with outdated pointers are resolved first so that the serialized
     * node references up-to-date CIDs.
     * @returns `{ cid, bytes }` of the encoded node
     */
    async serialize() {
        let entries = await this.getEntries();
        const outdated = entries.filter((e) => e.isTree() && e.outdatedPointer);
        if (outdated.length > 0) {
            await Promise.all(outdated.map((e) => e.getPointer()));
            entries = await this.getEntries();
        }
        const data = util.serializeNodeData(entries);
        const block = await (0, common_1.dataToCborBlock)(data);
        return {
            cid: block.cid,
            bytes: block.bytes,
        };
    }
    // In most cases, we get the layer of a node from a hint on creation
    // In the case of the topmost node in the tree, we look for a key in the node & determine the layer
    // In the case where we don't find one, we recurse down until we do.
    // If we still can't find one, then we have an empty tree and the node is layer 0
    async getLayer() {
        this.layer = await this.attemptGetLayer();
        if (this.layer === null)
            this.layer = 0;
        return this.layer;
    }
    /**
     * Tries to determine the layer of this node without falling back to 0.
     * @returns the layer, or null when neither this node nor any subtree reveals it
     */
    async attemptGetLayer() {
        if (this.layer !== null)
            return this.layer;
        const entries = await this.getEntries();
        let layer = await util.layerForEntries(entries);
        if (layer === null) {
            // no hint at this level: a child that knows its layer sits exactly one below us
            for (const entry of entries) {
                if (entry.isTree()) {
                    const childLayer = await entry.attemptGetLayer();
                    if (childLayer !== null) {
                        layer = childLayer + 1;
                        break;
                    }
                }
            }
        }
        if (layer !== null)
            this.layer = layer;
        return layer;
    }
    // Core functionality
    // -------------------
    /**
     * Return the necessary blocks to persist the MST to repo storage.
     * Stops descending as soon as storage already has a node.
     * @returns `{ root, blocks }` where `root` is this node's CID
     */
    async getUnstoredBlocks() {
        const blocks = new block_map_1.BlockMap();
        const pointer = await this.getPointer();
        const alreadyHas = await this.storage.has(pointer);
        if (alreadyHas)
            return { root: pointer, blocks };
        const entries = await this.getEntries();
        const data = util.serializeNodeData(entries);
        await blocks.add(data);
        for (const entry of entries) {
            if (entry.isTree()) {
                const subtree = await entry.getUnstoredBlocks();
                blocks.addMap(subtree.blocks);
            }
        }
        return { root: pointer, blocks: blocks };
    }
    /**
     * Adds a new leaf for the given key/value pair.
     * @param key MST key (validated with `util.ensureValidMstKey`)
     * @param value CID stored at the key
     * @param knownZeros optional precomputed leading-zero count of the key hash
     * @returns the new tree (the receiver is left untouched)
     * @throws Error if a leaf with that key already exists
     */
    async add(key, value, knownZeros) {
        util.ensureValidMstKey(key);
        const keyZeros = knownZeros ?? (await util.leadingZerosOnHash(key));
        const layer = await this.getLayer();
        const newLeaf = new Leaf(key, value);
        if (keyZeros === layer) {
            // it belongs in this layer
            const index = await this.findGtOrEqualLeafIndex(key);
            const found = await this.atIndex(index);
            if (found?.isLeaf() && found.key === key) {
                throw new Error(`There is already a value at key: ${key}`);
            }
            const prevNode = await this.atIndex(index - 1);
            if (!prevNode || prevNode.isLeaf()) {
                // if entry before is a leaf, (or we're on far left) we can just splice in
                return this.spliceIn(newLeaf, index);
            }
            else {
                // else we try to split the subtree around the key
                const splitSubTree = await prevNode.splitAround(key);
                return this.replaceWithSplit(index - 1, splitSubTree[0], newLeaf, splitSubTree[1]);
            }
        }
        else if (keyZeros < layer) {
            // it belongs on a lower layer
            const index = await this.findGtOrEqualLeafIndex(key);
            const prevNode = await this.atIndex(index - 1);
            if (prevNode && prevNode.isTree()) {
                // if entry before is a tree, we add it to that tree
                const newSubtree = await prevNode.add(key, value, keyZeros);
                return this.updateEntry(index - 1, newSubtree);
            }
            else {
                const subTree = await this.createChild();
                const newSubTree = await subTree.add(key, value, keyZeros);
                return this.spliceIn(newSubTree, index);
            }
        }
        else {
            // it belongs on a higher layer & we must push the rest of the tree down
            const split = await this.splitAround(key);
            // if the newly added key has >=2 more leading zeros than the current highest layer
            // then we need to add in structural nodes in between as well
            let left = split[0];
            let right = split[1];
            const extraLayersToAdd = keyZeros - layer;
            // intentionally starting at 1, since first layer is taken care of by split
            for (let i = 1; i < extraLayersToAdd; i++) {
                if (left !== null) {
                    left = await left.createParent();
                }
                if (right !== null) {
                    right = await right.createParent();
                }
            }
            const updated = [];
            if (left)
                updated.push(left);
            updated.push(newLeaf);
            if (right)
                updated.push(right);
            const newRoot = await MST.create(this.storage, updated, {
                layer: keyZeros,
            });
            newRoot.outdatedPointer = true;
            return newRoot;
        }
    }
    /**
     * Gets the value at the given key.
     * @returns the stored value, or null when the key is not in the tree
     */
    async get(key) {
        const index = await this.findGtOrEqualLeafIndex(key);
        const found = await this.atIndex(index);
        if (found && found.isLeaf() && found.key === key) {
            return found.value;
        }
        // not on this level: the only place it can live is the subtree just before `index`
        const prev = await this.atIndex(index - 1);
        if (prev && prev.isTree()) {
            return prev.get(key);
        }
        return null;
    }
    /**
     * Edits the value at the given key.
     * @returns the new tree
     * @throws Error if the given key does not exist
     */
    async update(key, value) {
        util.ensureValidMstKey(key);
        const index = await this.findGtOrEqualLeafIndex(key);
        const found = await this.atIndex(index);
        if (found && found.isLeaf() && found.key === key) {
            return this.updateEntry(index, new Leaf(key, value));
        }
        const prev = await this.atIndex(index - 1);
        if (prev && prev.isTree()) {
            const updatedTree = await prev.update(key, value);
            return this.updateEntry(index - 1, updatedTree);
        }
        throw new Error(`Could not find a record with key: ${key}`);
    }
    /**
     * Deletes the value at the given key and trims any root that is left
     * holding only a single subtree.
     * @throws Error if the given key does not exist
     */
    async delete(key) {
        const altered = await this.deleteRecurse(key);
        return altered.trimTop();
    }
    /**
     * Recursive worker for `delete`; does not trim the top of the tree.
     * @throws Error if the given key does not exist
     */
    async deleteRecurse(key) {
        const index = await this.findGtOrEqualLeafIndex(key);
        const found = await this.atIndex(index);
        // if found, remove it on this level
        if (found?.isLeaf() && found.key === key) {
            const prev = await this.atIndex(index - 1);
            const next = await this.atIndex(index + 1);
            if (prev?.isTree() && next?.isTree()) {
                // removing the leaf would leave two neighboring subtrees, so merge them
                const merged = await prev.appendMerge(next);
                return this.newTree([
                    ...(await this.slice(0, index - 1)),
                    merged,
                    ...(await this.slice(index + 2)),
                ]);
            }
            else {
                return this.removeEntry(index);
            }
        }
        // else recurse down to find it
        const prev = await this.atIndex(index - 1);
        if (prev?.isTree()) {
            const subtree = await prev.deleteRecurse(key);
            const subTreeEntries = await subtree.getEntries();
            if (subTreeEntries.length === 0) {
                // drop subtrees that became empty
                return this.removeEntry(index - 1);
            }
            else {
                return this.updateEntry(index - 1, subtree);
            }
        }
        else {
            throw new Error(`Could not find a record with key: ${key}`);
        }
    }
    // Simple Operations
    // -------------------
    // returns a new tree with the entry at index replaced
    async updateEntry(index, entry) {
        const update = [
            ...(await this.slice(0, index)),
            entry,
            ...(await this.slice(index + 1)),
        ];
        return this.newTree(update);
    }
    // returns a new tree without the entry at index
    async removeEntry(index) {
        const updated = [
            ...(await this.slice(0, index)),
            ...(await this.slice(index + 1)),
        ];
        return this.newTree(updated);
    }
    // returns a new tree with the entry appended to the end of the node
    async append(entry) {
        const entries = await this.getEntries();
        return this.newTree([...entries, entry]);
    }
    // returns a new tree with the entry prepended to the start of the node
    async prepend(entry) {
        const entries = await this.getEntries();
        return this.newTree([entry, ...entries]);
    }
    // returns entry at index, or null when out of range
    async atIndex(index) {
        const entries = await this.getEntries();
        return entries[index] ?? null;
    }
    // returns a slice of the node (like array.slice)
    async slice(start, end) {
        const entries = await this.getEntries();
        return entries.slice(start, end);
    }
    // returns a new tree with the entry inserted at index
    async spliceIn(entry, index) {
        const update = [
            ...(await this.slice(0, index)),
            entry,
            ...(await this.slice(index)),
        ];
        return this.newTree(update);
    }
    // replaces an entry with [ Maybe(tree), Leaf, Maybe(tree) ]
    async replaceWithSplit(index, left, leaf, right) {
        const update = await this.slice(0, index);
        if (left)
            update.push(left);
        update.push(leaf);
        if (right)
            update.push(right);
        update.push(...(await this.slice(index + 1)));
        return this.newTree(update);
    }
    // if the topmost node in the tree only points to another tree, trim the top and return the subtree
    // (a node whose block is missing from storage is returned as-is)
    async trimTop() {
        let entries;
        try {
            entries = await this.getEntries();
        }
        catch (err) {
            if (err instanceof error_1.MissingBlockError) {
                return this;
            }
            else {
                throw err;
            }
        }
        if (entries.length === 1 && entries[0].isTree()) {
            return entries[0].trimTop();
        }
        else {
            return this;
        }
    }
    // Subtree & Splits
    // -------------------
    /**
     * Recursively splits a sub tree around a given key.
     * @returns `[left, right]`; a side is null when it ends up with no entries
     */
    async splitAround(key) {
        const index = await this.findGtOrEqualLeafIndex(key);
        // split tree around key
        const leftData = await this.slice(0, index);
        const rightData = await this.slice(index);
        let left = await this.newTree(leftData);
        let right = await this.newTree(rightData);
        // if the far right of the left side is a subtree,
        // we need to split it on the key as well
        const lastInLeft = leftData[leftData.length - 1];
        if (lastInLeft?.isTree()) {
            left = await left.removeEntry(leftData.length - 1);
            const split = await lastInLeft.splitAround(key);
            if (split[0]) {
                left = await left.append(split[0]);
            }
            if (split[1]) {
                right = await right.prepend(split[1]);
            }
        }
        return [
            (await left.getEntries()).length > 0 ? left : null,
            (await right.getEntries()).length > 0 ? right : null,
        ];
    }
    /**
     * The simple merge case where every key in the right tree is greater than every key in the left tree
     * (used primarily for deletes).
     * @throws Error when the two nodes are on different layers
     */
    async appendMerge(toMerge) {
        if ((await this.getLayer()) !== (await toMerge.getLayer())) {
            throw new Error('Trying to merge two nodes from different layers of the MST');
        }
        const thisEntries = await this.getEntries();
        const toMergeEntries = await toMerge.getEntries();
        const lastInLeft = thisEntries[thisEntries.length - 1];
        const firstInRight = toMergeEntries[0];
        if (lastInLeft?.isTree() && firstInRight?.isTree()) {
            // the seam would hold two neighboring subtrees, so merge those recursively
            const merged = await lastInLeft.appendMerge(firstInRight);
            return this.newTree([
                ...thisEntries.slice(0, thisEntries.length - 1),
                merged,
                ...toMergeEntries.slice(1),
            ]);
        }
        else {
            return this.newTree([...thisEntries, ...toMergeEntries]);
        }
    }
    // Create relatives
    // -------------------
    // creates an empty node one layer below this one
    async createChild() {
        const layer = await this.getLayer();
        return MST.create(this.storage, [], {
            layer: layer - 1,
        });
    }
    // creates a node one layer above this one whose only entry is this node
    async createParent() {
        const layer = await this.getLayer();
        const parent = await MST.create(this.storage, [this], {
            layer: layer + 1,
        });
        parent.outdatedPointer = true;
        return parent;
    }
    // Finding insertion points
    // -------------------
    // finds index of first leaf node that is greater than or equal to the value
    // (returns the entry count when there is no such leaf)
    async findGtOrEqualLeafIndex(key) {
        const entries = await this.getEntries();
        const maybeIndex = entries.findIndex((entry) => entry.isLeaf() && entry.key >= key);
        // if we can't find, we're on the end
        return maybeIndex >= 0 ? maybeIndex : entries.length;
    }
    // List operations (partial tree traversal)
    // -------------------
    // @TODO write tests for these
    /**
     * Walk tree starting at key.
     * Yields this node first, then (in order) every visited subtree node and
     * every leaf whose key is >= `key`. Each leaf is yielded exactly once.
     */
    async *walkFrom(key) {
        yield this;
        const index = await this.findGtOrEqualLeafIndex(key);
        const entries = await this.getEntries();
        const found = entries[index];
        const isExactMatch = Boolean(found) && found.isLeaf() && found.key === key;
        if (!isExactMatch) {
            // Keys >= `key` may still live in the subtree immediately left of `index`.
            // (A leaf in that position is always < `key`, so only subtrees matter.)
            const prev = entries[index - 1];
            if (prev && prev.isTree()) {
                yield* prev.walkFrom(key);
            }
        }
        // Everything from `index` onwards is >= `key`; this also emits an exact match.
        for (let i = index; i < entries.length; i++) {
            const entry = entries[i];
            if (entry.isLeaf()) {
                yield entry;
            }
            else {
                yield* entry.walkFrom(key);
            }
        }
    }
    // Same as walkFrom, but only yields the leaves
    async *walkLeavesFrom(key) {
        for await (const node of this.walkFrom(key)) {
            if (node.isLeaf()) {
                yield node;
            }
        }
    }
    /**
     * Lists leaves in key order.
     * @param count maximum number of leaves to return
     * @param after exclusive lower bound on the key (optional)
     * @param before exclusive upper bound on the key (optional)
     */
    async list(count = Number.MAX_SAFE_INTEGER, after, before) {
        const vals = [];
        for await (const leaf of this.walkLeavesFrom(after || '')) {
            if (leaf.key === after)
                continue;
            if (vals.length >= count)
                break;
            if (before && leaf.key >= before)
                break;
            vals.push(leaf);
        }
        return vals;
    }
    /**
     * Lists leaves whose key starts with `prefix`, in key order.
     * @param prefix key prefix to match
     * @param count maximum number of leaves to return
     */
    async listWithPrefix(prefix, count = Number.MAX_SAFE_INTEGER) {
        const vals = [];
        for await (const leaf of this.walkLeavesFrom(prefix)) {
            if (vals.length >= count || !leaf.key.startsWith(prefix))
                break;
            vals.push(leaf);
        }
        return vals;
    }
    // Full tree traversal
    // -------------------
    // Walk full tree & emit nodes (this node first, then entries depth-first);
    // the consumer can bail at any point by breaking out of the iteration
    async *walk() {
        yield this;
        const entries = await this.getEntries();
        for (const entry of entries) {
            if (entry.isTree()) {
                for await (const e of entry.walk()) {
                    yield e;
                }
            }
            else {
                yield entry;
            }
        }
    }
    // Returns one path per leaf: the subtree nodes visited below this node
    // (this node itself excluded), followed by the leaf
    async paths() {
        const entries = await this.getEntries();
        const paths = [];
        for (const entry of entries) {
            if (entry.isLeaf()) {
                paths.push([entry]);
            }
            if (entry.isTree()) {
                const subPaths = await entry.paths();
                for (const subPath of subPaths) {
                    paths.push([entry, ...subPath]);
                }
            }
        }
        return paths;
    }
    // Walks tree & returns all nodes
    async allNodes() {
        const nodes = [];
        for await (const entry of this.walk()) {
            nodes.push(entry);
        }
        return nodes;
    }
    // Walks tree & returns all cids (leaf values as well as MST node pointers)
    async allCids() {
        const cids = new cid_set_1.CidSet();
        const entries = await this.getEntries();
        for (const entry of entries) {
            if (entry.isLeaf()) {
                cids.add(entry.value);
            }
            else {
                const subtreeCids = await entry.allCids();
                cids.addSet(subtreeCids);
            }
        }
        cids.add(await this.getPointer());
        return cids;
    }
    // Walks tree & returns all leaves
    async leaves() {
        const leaves = [];
        for await (const entry of this.walk()) {
            if (entry.isLeaf())
                leaves.push(entry);
        }
        return leaves;
    }
    // Returns total leaf count
    async leafCount() {
        const leaves = await this.leaves();
        return leaves.length;
    }
    // Reachable tree traversal
    // -------------------
    // Walk reachable branches of tree & emit nodes; subtrees whose blocks are
    // missing from storage are skipped. The consumer can bail at any point by
    // breaking out of the iteration
    async *walkReachable() {
        yield this;
        const entries = await this.getEntries();
        for (const entry of entries) {
            if (entry.isTree()) {
                try {
                    for await (const e of entry.walkReachable()) {
                        yield e;
                    }
                }
                catch (err) {
                    if (err instanceof error_1.MissingBlockError) {
                        continue;
                    }
                    else {
                        throw err;
                    }
                }
            }
            else {
                yield entry;
            }
        }
    }
    // Returns all leaves found by walkReachable
    async reachableLeaves() {
        const leaves = [];
        for await (const entry of this.walkReachable()) {
            if (entry.isLeaf())
                leaves.push(entry);
        }
        return leaves;
    }
    // Sync Protocol
    /**
     * Streams the blocks of the tree: first every MST node, fetched from storage
     * one layer at a time, then every leaf block.
     * @throws MissingBlocksError when storage lacks an MST node or a leaf block
     */
    async *carBlockStream() {
        const leaves = new cid_set_1.CidSet();
        let toFetch = new cid_set_1.CidSet();
        toFetch.add(await this.getPointer());
        while (toFetch.size() > 0) {
            const nextLayer = new cid_set_1.CidSet();
            const fetched = await this.storage.getBlocks(toFetch.toList());
            if (fetched.missing.length > 0) {
                throw new error_1.MissingBlocksError('mst node', fetched.missing);
            }
            for (const cid of toFetch.toList()) {
                const found = await parse.getAndParseByDef(fetched.blocks, cid, exports.nodeDataDef);
                yield { cid, bytes: found.bytes };
                const entries = await util.deserializeNodeData(this.storage, found.obj);
                for (const entry of entries) {
                    if (entry.isLeaf()) {
                        // leaf blocks are collected and emitted after all MST nodes
                        leaves.add(entry.value);
                    }
                    else {
                        nextLayer.add(await entry.getPointer());
                    }
                }
            }
            toFetch = nextLayer;
        }
        const leafData = await this.storage.getBlocks(leaves.toList());
        if (leafData.missing.length > 0) {
            throw new error_1.MissingBlocksError('mst leaf', leafData.missing);
        }
        for (const leaf of leafData.blocks.entries()) {
            yield leaf;
        }
    }
    /**
     * Returns the CIDs of the MST nodes on the path from this node towards `key`,
     * followed by the value CID when the key exists.
     */
    async cidsForPath(key) {
        const cids = [await this.getPointer()];
        const index = await this.findGtOrEqualLeafIndex(key);
        const found = await this.atIndex(index);
        if (found && found.isLeaf() && found.key === key) {
            return [...cids, found.value];
        }
        const prev = await this.atIndex(index - 1);
        if (prev && prev.isTree()) {
            return [...cids, ...(await prev.cidsForPath(key))];
        }
        return cids;
    }
    // A covering proof is all MST nodes (leaves excluded) needed to prove the value of a given leaf
    // and its siblings to its immediate right and left (if applicable)
    // We simply find the immediately preceding node and then walk from that node until we reach the
    // first key that is greater than the requested key (the right sibling)
    async getCoveringProof(key) {
        const [self, left, right] = await Promise.all([
            this.proofForKey(key),
            this.proofForLeftSib(key),
            this.proofForRightSib(key),
        ]);
        return self.addMap(left).addMap(right);
    }
    // Collects the serialized MST nodes on the path down to `key`.
    // Returns an empty map from the level where the search dead-ends
    // (key absent and no subtree left to descend into).
    async proofForKey(key) {
        const index = await this.findGtOrEqualLeafIndex(key);
        const found = await this.atIndex(index);
        let blocks;
        if (found && found.isLeaf() && found.key === key) {
            blocks = new block_map_1.BlockMap();
        }
        else {
            const prev = await this.atIndex(index - 1);
            if (!prev || prev.isLeaf()) {
                return new block_map_1.BlockMap();
            }
            else {
                blocks = await prev.proofForKey(key);
            }
        }
        const serialized = await this.serialize();
        return blocks.set(serialized.cid, serialized.bytes);
    }
    // Collects the serialized MST nodes on the path towards the entry
    // immediately left of where `key` sits (or would sit)
    async proofForLeftSib(key) {
        const index = await this.findGtOrEqualLeafIndex(key);
        const prev = await this.atIndex(index - 1);
        let blocks;
        if (!prev || prev.isLeaf()) {
            blocks = new block_map_1.BlockMap();
        }
        else {
            blocks = await prev.proofForLeftSib(key);
        }
        const serialized = await this.serialize();
        return blocks.set(serialized.cid, serialized.bytes);
    }
    // Collects the serialized MST nodes on the path towards the entry
    // to the right of `key`
    async proofForRightSib(key) {
        const index = await this.findGtOrEqualLeafIndex(key);
        let found = await this.atIndex(index);
        if (!found) {
            // no leaf >= key on this level: fall back to the last entry
            found = await this.atIndex(index - 1);
        }
        let blocks;
        if (!found) {
            // shouldn't ever hit, null case
            blocks = new block_map_1.BlockMap();
        }
        else if (found.isTree()) {
            // recurse down
            blocks = await found.proofForRightSib(key);
        }
        else {
            const node = found.key === key
                ? await this.atIndex(index + 1)
                : await this.atIndex(index - 1);
            if (!node || node.isLeaf()) {
                blocks = new block_map_1.BlockMap();
            }
            else {
                blocks = await node.proofForRightSib(key);
            }
        }
        const serialized = await this.serialize();
        return blocks.set(serialized.cid, serialized.bytes);
    }
    // Matching Leaf interface
    // -------------------
    isTree() {
        return true;
    }
    isLeaf() {
        return false;
    }
    // Two trees are equal when their (up-to-date) pointers are equal
    async equals(other) {
        if (other.isLeaf())
            return false;
        const thisPointer = await this.getPointer();
        const otherPointer = await other.getPointer();
        return thisPointer.equals(otherPointer);
    }
}
exports.MST = MST;
/**
 * A key/value entry of an MST node.
 * Exposes the same `isTree` / `isLeaf` / `equals` surface as MST so that
 * entries can be handled uniformly.
 */
class Leaf {
    /**
     * @param key the key of the entry
     * @param value the value stored at the key; must provide an `equals` method
     */
    constructor(key, value) {
        // Define (rather than assign) both fields as ordinary own data properties.
        Object.defineProperties(this, {
            key: { enumerable: true, configurable: true, writable: true, value: key },
            value: { enumerable: true, configurable: true, writable: true, value: value },
        });
    }
    isTree() {
        return false;
    }
    isLeaf() {
        return true;
    }
    // A leaf only ever equals another leaf with the same key and an equal value
    equals(entry) {
        if (!entry.isLeaf()) {
            return false;
        }
        const sameKey = this.key === entry.key;
        return sameKey && this.value.equals(entry.value);
    }
}
exports.Leaf = Leaf;
//# sourceMappingURL=mst.js.map