pouchdb-merge
Version:
PouchDB's document merge algorithm.
550 lines (486 loc) • 16.1 kB
JavaScript
import { clone } from 'pouchdb-utils';
// We fetch all leafs of the revision tree, and sort them based on tree length
// and whether they were deleted, undeleted documents with the longest revision
// tree (most edits) win
// The final sort algorithm is slightly documented in a sidebar here:
// http://guide.couchdb.org/draft/conflicts.html
function winningRev(metadata) {
var winningId;
var winningPos;
var winningDeleted;
var toVisit = metadata.rev_tree.slice();
var node;
while ((node = toVisit.pop())) {
var tree = node.ids;
var branches = tree[2];
var pos = node.pos;
if (branches.length) { // non-leaf
for (var i = 0, len = branches.length; i < len; i++) {
toVisit.push({pos: pos + 1, ids: branches[i]});
}
continue;
}
var deleted = !!tree[1].deleted;
var id = tree[0];
// sort by deleted, then pos, then id
if (!winningId || (winningDeleted !== deleted ? winningDeleted :
winningPos !== pos ? winningPos < pos : winningId < id)) {
winningId = id;
winningPos = pos;
winningDeleted = deleted;
}
}
return winningPos + '-' + winningId;
}
// Pretty much all below can be combined into a higher order function to
// traverse revisions
// The return value from the callback will be passed as context to all
// children of that node
function traverseRevTree(revs, callback) {
var toVisit = revs.slice();
var node;
while ((node = toVisit.pop())) {
var pos = node.pos;
var tree = node.ids;
var branches = tree[2];
var newCtx =
callback(branches.length === 0, pos, tree[0], node.ctx, tree[1]);
for (var i = 0, len = branches.length; i < len; i++) {
toVisit.push({pos: pos + 1, ids: branches[i], ctx: newCtx});
}
}
}
function sortByPos(a, b) {
return a.pos - b.pos;
}
function collectLeaves(revs) {
var leaves = [];
traverseRevTree(revs, function (isLeaf, pos, id, acc, opts) {
if (isLeaf) {
leaves.push({rev: pos + "-" + id, pos, opts});
}
});
leaves.sort(sortByPos).reverse();
for (var i = 0, len = leaves.length; i < len; i++) {
delete leaves[i].pos;
}
return leaves;
}
// returns revs of all conflicts that is leaves such that
// 1. are not deleted and
// 2. are different than winning revision
function collectConflicts(metadata) {
var win = winningRev(metadata);
var leaves = collectLeaves(metadata.rev_tree);
var conflicts = [];
for (var i = 0, len = leaves.length; i < len; i++) {
var leaf = leaves[i];
if (leaf.rev !== win && !leaf.opts.deleted) {
conflicts.push(leaf.rev);
}
}
return conflicts;
}
// compact a tree by marking its non-leafs as missing,
// and return a list of revs to delete
function compactTree(metadata) {
var revs = [];
traverseRevTree(metadata.rev_tree, function (isLeaf, pos,
revHash, ctx, opts) {
if (opts.status === 'available' && !isLeaf) {
revs.push(pos + '-' + revHash);
opts.status = 'missing';
}
});
return revs;
}
// `findPathToLeaf()` returns an array of revs that goes from the specified
// leaf rev to the root of that leaf’s branch.
//
// eg. for this rev tree:
// 1-9692 ▶ 2-37aa ▶ 3-df22 ▶ 4-6e94 ▶ 5-df4a ▶ 6-6a3a ▶ 7-57e5
// ┃ ┗━━━━━━▶ 5-8d8c ▶ 6-65e0
// ┗━━━━━━▶ 3-43f6 ▶ 4-a3b4
//
// For a `targetRev` of '7-57e5', `findPathToLeaf()` would return ['7-57e5', '6-6a3a', '5-df4a']
// The `revs` argument has the same structure as what `revs_tree` has on e.g.
// the IndexedDB representation of the rev tree datastructure. Please refer to
// tests/unit/test.purge.js for examples of what these look like.
//
// This function will throw an error if:
// - The requested revision does not exist
// - The requested revision is not a leaf
function findPathToLeaf(revs, targetRev) {
let path = [];
const toVisit = revs.slice();
let node;
while ((node = toVisit.pop())) {
const { pos, ids: tree } = node;
const rev = `${pos}-${tree[0]}`;
const branches = tree[2];
// just assuming we're already working on the path up towards our desired leaf.
path.push(rev);
// we've reached the leaf of our dreams, so return the computed path.
if (rev === targetRev) {
//…unleeeeess
if (branches.length !== 0) {
throw new Error('The requested revision is not a leaf');
}
return path.reverse();
}
// this is based on the assumption that after we have a leaf (`branches.length == 0`), we handle the next
// branch. this is true for all branches other than the path leading to the winning rev (which is 7-57e5 in
// the example above. i've added a reset condition for branching nodes (`branches.length > 1`) as well.
if (branches.length === 0 || branches.length > 1) {
path = [];
}
// as a next step, we push the branches of this node to `toVisit` for visiting it during the next iteration
for (let i = 0, len = branches.length; i < len; i++) {
toVisit.push({ pos: pos + 1, ids: branches[i] });
}
}
if (path.length === 0) {
throw new Error('The requested revision does not exist');
}
return path.reverse();
}
// build up a list of all the paths to the leafs in this revision tree
function rootToLeaf(revs) {
var paths = [];
var toVisit = revs.slice();
var node;
while ((node = toVisit.pop())) {
var pos = node.pos;
var tree = node.ids;
var id = tree[0];
var opts = tree[1];
var branches = tree[2];
var isLeaf = branches.length === 0;
var history = node.history ? node.history.slice() : [];
history.push({id, opts});
if (isLeaf) {
paths.push({pos: (pos + 1 - history.length), ids: history});
}
for (var i = 0, len = branches.length; i < len; i++) {
toVisit.push({pos: pos + 1, ids: branches[i], history});
}
}
return paths.reverse();
}
// for a better overview of what this is doing, read:
function sortByPos$1(a, b) {
return a.pos - b.pos;
}
// classic binary search
function binarySearch(arr, item, comparator) {
var low = 0;
var high = arr.length;
var mid;
while (low < high) {
mid = (low + high) >>> 1;
if (comparator(arr[mid], item) < 0) {
low = mid + 1;
} else {
high = mid;
}
}
return low;
}
// assuming the arr is sorted, insert the item in the proper place
function insertSorted(arr, item, comparator) {
var idx = binarySearch(arr, item, comparator);
arr.splice(idx, 0, item);
}
// Turn a path as a flat array into a tree with a single branch.
// If any should be stemmed from the beginning of the array, that's passed
// in as the second argument
function pathToTree(path, numStemmed) {
var root;
var leaf;
for (var i = numStemmed, len = path.length; i < len; i++) {
var node = path[i];
var currentLeaf = [node.id, node.opts, []];
if (leaf) {
leaf[2].push(currentLeaf);
leaf = currentLeaf;
} else {
root = leaf = currentLeaf;
}
}
return root;
}
// compare the IDs of two trees
function compareTree(a, b) {
return a[0] < b[0] ? -1 : 1;
}
// Merge two trees together
// The roots of tree1 and tree2 must be the same revision
function mergeTree(in_tree1, in_tree2) {
var queue = [{tree1: in_tree1, tree2: in_tree2}];
var conflicts = false;
while (queue.length > 0) {
var item = queue.pop();
var tree1 = item.tree1;
var tree2 = item.tree2;
if (tree1[1].status || tree2[1].status) {
tree1[1].status =
(tree1[1].status === 'available' ||
tree2[1].status === 'available') ? 'available' : 'missing';
}
for (var i = 0; i < tree2[2].length; i++) {
if (!tree1[2][0]) {
conflicts = 'new_leaf';
tree1[2][0] = tree2[2][i];
continue;
}
var merged = false;
for (var j = 0; j < tree1[2].length; j++) {
if (tree1[2][j][0] === tree2[2][i][0]) {
queue.push({tree1: tree1[2][j], tree2: tree2[2][i]});
merged = true;
}
}
if (!merged) {
conflicts = 'new_branch';
insertSorted(tree1[2], tree2[2][i], compareTree);
}
}
}
return {conflicts, tree: in_tree1};
}
function doMerge(tree, path, dontExpand) {
var restree = [];
var conflicts = false;
var merged = false;
var res;
if (!tree.length) {
return {tree: [path], conflicts: 'new_leaf'};
}
for (var i = 0, len = tree.length; i < len; i++) {
var branch = tree[i];
if (branch.pos === path.pos && branch.ids[0] === path.ids[0]) {
// Paths start at the same position and have the same root, so they need
// merged
res = mergeTree(branch.ids, path.ids);
restree.push({pos: branch.pos, ids: res.tree});
conflicts = conflicts || res.conflicts;
merged = true;
} else if (dontExpand !== true) {
// The paths start at a different position, take the earliest path and
// traverse up until it as at the same point from root as the path we
// want to merge. If the keys match we return the longer path with the
// other merged After stemming we don't want to expand the trees
var t1 = branch.pos < path.pos ? branch : path;
var t2 = branch.pos < path.pos ? path : branch;
var diff = t2.pos - t1.pos;
var candidateParents = [];
var trees = [];
trees.push({ids: t1.ids, diff, parent: null, parentIdx: null});
while (trees.length > 0) {
var item = trees.pop();
if (item.diff === 0) {
if (item.ids[0] === t2.ids[0]) {
candidateParents.push(item);
}
continue;
}
var elements = item.ids[2];
for (var j = 0, elementsLen = elements.length; j < elementsLen; j++) {
trees.push({
ids: elements[j],
diff: item.diff - 1,
parent: item.ids,
parentIdx: j
});
}
}
var el = candidateParents[0];
if (!el) {
restree.push(branch);
} else {
res = mergeTree(el.ids, t2.ids);
el.parent[2][el.parentIdx] = res.tree;
restree.push({pos: t1.pos, ids: t1.ids});
conflicts = conflicts || res.conflicts;
merged = true;
}
} else {
restree.push(branch);
}
}
// We didnt find
if (!merged) {
restree.push(path);
}
restree.sort(sortByPos$1);
return {
tree: restree,
conflicts: conflicts || 'internal_node'
};
}
// To ensure we don't grow the revision tree infinitely, we stem old revisions
function stem(tree, depth) {
// First we break out the tree into a complete list of root to leaf paths
var paths = rootToLeaf(tree);
var stemmedRevs;
var result;
for (var i = 0, len = paths.length; i < len; i++) {
// Then for each path, we cut off the start of the path based on the
// `depth` to stem to, and generate a new set of flat trees
var path = paths[i];
var stemmed = path.ids;
var node;
if (stemmed.length > depth) {
// only do the stemming work if we actually need to stem
if (!stemmedRevs) {
stemmedRevs = {}; // avoid allocating this object unnecessarily
}
var numStemmed = stemmed.length - depth;
node = {
pos: path.pos + numStemmed,
ids: pathToTree(stemmed, numStemmed)
};
for (var s = 0; s < numStemmed; s++) {
var rev = (path.pos + s) + '-' + stemmed[s].id;
stemmedRevs[rev] = true;
}
} else { // no need to actually stem
node = {
pos: path.pos,
ids: pathToTree(stemmed, 0)
};
}
// Then we remerge all those flat trees together, ensuring that we don't
// connect trees that would go beyond the depth limit
if (result) {
result = doMerge(result, node, true).tree;
} else {
result = [node];
}
}
// this is memory-heavy per Chrome profiler, avoid unless we actually stemmed
if (stemmedRevs) {
traverseRevTree(result, function (isLeaf, pos, revHash) {
// some revisions may have been removed in a branch but not in another
delete stemmedRevs[pos + '-' + revHash];
});
}
return {
tree: result,
revs: stemmedRevs ? Object.keys(stemmedRevs) : []
};
}
function merge(tree, path, depth) {
var newTree = doMerge(tree, path);
var stemmed = stem(newTree.tree, depth);
return {
tree: stemmed.tree,
stemmedRevs: stemmed.revs,
conflicts: newTree.conflicts
};
}
// this method removes a leaf from a rev tree, independent of its status.
// e.g., by removing an available leaf, it could leave its predecessor as
// a missing leaf and corrupting the tree.
function removeLeafFromRevTree(tree, leafRev) {
return tree.flatMap((path) => {
path = removeLeafFromPath(path, leafRev);
return path ? [path] : [];
});
}
function removeLeafFromPath(path, leafRev) {
const tree = clone(path);
const toVisit = [tree];
let node;
while ((node = toVisit.pop())) {
const { pos, ids: [id, , branches], parent } = node;
const isLeaf = branches.length === 0;
const hash = `${pos}-${id}`;
if (isLeaf && hash === leafRev) {
if (!parent) {
// FIXME: we're facing the root, and probably shouldn't just return an empty array (object? null?).
return null;
}
parent.ids[2] = parent.ids[2].filter(function (branchNode) {
return branchNode[0] !== id;
});
return tree;
}
for (let i = 0, len = branches.length; i < len; i++) {
toVisit.push({ pos: pos + 1, ids: branches[i], parent: node });
}
}
return tree;
}
// return true if a rev exists in the rev tree, false otherwise
function revExists(revs, rev) {
var toVisit = revs.slice();
var splitRev = rev.split('-');
var targetPos = parseInt(splitRev[0], 10);
var targetId = splitRev[1];
var node;
while ((node = toVisit.pop())) {
if (node.pos === targetPos && node.ids[0] === targetId) {
return true;
}
var branches = node.ids[2];
for (var i = 0, len = branches.length; i < len; i++) {
toVisit.push({pos: node.pos + 1, ids: branches[i]});
}
}
return false;
}
function getTrees(node) {
return node.ids;
}
// check if a specific revision of a doc has been deleted
// - metadata: the metadata object from the doc store
// - rev: (optional) the revision to check. defaults to winning revision
function isDeleted(metadata, rev) {
if (!rev) {
rev = winningRev(metadata);
}
var id = rev.substring(rev.indexOf('-') + 1);
var toVisit = metadata.rev_tree.map(getTrees);
var tree;
while ((tree = toVisit.pop())) {
if (tree[0] === id) {
return !!tree[1].deleted;
}
toVisit = toVisit.concat(tree[2]);
}
}
function isLocalId(id) {
return typeof id === 'string' && id.startsWith('_local/');
}
// returns the current leaf node for a given revision
function latest(rev, metadata) {
var toVisit = metadata.rev_tree.slice();
var node;
while ((node = toVisit.pop())) {
var pos = node.pos;
var tree = node.ids;
var id = tree[0];
var opts = tree[1];
var branches = tree[2];
var isLeaf = branches.length === 0;
var history = node.history ? node.history.slice() : [];
history.push({id, pos, opts});
if (isLeaf) {
for (var i = 0, len = history.length; i < len; i++) {
var historyNode = history[i];
var historyRev = historyNode.pos + '-' + historyNode.id;
if (historyRev === rev) {
// return the rev of this leaf
return pos + '-' + id;
}
}
}
for (var j = 0, l = branches.length; j < l; j++) {
toVisit.push({pos: pos + 1, ids: branches[j], history});
}
}
/* istanbul ignore next */
throw new Error('Unable to resolve latest revision for id ' + metadata.id + ', rev ' + rev);
}
export { collectConflicts, collectLeaves, compactTree, findPathToLeaf, isDeleted, isLocalId, merge, removeLeafFromRevTree as removeLeafFromTree, revExists, rootToLeaf, traverseRevTree, winningRev, latest };