UNPKG

pouchdb-merge

Version:

PouchDB's document merge algorithm.

github.com/pouchdb/pouchdb

pouchdb/pouchdb

550 lines (486 loc) • 16.1 kB

JavaScript

import { clone } from 'pouchdb-utils'; // We fetch all leafs of the revision tree, and sort them based on tree length // and whether they were deleted, undeleted documents with the longest revision // tree (most edits) win // The final sort algorithm is slightly documented in a sidebar here: // http://guide.couchdb.org/draft/conflicts.html function winningRev(metadata) { var winningId; var winningPos; var winningDeleted; var toVisit = metadata.rev_tree.slice(); var node; while ((node = toVisit.pop())) { var tree = node.ids; var branches = tree[2]; var pos = node.pos; if (branches.length) { // non-leaf for (var i = 0, len = branches.length; i < len; i++) { toVisit.push({pos: pos + 1, ids: branches[i]}); } continue; } var deleted = !!tree[1].deleted; var id = tree[0]; // sort by deleted, then pos, then id if (!winningId || (winningDeleted !== deleted ? winningDeleted : winningPos !== pos ? winningPos < pos : winningId < id)) { winningId = id; winningPos = pos; winningDeleted = deleted; } } return winningPos + '-' + winningId; } // Pretty much all below can be combined into a higher order function to // traverse revisions // The return value from the callback will be passed as context to all // children of that node function traverseRevTree(revs, callback) { var toVisit = revs.slice(); var node; while ((node = toVisit.pop())) { var pos = node.pos; var tree = node.ids; var branches = tree[2]; var newCtx = callback(branches.length === 0, pos, tree[0], node.ctx, tree[1]); for (var i = 0, len = branches.length; i < len; i++) { toVisit.push({pos: pos + 1, ids: branches[i], ctx: newCtx}); } } } function sortByPos(a, b) { return a.pos - b.pos; } function collectLeaves(revs) { var leaves = []; traverseRevTree(revs, function (isLeaf, pos, id, acc, opts) { if (isLeaf) { leaves.push({rev: pos + "-" + id, pos, opts}); } }); leaves.sort(sortByPos).reverse(); for (var i = 0, len = leaves.length; i < len; i++) { delete leaves[i].pos; } return leaves; } // returns revs of all conflicts that is leaves such that // 1. are not deleted and // 2. are different than winning revision function collectConflicts(metadata) { var win = winningRev(metadata); var leaves = collectLeaves(metadata.rev_tree); var conflicts = []; for (var i = 0, len = leaves.length; i < len; i++) { var leaf = leaves[i]; if (leaf.rev !== win && !leaf.opts.deleted) { conflicts.push(leaf.rev); } } return conflicts; } // compact a tree by marking its non-leafs as missing, // and return a list of revs to delete function compactTree(metadata) { var revs = []; traverseRevTree(metadata.rev_tree, function (isLeaf, pos, revHash, ctx, opts) { if (opts.status === 'available' && !isLeaf) { revs.push(pos + '-' + revHash); opts.status = 'missing'; } }); return revs; } // `findPathToLeaf()` returns an array of revs that goes from the specified // leaf rev to the root of that leaf’s branch. // // eg. for this rev tree: // 1-9692 ▶ 2-37aa ▶ 3-df22 ▶ 4-6e94 ▶ 5-df4a ▶ 6-6a3a ▶ 7-57e5 // ┃ ┗━━━━━━▶ 5-8d8c ▶ 6-65e0 // ┗━━━━━━▶ 3-43f6 ▶ 4-a3b4 // // For a `targetRev` of '7-57e5', `findPathToLeaf()` would return ['7-57e5', '6-6a3a', '5-df4a'] // The `revs` argument has the same structure as what `revs_tree` has on e.g. // the IndexedDB representation of the rev tree datastructure. Please refer to // tests/unit/test.purge.js for examples of what these look like. // // This function will throw an error if: // - The requested revision does not exist // - The requested revision is not a leaf function findPathToLeaf(revs, targetRev) { let path = []; const toVisit = revs.slice(); let node; while ((node = toVisit.pop())) { const { pos, ids: tree } = node; const rev = `${pos}-${tree[0]}`; const branches = tree[2]; // just assuming we're already working on the path up towards our desired leaf. path.push(rev); // we've reached the leaf of our dreams, so return the computed path. if (rev === targetRev) { //…unleeeeess if (branches.length !== 0) { throw new Error('The requested revision is not a leaf'); } return path.reverse(); } // this is based on the assumption that after we have a leaf (`branches.length == 0`), we handle the next // branch. this is true for all branches other than the path leading to the winning rev (which is 7-57e5 in // the example above. i've added a reset condition for branching nodes (`branches.length > 1`) as well. if (branches.length === 0 || branches.length > 1) { path = []; } // as a next step, we push the branches of this node to `toVisit` for visiting it during the next iteration for (let i = 0, len = branches.length; i < len; i++) { toVisit.push({ pos: pos + 1, ids: branches[i] }); } } if (path.length === 0) { throw new Error('The requested revision does not exist'); } return path.reverse(); } // build up a list of all the paths to the leafs in this revision tree function rootToLeaf(revs) { var paths = []; var toVisit = revs.slice(); var node; while ((node = toVisit.pop())) { var pos = node.pos; var tree = node.ids; var id = tree[0]; var opts = tree[1]; var branches = tree[2]; var isLeaf = branches.length === 0; var history = node.history ? node.history.slice() : []; history.push({id, opts}); if (isLeaf) { paths.push({pos: (pos + 1 - history.length), ids: history}); } for (var i = 0, len = branches.length; i < len; i++) { toVisit.push({pos: pos + 1, ids: branches[i], history}); } } return paths.reverse(); } // for a better overview of what this is doing, read: function sortByPos$1(a, b) { return a.pos - b.pos; } // classic binary search function binarySearch(arr, item, comparator) { var low = 0; var high = arr.length; var mid; while (low < high) { mid = (low + high) >>> 1; if (comparator(arr[mid], item) < 0) { low = mid + 1; } else { high = mid; } } return low; } // assuming the arr is sorted, insert the item in the proper place function insertSorted(arr, item, comparator) { var idx = binarySearch(arr, item, comparator); arr.splice(idx, 0, item); } // Turn a path as a flat array into a tree with a single branch. // If any should be stemmed from the beginning of the array, that's passed // in as the second argument function pathToTree(path, numStemmed) { var root; var leaf; for (var i = numStemmed, len = path.length; i < len; i++) { var node = path[i]; var currentLeaf = [node.id, node.opts, []]; if (leaf) { leaf[2].push(currentLeaf); leaf = currentLeaf; } else { root = leaf = currentLeaf; } } return root; } // compare the IDs of two trees function compareTree(a, b) { return a[0] < b[0] ? -1 : 1; } // Merge two trees together // The roots of tree1 and tree2 must be the same revision function mergeTree(in_tree1, in_tree2) { var queue = [{tree1: in_tree1, tree2: in_tree2}]; var conflicts = false; while (queue.length > 0) { var item = queue.pop(); var tree1 = item.tree1; var tree2 = item.tree2; if (tree1[1].status || tree2[1].status) { tree1[1].status = (tree1[1].status === 'available' || tree2[1].status === 'available') ? 'available' : 'missing'; } for (var i = 0; i < tree2[2].length; i++) { if (!tree1[2][0]) { conflicts = 'new_leaf'; tree1[2][0] = tree2[2][i]; continue; } var merged = false; for (var j = 0; j < tree1[2].length; j++) { if (tree1[2][j][0] === tree2[2][i][0]) { queue.push({tree1: tree1[2][j], tree2: tree2[2][i]}); merged = true; } } if (!merged) { conflicts = 'new_branch'; insertSorted(tree1[2], tree2[2][i], compareTree); } } } return {conflicts, tree: in_tree1}; } function doMerge(tree, path, dontExpand) { var restree = []; var conflicts = false; var merged = false; var res; if (!tree.length) { return {tree: [path], conflicts: 'new_leaf'}; } for (var i = 0, len = tree.length; i < len; i++) { var branch = tree[i]; if (branch.pos === path.pos && branch.ids[0] === path.ids[0]) { // Paths start at the same position and have the same root, so they need // merged res = mergeTree(branch.ids, path.ids); restree.push({pos: branch.pos, ids: res.tree}); conflicts = conflicts || res.conflicts; merged = true; } else if (dontExpand !== true) { // The paths start at a different position, take the earliest path and // traverse up until it as at the same point from root as the path we // want to merge. If the keys match we return the longer path with the // other merged After stemming we don't want to expand the trees var t1 = branch.pos < path.pos ? branch : path; var t2 = branch.pos < path.pos ? path : branch; var diff = t2.pos - t1.pos; var candidateParents = []; var trees = []; trees.push({ids: t1.ids, diff, parent: null, parentIdx: null}); while (trees.length > 0) { var item = trees.pop(); if (item.diff === 0) { if (item.ids[0] === t2.ids[0]) { candidateParents.push(item); } continue; } var elements = item.ids[2]; for (var j = 0, elementsLen = elements.length; j < elementsLen; j++) { trees.push({ ids: elements[j], diff: item.diff - 1, parent: item.ids, parentIdx: j }); } } var el = candidateParents[0]; if (!el) { restree.push(branch); } else { res = mergeTree(el.ids, t2.ids); el.parent[2][el.parentIdx] = res.tree; restree.push({pos: t1.pos, ids: t1.ids}); conflicts = conflicts || res.conflicts; merged = true; } } else { restree.push(branch); } } // We didnt find if (!merged) { restree.push(path); } restree.sort(sortByPos$1); return { tree: restree, conflicts: conflicts || 'internal_node' }; } // To ensure we don't grow the revision tree infinitely, we stem old revisions function stem(tree, depth) { // First we break out the tree into a complete list of root to leaf paths var paths = rootToLeaf(tree); var stemmedRevs; var result; for (var i = 0, len = paths.length; i < len; i++) { // Then for each path, we cut off the start of the path based on the // `depth` to stem to, and generate a new set of flat trees var path = paths[i]; var stemmed = path.ids; var node; if (stemmed.length > depth) { // only do the stemming work if we actually need to stem if (!stemmedRevs) { stemmedRevs = {}; // avoid allocating this object unnecessarily } var numStemmed = stemmed.length - depth; node = { pos: path.pos + numStemmed, ids: pathToTree(stemmed, numStemmed) }; for (var s = 0; s < numStemmed; s++) { var rev = (path.pos + s) + '-' + stemmed[s].id; stemmedRevs[rev] = true; } } else { // no need to actually stem node = { pos: path.pos, ids: pathToTree(stemmed, 0) }; } // Then we remerge all those flat trees together, ensuring that we don't // connect trees that would go beyond the depth limit if (result) { result = doMerge(result, node, true).tree; } else { result = [node]; } } // this is memory-heavy per Chrome profiler, avoid unless we actually stemmed if (stemmedRevs) { traverseRevTree(result, function (isLeaf, pos, revHash) { // some revisions may have been removed in a branch but not in another delete stemmedRevs[pos + '-' + revHash]; }); } return { tree: result, revs: stemmedRevs ? Object.keys(stemmedRevs) : [] }; } function merge(tree, path, depth) { var newTree = doMerge(tree, path); var stemmed = stem(newTree.tree, depth); return { tree: stemmed.tree, stemmedRevs: stemmed.revs, conflicts: newTree.conflicts }; } // this method removes a leaf from a rev tree, independent of its status. // e.g., by removing an available leaf, it could leave its predecessor as // a missing leaf and corrupting the tree. function removeLeafFromRevTree(tree, leafRev) { return tree.flatMap((path) => { path = removeLeafFromPath(path, leafRev); return path ? [path] : []; }); } function removeLeafFromPath(path, leafRev) { const tree = clone(path); const toVisit = [tree]; let node; while ((node = toVisit.pop())) { const { pos, ids: [id, , branches], parent } = node; const isLeaf = branches.length === 0; const hash = `${pos}-${id}`; if (isLeaf && hash === leafRev) { if (!parent) { // FIXME: we're facing the root, and probably shouldn't just return an empty array (object? null?). return null; } parent.ids[2] = parent.ids[2].filter(function (branchNode) { return branchNode[0] !== id; }); return tree; } for (let i = 0, len = branches.length; i < len; i++) { toVisit.push({ pos: pos + 1, ids: branches[i], parent: node }); } } return tree; } // return true if a rev exists in the rev tree, false otherwise function revExists(revs, rev) { var toVisit = revs.slice(); var splitRev = rev.split('-'); var targetPos = parseInt(splitRev[0], 10); var targetId = splitRev[1]; var node; while ((node = toVisit.pop())) { if (node.pos === targetPos && node.ids[0] === targetId) { return true; } var branches = node.ids[2]; for (var i = 0, len = branches.length; i < len; i++) { toVisit.push({pos: node.pos + 1, ids: branches[i]}); } } return false; } function getTrees(node) { return node.ids; } // check if a specific revision of a doc has been deleted // - metadata: the metadata object from the doc store // - rev: (optional) the revision to check. defaults to winning revision function isDeleted(metadata, rev) { if (!rev) { rev = winningRev(metadata); } var id = rev.substring(rev.indexOf('-') + 1); var toVisit = metadata.rev_tree.map(getTrees); var tree; while ((tree = toVisit.pop())) { if (tree[0] === id) { return !!tree[1].deleted; } toVisit = toVisit.concat(tree[2]); } } function isLocalId(id) { return typeof id === 'string' && id.startsWith('_local/'); } // returns the current leaf node for a given revision function latest(rev, metadata) { var toVisit = metadata.rev_tree.slice(); var node; while ((node = toVisit.pop())) { var pos = node.pos; var tree = node.ids; var id = tree[0]; var opts = tree[1]; var branches = tree[2]; var isLeaf = branches.length === 0; var history = node.history ? node.history.slice() : []; history.push({id, pos, opts}); if (isLeaf) { for (var i = 0, len = history.length; i < len; i++) { var historyNode = history[i]; var historyRev = historyNode.pos + '-' + historyNode.id; if (historyRev === rev) { // return the rev of this leaf return pos + '-' + id; } } } for (var j = 0, l = branches.length; j < l; j++) { toVisit.push({pos: pos + 1, ids: branches[j], history}); } } /* istanbul ignore next */ throw new Error('Unable to resolve latest revision for id ' + metadata.id + ', rev ' + rev); } export { collectConflicts, collectLeaves, compactTree, findPathToLeaf, isDeleted, isLocalId, merge, removeLeafFromRevTree as removeLeafFromTree, revExists, rootToLeaf, traverseRevTree, winningRev, latest };