UNPKG

edit-distance

Version:
185 lines (170 loc) 5.86 kB
var Mapping, postOrderWalk, ted, tedBt, trackedMin, zero;

({Mapping, zero, trackedMin} = require('./util'));

// Visits every node reachable from `root` in post-order, without recursion.
//
// root       - node the walk starts from.
// childrenCb - called once per node as childrenCb(node); its result is read
//              as an array-like of child nodes (null/undefined means "no
//              children").
// visitCb    - called once per node as visitCb(index, node, firstChild):
//              `index` is the node's position in its parent's children
//              (undefined for the root) and `firstChild` is the node's first
//              child, or null when that child is missing or null/undefined.
postOrderWalk = function(root, childrenCb, visitCb) {
  // Nodes waiting to be expanded, as [sibling index, node] pairs.
  const pending = [[void 0, root]];
  // Expanded nodes in reverse post-order, as [sibling index, node, first child].
  const expanded = [];

  // Phase 1: expand depth-first, recording each node before its children.
  while (pending.length > 0) {
    const [siblingIndex, current] = pending.pop();
    const kids = childrenCb(current);
    const head = kids != null ? kids[0] : void 0;
    expanded.push([siblingIndex, current, head != null ? head : null]);
    if (kids != null) {
      // Children are pushed left to right, so they are popped right to left;
      // replaying `expanded` backwards restores the left-to-right order.
      for (let position = 0, count = kids.length; position < count; position++) {
        pending.push([position, kids[position]]);
      }
    }
  }

  // Phase 2: replay the record backwards, which yields post-order.
  for (let at = expanded.length - 1; at >= 0; at--) {
    const [siblingIndex, current, leftmostChild] = expanded[at];
    visitCb(siblingIndex, current, leftmostChild);
  }
};

// Computes the tree edit distance (TED).
// @example
// var rootA = {id: 1, children: [{id: 2}, {id: 3}]};
// var rootB = {id: 1, children: [{id: 4}, {id: 3}, {id: 5}]};
// var children = function(node) { return node.children; };
// var insert = function(node) { return 1; };
// var remove = insert;
// var update = function(nodeA, nodeB) { return nodeA.id !== nodeB.id ? 1 : 0; };
// ted(rootA, rootB, children, insert, remove, update);
// @see Zhang, Kaizhong, and Dennis Shasha. "Simple fast algorithms for the
// editing distance between trees and related problems." SIAM journal on
// computing 18.6 (1989): 1245-1262.
// Could be improved using:
// @see Pawlik, Mateusz, and Nikolaus Augsten. "Tree edit distance: Robust and
// memory-efficient." Information Systems 56 (2016): 157-173.
// @param rootA      Root node of the first tree.
// @param rootB      Root node of the second tree.
// @param childrenCb childrenCb(node) -> array-like of child nodes (or
//                   null/undefined for none).
// @param insertCb   insertCb(nodeB) -> numeric cost of inserting a node of B.
// @param removeCb   removeCb(nodeA) -> numeric cost of removing a node of A.
// @param updateCb   updateCb(nodeA, nodeB) -> numeric cost of turning nodeA
//                   into nodeB.
// @return A `Mapping` constructed from both preprocessed trees, the distance
//         between the two roots, the operation-tracking table and `tedBt`.
ted = function(rootA, rootB, childrenCb, insertCb, removeCb, updateCb) {
  // Flattens a tree into the post-order arrays the algorithm works on.
  const preprocess = function(root) {
    const t = {
      // Nodes in post-order.
      nodes: [],
      // Leftmost leaf descendant (see paper), as a post-order index per node.
      llds: [],
      // Keyroots (see paper), as ascending post-order indices.
      keyroots: []
    };
    // First post-order index of every node seen so far. Replaces a linear
    // `t.nodes.indexOf(firstChild)` scan per internal node, which made the
    // preprocessing quadratic on deep trees. Only the first index is kept so
    // a node value that occurs more than once resolves as `indexOf` did.
    const firstIndexOf = new Map();
    postOrderWalk(root, childrenCb, function(index, node, firstChild) {
      // Push nodes in post-order.
      const nIndex = t.nodes.length;
      t.nodes.push(node);
      if (!firstIndexOf.has(node)) {
        firstIndexOf.set(node, nIndex);
      }
      // A node without a first child is its own leftmost leaf; otherwise it
      // inherits the one of its first child, which post-order has already
      // visited.
      if (firstChild == null) {
        t.llds.push(nIndex);
      } else {
        t.llds.push(t.llds[firstIndexOf.get(firstChild)]);
      }
      // Every node that is not a first child (this includes the root, whose
      // index is undefined) is a keyroot.
      if (index !== 0) {
        t.keyroots.push(nIndex);
      }
    });
    return t;
  };

  const tA = preprocess(rootA);
  const tB = preprocess(rootB);
  // NOTE(review): `zero(rows, cols)` comes from ./util and is assumed to
  // return a rows x cols matrix filled with 0 - confirm against util.
  const ttrack = zero(tA.nodes.length, tB.nodes.length);
  const tdist = zero(tA.nodes.length, tB.nodes.length);
  const fdist = zero(tA.nodes.length + 1, tB.nodes.length + 1);

  // Fills `tdist`/`ttrack` for the subtrees rooted at keyroots i and j, using
  // `fdist` as scratch space for the forest distances.
  const treeDistance = function(i, j) {
    const aL = tA.llds;
    const bL = tB.llds;
    const aN = tA.nodes;
    const bN = tB.nodes;
    // Offsets translating 1-based forest indices into post-order indices.
    const iOff = aL[i] - 1;
    const jOff = bL[j] - 1;
    const m = i - aL[i] + 2;
    const n = j - bL[j] + 2;
    // Minimize from upper left to lower right (dynamic programming, see paper).
    // First column: remove every node of the A-forest.
    for (let a = 1; a < m; a++) {
      fdist[a][0] = fdist[a - 1][0] + removeCb(aN[a + iOff]);
    }
    // First row: insert every node of the B-forest.
    for (let b = 1; b < n; b++) {
      fdist[0][b] = fdist[0][b - 1] + insertCb(bN[b + jOff]);
    }
    for (let a = 1; a < m; a++) {
      const ai = a + iOff;
      for (let b = 1; b < n; b++) {
        const bj = b + jOff;
        const removal = fdist[a - 1][b] + removeCb(aN[ai]);
        const insertion = fdist[a][b - 1] + insertCb(bN[bj]);
        if (aL[i] === aL[ai] && bL[j] === bL[bj]) {
          // Both forests are whole trees: the result is also a tree distance.
          // NOTE(review): `trackedMin` (./util) is assumed to return
          // {index, value} where `index` is the position of the smallest
          // argument (0 remove, 1 insert, 2 update) - confirm against util.
          const min = trackedMin(removal, insertion, fdist[a - 1][b - 1] + updateCb(aN[ai], bN[bj]));
          ttrack[ai][bj] = min.index;
          tdist[ai][bj] = fdist[a][b] = min.value;
        } else {
          // Otherwise combine the forests left of both subtrees with the
          // already known distance between the two subtrees.
          const p = aL[ai] - 1 - iOff;
          const q = bL[bj] - 1 - jOff;
          fdist[a][b] = Math.min(removal, insertion, fdist[p][q] + tdist[ai][bj]);
        }
      }
    }
  };

  // Iterate keyroots.
  for (const i of tA.keyroots) {
    for (const j of tB.keyroots) {
      treeDistance(i, j);
    }
  }

  const tdistance = tdist[tA.nodes.length - 1][tB.nodes.length - 1];
  return new Mapping(tA, tB, tdistance, ttrack, tedBt);
};

// Backtracks the tree-to-tree mapping from lower right to upper left.
// @param tA     Preprocessed first tree (only `nodes` is read).
// @param tB     Preprocessed second tree (only `nodes` is read).
// @param ttrack Table of operation codes: 0 remove, 1 insert, 2 update.
// @return Array of [nodeA, nodeB] pairs; null stands for "no counterpart".
// @throws Error when a visited table cell holds any other operation code.
tedBt = function(tA, tB, ttrack) {
  const mapping = [];
  let i = tA.nodes.length - 1;
  let j = tB.nodes.length - 1;
  while (i >= 0 && j >= 0) {
    switch (ttrack[i][j]) {
      case 0:
        // Remove
        mapping.push([tA.nodes[i], null]);
        --i;
        break;
      case 1:
        // Insert
        mapping.push([null, tB.nodes[j]]);
        --j;
        break;
      case 2:
        // Update
        mapping.push([tA.nodes[i], tB.nodes[j]]);
        --i;
        --j;
        break;
      default:
        throw new Error(`Invalid operation ${ttrack[i][j]} at (${i}, ${j})`);
    }
  }
  // Handle epsilon nodes: the loop above stops once one side is exhausted, so
  // at most one of the loops below runs and pairs the rest with null.
  while (j >= 0) {
    mapping.push([null, tB.nodes[j]]);
    --j;
  }
  while (i >= 0) {
    mapping.push([tA.nodes[i], null]);
    --i;
  }
  return mapping;
};

module.exports = ted;