// edit-distance
// Version:
// String and tree edit distance
// 185 lines (170 loc) • 5.86 kB
// JavaScript
var Mapping, postOrderWalk, ted, tedBt, trackedMin, zero;
({Mapping, zero, trackedMin} = require('./util'));
// Iterative post-order traversal of a tree (no recursion).
//
// root       - the node to start from.
// childrenCb - called with a node; returns its ordered children, or
//              null/undefined when it has none.
// visitCb    - called once per node, children before their parent and
//              siblings left to right, as visitCb(index, node, firstChild):
//                index      - position of the node among its siblings
//                             (undefined for the root),
//                node       - the node itself,
//                firstChild - the node's first child, or null if there is none.
postOrderWalk = function(root, childrenCb, visitCb) {
  // Nodes still to be expanded, as [siblingIndex, node] pairs.
  const pending = [[undefined, root]];
  // Expanded nodes in "parent first, right-to-left" order; replaying this
  // list backwards yields the post-order sequence.
  const expanded = [];

  while (pending.length > 0) {
    const [position, current] = pending.pop();
    const kids = childrenCb(current);

    let first = null;
    if (kids != null && kids[0] != null) {
      first = kids[0];
    }
    expanded.push([position, current, first]);

    if (kids != null) {
      const count = kids.length;
      for (let at = 0; at < count; at++) {
        pending.push([at, kids[at]]);
      }
    }
  }

  for (let at = expanded.length - 1; at >= 0; at--) {
    const [position, current, first] = expanded[at];
    visitCb(position, current, first);
  }
};
// Computes the tree edit distance (TED) between two ordered trees.
//
// rootA, rootB - root nodes of the two trees.
// childrenCb   - called with a node; returns its ordered children, or
//                null/undefined when it has none.
// insertCb     - called with a node of tree B; returns the cost of inserting it.
// removeCb     - called with a node of tree A; returns the cost of removing it.
// updateCb     - called with (nodeA, nodeB); returns the cost of turning
//                nodeA into nodeB.
//
// Returns a Mapping built from the two preprocessed trees, the distance
// between the roots, the operation tracking table and the tedBt backtracker.
//
// @example
// var rootA = {id: 1, children: [{id: 2}, {id: 3}]};
// var rootB = {id: 1, children: [{id: 4}, {id: 3}, {id: 5}]};
// var children = function(node) { return node.children; };
// var insert = function(node) { return 1; };
// var remove = insert;
// var update = function(nodeA, nodeB) { return nodeA.id !== nodeB.id ? 1 : 0; };
// ted(rootA, rootB, children, insert, remove, update);
// @see Zhang, Kaizhong, and Dennis Shasha. "Simple fast algorithms for the
// editing distance between trees and related problems." SIAM journal on
// computing 18.6 (1989): 1245-1262.
// Could be improved using:
// @see Pawlik, Mateusz, and Nikolaus Augsten. "Tree edit distance: Robust and
// memory-efficient." Information Systems 56 (2016): 157-173.
ted = function(rootA, rootB, childrenCb, insertCb, removeCb, updateCb) {
  // Flattens a tree into the arrays the algorithm works on.
  const preprocess = function(root) {
    const t = {
      // Nodes in post-order.
      nodes: [],
      // Leftmost leaf descendant (see paper), as a post-order index per node.
      llds: [],
      // Keyroots (see paper), as post-order indices in ascending order.
      keyroots: []
    };
    // [siblingIndex, lld] of every visited node whose parent has not been
    // visited yet. Because the walk is post-order, the children of the node
    // being visited are always the topmost entries, last child on top.
    const open = [];
    postOrderWalk(root, childrenCb, function(index, node, firstChild) {
      const nIndex = t.nodes.length;
      t.nodes.push(node);
      // A leaf is its own leftmost leaf.
      let lldIndex = nIndex;
      if (firstChild != null) {
        // Discard this node's children from the stack; the last one removed
        // is the first child (sibling index 0), whose leftmost leaf is also
        // this node's. This is O(1) amortized and, unlike looking the child
        // up by value in t.nodes, stays correct when the same node value
        // occurs more than once in the tree. Assumes a child list never
        // starts with a null/undefined entry.
        let entry;
        do {
          entry = open.pop();
        } while (entry[0] !== 0);
        lldIndex = entry[1];
      }
      t.llds.push(lldIndex);
      open.push([index, lldIndex]);
      // Every node that is not a first child (this includes the root, whose
      // index is undefined) is a keyroot.
      if (index !== 0) {
        t.keyroots.push(nIndex);
      }
    });
    return t;
  };

  const tA = preprocess(rootA);
  const tB = preprocess(rootB);
  // Operation chosen for each (nodeA, nodeB) subtree pair: the `index` reported
  // by trackedMin for the candidates (remove, insert, update), in that order.
  const ttrack = zero(tA.nodes.length, tB.nodes.length);
  // Distance between the subtrees rooted at each (nodeA, nodeB) pair.
  const tdist = zero(tA.nodes.length, tB.nodes.length);
  // Scratch table of forest distances, reused by every treeDistance call.
  const fdist = zero(tA.nodes.length + 1, tB.nodes.length + 1);

  // Fills tdist/ttrack for the subtrees rooted at keyroots i (tree A) and
  // j (tree B) and for all of their sub-forests sharing the same leftmost leaf.
  const treeDistance = function(i, j) {
    const aL = tA.llds;
    const bL = tB.llds;
    const aN = tA.nodes;
    const bN = tB.nodes;
    // Offsets translating 1-based forest positions into post-order indices.
    const iOff = aL[i] - 1;
    const jOff = bL[j] - 1;
    // Forest sizes plus one (row/column 0 is the empty forest).
    const m = i - aL[i] + 2;
    const n = j - bL[j] + 2;
    // Minimize from upper left to lower right (dynamic programming, see paper).
    for (let a = 1; a < m; a += 1) {
      fdist[a][0] = fdist[a - 1][0] + removeCb(aN[a + iOff]);
    }
    for (let b = 1; b < n; b += 1) {
      fdist[0][b] = fdist[0][b - 1] + insertCb(bN[b + jOff]);
    }
    for (let a = 1; a < m; a += 1) {
      for (let b = 1; b < n; b += 1) {
        if (aL[i] === aL[a + iOff] && bL[j] === bL[b + jOff]) {
          // Both forests are whole trees: this is a tree-to-tree distance.
          const min = trackedMin(fdist[a - 1][b] + removeCb(aN[a + iOff]), fdist[a][b - 1] + insertCb(bN[b + jOff]), fdist[a - 1][b - 1] + updateCb(aN[a + iOff], bN[b + jOff]));
          ttrack[a + iOff][b + jOff] = min.index;
          tdist[a + iOff][b + jOff] = fdist[a][b] = min.value;
        } else {
          // Otherwise reuse the already known distance between the two
          // rightmost subtrees on top of the forests to their left.
          const p = aL[a + iOff] - 1 - iOff;
          const q = bL[b + jOff] - 1 - jOff;
          fdist[a][b] = Math.min(fdist[a - 1][b] + removeCb(aN[a + iOff]), fdist[a][b - 1] + insertCb(bN[b + jOff]), fdist[p][q] + tdist[a + iOff][b + jOff]);
        }
      }
    }
  };

  // Iterate keyroots.
  for (const i of tA.keyroots) {
    for (const j of tB.keyroots) {
      treeDistance(i, j);
    }
  }

  // The roots are last in post-order, so this is the distance of the whole trees.
  const tdistance = tdist[tA.nodes.length - 1][tB.nodes.length - 1];
  return new Mapping(tA, tB, tdistance, ttrack, tedBt);
};
// Reconstructs the node-to-node mapping by walking the tracking table from
// its lower right corner towards the upper left.
//
// tA, tB - preprocessed trees; only their post-order `nodes` arrays are read.
// ttrack - table indexed as ttrack[i][j] holding 0 (remove), 1 (insert) or
//          2 (update) for the node pair (i, j).
//
// Returns an array of [nodeA, nodeB] pairs, last nodes first; a removed node
// is paired with null on the right, an inserted node with null on the left.
// Throws an Error when the table contains any other value.
tedBt = function(tA, tB, ttrack) {
  const REMOVE = 0;
  const INSERT = 1;
  const UPDATE = 2;

  const pairs = [];
  let row = tA.nodes.length - 1;
  let col = tB.nodes.length - 1;

  while (row >= 0 && col >= 0) {
    const op = ttrack[row][col];
    if (op === REMOVE) {
      pairs.push([tA.nodes[row], null]);
      row -= 1;
    } else if (op === INSERT) {
      pairs.push([null, tB.nodes[col]]);
      col -= 1;
    } else if (op === UPDATE) {
      pairs.push([tA.nodes[row], tB.nodes[col]]);
      row -= 1;
      col -= 1;
    } else {
      throw new Error(`Invalid operation ${ttrack[row][col]} at (${row}, ${col})`);
    }
  }

  // Handle epsilon nodes: once one tree is exhausted, whatever is left of the
  // other can only be inserted (tree B) or removed (tree A). At most one of
  // these two loops runs, because the loop above stops only when an index
  // has dropped below zero.
  for (; col >= 0; col -= 1) {
    pairs.push([null, tB.nodes[col]]);
  }
  for (; row >= 0; row -= 1) {
    pairs.push([tA.nodes[row], null]);
  }

  return pairs;
};
module.exports = ted;