UNPKG

diffusion

Version:

Diffusion JavaScript client

524 lines (414 loc) 14.1 kB
/*eslint valid-jsdoc: "off"*/ var approximateCubeRoot = require('util/math').approximateCubeRoot; var BAIL_OUT_FACTOR = 10000; var MAXIMUM_STORAGE = 0x7fffffff; // Constant result codes var SUCCESS = 0, REPLACE = 1, NO_CHANGE = 2; function Storage(max) { var maximumD = (max - 3) / 4; var vectorLength = 15; var vector = []; function fill(start) { for (var i = start; i < vectorLength; i += 4) { vector[i + 0] = -1; vector[i + 1] = -1; vector[i + 2] = 0x7fffffff; vector[i + 3] = 0x7fffffff; } } function ensure(d) { var required = 4 * (d + 1) + 3; if (vectorLength < required) { vectorLength = required; } } this.initialise = function(d) { ensure(d); fill(3); return vector; }; this.extend = function(d) { if (d > maximumD) { return null; } var originalLength = vectorLength; ensure(d); fill(originalLength); return vector; }; } // Forward key function keyF(k) { return k < 0 ? -4 * k - 1 : 4 * k; } // Reverse key function keyR(k) { return keyF(k) + 2; } // Get forward function getF(v, k) { var i = v[keyF(k)]; return i === undefined ? 0 : i; } // Get reverse function getR(v, k) { var i = v[keyR(k)]; return i === undefined ? 0 : i; } // Set forward function setF(v, k, i) { v[keyF(k)] = i; } // Set reverse function setR(v, k, i) { var key = keyR(k); v[key] = i; } // Next forward function nextF(v, k) { var left = getF(v, k + 1); var right = getF(v, k - 1); return left < right ? right : left + 1; } // Next reverse function nextR(v, k) { var left = getR(v, k + 1); var right = getR(v, k - 1); return left < right ? left : right - 1; } /** * Limit a diagonal by the length of an input. * * <p> * If d is greater than the length, return the diagonal to use instead. This * is used to constrain the forward vector entries to an (n+1) * (m+1) * rectangle, and the reverse vector entries to an (m+1) * (n+1) rectangle. */ function corner(d, length) { if (d <= length) { return d; } else { return 2 * length - d; } } /** * Bail-out is necessary because in the worst case the algorithm is O(N^2) * in time, where N is the total input length. The cost also depends on the * number and distribution of differences between the two values. * <p> * See the Java MyersBinaryDiff#calculateBailOutLimit implementation for more * in-depth documentation regarding the choice of strategy. */ function calculateBailOutLimit(l1, l2, bailOutFactor) { var total = l1 + l2; var cube = approximateCubeRoot(total); var mult = bailOutFactor * cube; // Minimum bound of 256 arbitrarily chosen to match gnudiff. It's not a // critical refinement, but encourages a little more work before // bailing. return Math.max(256, mult / 100); } function checkBounds(buffer, offset, length) { if (offset < 0) { throw new Error("offset " + offset + " < 0"); } if (length < 0) { throw new Error("length " + length + " < 0"); } if (offset + length > buffer.length || offset + length < 0) { throw new Error("offset " + offset + " + " + length + " > " + buffer.length); } } function Execution(storage, a, b, script, bailOutLimit) { var self = this; this.diff = function(aOffset, aLength, bOffset, bLength) { checkBounds(a, aOffset, aLength); checkBounds(b, bOffset, bLength); var x = 0; var y = 0; while (x < aLength && y < bLength && a[aOffset + x] === b[bOffset + y]) { ++x; ++y; } var u = aLength; var v = bLength; while (u > x && v > y && a[aOffset + u - 1] === b[bOffset + v - 1]) { --u; --v; } var r1 = script.match(aOffset, x); // Prefix if (r1 !== SUCCESS) { return r1; } var r2; if (x === u) { r2 = script.insert(bOffset + y, v - y); } else if (y === v) { r2 = script.delete(); } else { r2 = self.middleSnake(aOffset + x, u - x, bOffset + y, v - y); } if (r2 !== SUCCESS) { return r2; } return script.match(aOffset + u, aLength - u); // Suffix }; this.middleSnake = function(aOffset, aLength, bOffset, bLength) { var delta = aLength - bLength; var odd = delta & 1; var vec = storage.initialise(1); setF(vec, 0, 0); setR(vec, 0, aLength); var d = 1; for (;;) { var cornerA = corner(d, aLength); var cornerB = corner(d, bLength); var k1 = -cornerA; for (; k1 <= cornerB; k1 +=2) { var x1 = nextF(vec, k1); var u1 = x1; while (u1 < aLength && u1 + k1 < bLength && a[aOffset + u1] === b[bOffset + u1 + k1]) { ++u1; } // Short circuit if d == 1. Suppose d == 1. There's at least // one difference, so either u < n or u < m - k. The next // test guarantees k == -delta == m-n. So u < n. We've not // made any reverse steps yet, so reverse(k + delta) is n. if (odd && d > 1 && Math.abs(k1 + delta) <= d - 1 && u1 >= getR(vec, k1 + delta)) { return self.recurse(aOffset, aLength, bOffset, bLength, x1, u1, k1); } setF(vec, k1, u1); } var k2 = -cornerB; for (; k2 <= cornerA; k2 += 2) { var u2 = nextR(vec, k2); var x2 = u2; var kd = k2 - delta; while (x2 > 0 && x2 + kd > 0 && a[aOffset + x2 - 1] === b[bOffset + x2 + kd - 1]) { --x2; } if (!odd && Math.abs(kd) <= d && x2 <= getF(vec, kd)) { return self.recurse(aOffset, aLength, bOffset, bLength, x2, u2, kd); } setR(vec, k2, x2); } if (d > bailOutLimit) { return bail(vec, aOffset, aLength, bOffset, bLength, cornerA, cornerB); } ++d; vec = storage.extend(d); if (vec === null) { return REPLACE; } } }; this.recurse = function(aOffset, aLength, bOffset, bLength, x, u, k) { var r1 = self.diff(aOffset, x, bOffset, x + k); if (r1 !== SUCCESS) { return r1; } var r2 = script.match(aOffset + x, u - x); if (r2 !== SUCCESS) { return r2; } return self.diff(aOffset + u, aLength - u, bOffset + u + k, bLength - u - k); }; function bail(vec, aOffset, aLength, bOffset, bLength, cornerA, cornerB) { var xbest = 0; var ybest = 0; var x; var y; for (var k1 = -cornerA; k1 <= cornerB; k1 += 2) { // Forward x values can exceed n because we don't constrain // #getF. Similarly y values can exceed n. Detect // and slide back within the square. var x1 = Math.min(getF(vec, k1), aLength); if (x1 + k1 > bLength) { x = bLength - k1; } else { x = x1; } y = x + k1; if (x + y > xbest + ybest) { xbest = x; ybest = y; } } for (var k2 = -cornerB; k2 <= cornerA; k2 += 2) { // Similarly reverse x and y values can be less than 0 var x2 = Math.max(getR(vec, k2), 0); var kd = k2 - (aLength - bLength); if (x2 + kd < 0) { x = -kd; } else { x = x2; } y = x + kd; if (aLength + bLength - x - y > xbest + ybest) { xbest = x; ybest = y; } } var r = boundedDiff(aOffset, xbest, bOffset, ybest, aLength, bLength); if (r !== SUCCESS) { return r; } return boundedDiff( aOffset + xbest, aLength - xbest, bOffset + ybest, bLength - ybest, aLength, bLength); } /* * Variant of #diff used by #bail to ensure we are dividing the problem space * sufficiently to prevent stack overflow */ function boundedDiff(aOffset, aLength, bOffset, bLength, totalN, totalM) { var totalSpace = totalN * totalM; var nm = aLength * bLength; // We use Math.floor here instead of a bitwise op as totalSpace may be an integer larger than 32 bits var threshold = Math.floor((1 << 24) + totalSpace / 2); if (nm >= threshold) { var x = aLength / 2 | 0; var y = bLength / 2 | 0; var r1 = self.diff(aOffset, x, bOffset, y); if (r1 !== SUCCESS) { return r1; } return self.diff(aOffset + x, aLength - x, bOffset + y, bLength - y); } else { return self.diff(aOffset, aLength, bOffset, bLength); } } } // Diff operations var INSERT = function(script, start, length) { return script.insert(start, length); }; var MATCH = function(script, start, length) { return script.match(start, length); }; var NOOP = function() { return SUCCESS; }; function coalesce(delegate, aOffset, bOffset) { var neverFlushed = true; var pendingLength = 0; var pendingStart = 0; var pending = NOOP; function flushPending() { neverFlushed &= pending === NOOP; return pending(delegate, pendingStart, pendingLength); } function process(op, start, length) { if (length > 0) { if (pending !== op) { var r = flushPending(); if (r !== SUCCESS) { return r; } pending = op; pendingStart = start; pendingLength = length; } else { pendingLength += length; } } return SUCCESS; } return { insert : function(bStart, length) { return process(INSERT, bStart - bOffset, length); }, match : function(aStart, length) { return process(MATCH, aStart - aOffset, length); }, delete : function(aStart, length) { // eslint-disable-line no-unused-vars // We discard all deletes, but must flush pending matches. // INSERT,DELETE,INSERT can be coalesced to drop the DELETE; // MATCH,DELETE,MATCH cannot. if (pending === INSERT) { return SUCCESS; } var r = flushPending(); pending = NOOP; return r; }, close : function(aLength, bLength) { // eslint-disable-line no-unused-vars if (neverFlushed) { if (pending === INSERT) { return REPLACE; } else if (pendingStart === 0 && pendingLength === aLength) { return NO_CHANGE; } } var r = flushPending(); if (r !== SUCCESS) { return r; } return delegate.close(); } }; } /** * Implementation of Myer's diff with the linear space refinement. * <P> * Diff the subset of two buffers, as specified by offset/length parameters, * with differences written to the provided script. * <P> * See E. Myers (1986). "An O(ND) Difference Algorithm and Its Variations". * Algorithmica 1 (2): 251–266. * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927 * <P> * For more documentation, refer to the Java implementation. * <P> * The bail out factor determines when to give up in middlesnake and produce a * more approximate result. Larger values increase the precision at the cost of * increased CPU. * <P> * The number of diagonals to search in each middleSnake call is bounded by * max(256, bailOutFactor/100 * totalLength) * <P> * For inputs with a lot of small differences, a smaller bailOutFactor often * has a beneficial effect of moderately reducing the size of the result. * But it can also increase the size, occasionally dramatically. * Additionally, the reduced precision affects the quality of JSON * structural deltas. YMMV, as they say. * * @param {Number} [maximumStorage] - Maximum storage limit * @param {Number} [bailOutFactor] - Bail-out limit factor */ module.exports = function MyersBinaryDiff(maximumStorage, bailOutFactor) { if (maximumStorage === undefined) { maximumStorage = MAXIMUM_STORAGE; } if (bailOutFactor === undefined) { bailOutFactor = BAIL_OUT_FACTOR; } var storage = new Storage(maximumStorage); /** * @param {Buffer} a - Source data * @param {Number} aOffset - Start of source data * @param {Number} aLength - Length of source data * @param {Buffer} b - Target data * @param {Number} bOffset - Start of target data * @param {Number} bLength - Length of target data * @param {Script} editScript - The edit script */ this.diff = function(a, aOffset, aLength, b, bOffset, bLength, editScript) { var script = coalesce(editScript, aOffset, bOffset); var execution = new Execution(storage, a, b, script, calculateBailOutLimit(aLength, bLength, bailOutFactor)); var result = execution.diff(aOffset, aLength, bOffset, bLength); if (result !== SUCCESS) { return result; } return script.close(aLength, bLength); }; }; module.exports.SUCCESS = SUCCESS; module.exports.REPLACE = REPLACE; module.exports.NO_CHANGE = NO_CHANGE;