diffusion
Version:
Diffusion JavaScript client
524 lines (414 loc) • 14.1 kB
JavaScript
/*eslint valid-jsdoc: "off"*/
var approximateCubeRoot = require('util/math').approximateCubeRoot;
// Bail-out factor applied when the caller does not supply one.
var BAIL_OUT_FACTOR = 10000;

// Storage limit applied when the caller does not supply one.
var MAXIMUM_STORAGE = 0x7fffffff;

// Result codes returned by the diff and by the edit script operations.
var SUCCESS = 0;
var REPLACE = 1;
var NO_CHANGE = 2;
/**
 * Reusable backing vector for the forward and reverse diagonal values.
 * <p>
 * Slots from index 3 onwards are reset in groups of four: two forward slots
 * set to -1 followed by two reverse slots set to 0x7fffffff. Slots 0-2 are
 * never reset here; the caller seeds them.
 *
 * @param {Number} max - Storage limit; diagonals above (max - 3) / 4 are refused
 */
function Storage(max) {
    var FORWARD_UNSET = -1;
    var REVERSE_UNSET = 0x7fffffff;

    var diagonalLimit = (max - 3) / 4;
    var slots = [];
    var slotCount = 15;

    // Reset every group of four slots from `from` up to the current slot count.
    function reset(from) {
        var index = from;
        while (index < slotCount) {
            slots[index] = FORWARD_UNSET;
            slots[index + 1] = FORWARD_UNSET;
            slots[index + 2] = REVERSE_UNSET;
            slots[index + 3] = REVERSE_UNSET;
            index += 4;
        }
    }

    // Raise the slot count so that diagonal d + 1 is addressable. Never shrinks.
    function grow(d) {
        var needed = 4 * (d + 1) + 3;
        if (needed > slotCount) {
            slotCount = needed;
        }
    }

    /**
     * Reset the whole vector (from slot 3) for a new search.
     *
     * @param {Number} d - Diagonal that must be addressable
     * @returns {Array} the shared vector
     */
    this.initialise = function(d) {
        grow(d);
        reset(3);
        return slots;
    };

    /**
     * Make room for diagonal d, resetting only the newly added slots.
     *
     * @param {Number} d - Diagonal that must be addressable
     * @returns {Array} the shared vector, or null if d exceeds the limit
     */
    this.extend = function(d) {
        if (d > diagonalLimit) {
            return null;
        }
        var previousCount = slotCount;
        grow(d);
        reset(previousCount);
        return slots;
    };
}
/**
 * Vector index of the forward value for diagonal k.
 * Non-negative k maps to 4k; negative k maps to -4k - 1.
 */
function keyF(k) {
    if (k < 0) {
        return -4 * k - 1;
    }
    return 4 * k;
}
/**
 * Vector index of the reverse value for diagonal k: two slots after the
 * forward index of the same diagonal.
 */
function keyR(k) {
    var forwardIndex = keyF(k);
    return forwardIndex + 2;
}
/**
 * Read the forward value stored for diagonal k.
 * A slot that has never been written reads as 0.
 */
function getF(v, k) {
    var stored = v[keyF(k)];
    if (stored === undefined) {
        return 0;
    }
    return stored;
}
/**
 * Read the reverse value stored for diagonal k.
 * A slot that has never been written reads as 0.
 */
function getR(v, k) {
    var stored = v[keyR(k)];
    if (stored === undefined) {
        return 0;
    }
    return stored;
}
/**
 * Store i as the forward value for diagonal k.
 */
function setF(v, k, i) {
    var slot = keyF(k);
    v[slot] = i;
}
/**
 * Store i as the reverse value for diagonal k.
 */
function setR(v, k, i) {
    v[keyR(k)] = i;
}
/**
 * Starting forward value for diagonal k, derived from its two neighbours.
 * If the value on k + 1 is smaller than the value on k - 1, the k - 1 value
 * is taken unchanged; otherwise the k + 1 value is taken and advanced by one.
 */
function nextF(v, k) {
    var upper = getF(v, k + 1);
    var lower = getF(v, k - 1);
    if (upper < lower) {
        return lower;
    }
    return upper + 1;
}
/**
 * Starting reverse value for diagonal k, derived from its two neighbours.
 * If the value on k + 1 is smaller than the value on k - 1, the k + 1 value
 * is taken unchanged; otherwise the k - 1 value is taken and reduced by one.
 */
function nextR(v, k) {
    var upper = getR(v, k + 1);
    var lower = getR(v, k - 1);
    if (upper < lower) {
        return upper;
    }
    return lower - 1;
}
/**
 * Clamp a diagonal to the length of an input.
 * <p>
 * While d does not exceed the length it is returned unchanged. Beyond that
 * point the result is 2 * length - d, i.e. d reflected about the length.
 * This keeps the forward vector entries inside an (n+1) * (m+1) rectangle
 * and the reverse vector entries inside an (m+1) * (n+1) rectangle.
 *
 * @param {Number} d - Diagonal
 * @param {Number} length - Input length
 * @returns {Number} the diagonal to use
 */
function corner(d, length) {
    return d <= length ? d : 2 * length - d;
}
/**
 * Compute the limit at which the search gives up on an exact result.
 * <p>
 * Bailing out is needed because the algorithm is O(N^2) in time in the worst
 * case, where N is the total input length. The cost also depends on how many
 * differences there are between the two values and how they are distributed.
 * <p>
 * The limit is bailOutFactor * approximateCubeRoot(l1 + l2) / 100, with a
 * floor of 256. The floor was chosen arbitrarily to match gnudiff; it is not
 * a critical refinement, but encourages a little more work before bailing.
 * <p>
 * See the Java MyersBinaryDiff#calculateBailOutLimit implementation for more
 * in-depth documentation regarding the choice of strategy.
 *
 * @param {Number} l1 - Length of the first input
 * @param {Number} l2 - Length of the second input
 * @param {Number} bailOutFactor - Bail-out limit factor
 * @returns {Number} the bail-out limit, at least 256
 */
function calculateBailOutLimit(l1, l2, bailOutFactor) {
    var MINIMUM_LIMIT = 256;
    var scaled = bailOutFactor * approximateCubeRoot(l1 + l2);
    return Math.max(MINIMUM_LIMIT, scaled / 100);
}
/**
 * Validate that [offset, offset + length) lies within a buffer.
 *
 * @param {Object} buffer - Anything with a numeric length property
 * @param {Number} offset - Start of the range; must not be negative
 * @param {Number} length - Size of the range; must not be negative
 * @throws {Error} if the offset or length is negative, or the range runs
 *         past the end of the buffer
 */
function checkBounds(buffer, offset, length) {
    var end;

    if (offset < 0) {
        throw new Error("offset " + offset + " < 0");
    }

    if (length < 0) {
        throw new Error("length " + length + " < 0");
    }

    end = offset + length;

    if (end > buffer.length || end < 0) {
        var range = "offset " + offset + " + " + length;
        throw new Error(range + " > " + buffer.length);
    }
}
/**
 * One diff run of buffer a against buffer b, emitting operations to a script.
 * <p>
 * Positions in a are written x / u and positions in b are written y / v.
 * A diagonal k relates them as y = x + k. All methods return a result code;
 * any code other than SUCCESS returned by the script aborts the run and is
 * passed straight back to the caller.
 *
 * @param {Storage} storage - Provides the diagonal vector (initialise / extend)
 * @param {Buffer} a - Source data
 * @param {Buffer} b - Target data
 * @param {Script} script - Receives match / insert / delete operations
 * @param {Number} bailOutLimit - Diagonal count after which middleSnake gives
 *        up searching and splits the problem approximately
 */
function Execution(storage, a, b, script, bailOutLimit) {
    var self = this;

    /**
     * Diff a[aOffset, aOffset + aLength) against b[bOffset, bOffset + bLength).
     *
     * @returns {Number} SUCCESS, or the first non-SUCCESS code encountered
     * @throws {Error} if either range is outside its buffer
     */
    this.diff = function(aOffset, aLength, bOffset, bLength) {
        checkBounds(a, aOffset, aLength);
        checkBounds(b, bOffset, bLength);

        // Strip the common prefix.
        var x = 0;
        var y = 0;

        while (x < aLength && y < bLength && a[aOffset + x] === b[bOffset + y]) {
            ++x;
            ++y;
        }

        // Strip the common suffix, without overlapping the prefix.
        var u = aLength;
        var v = bLength;

        while (u > x && v > y && a[aOffset + u - 1] === b[bOffset + v - 1]) {
            --u;
            --v;
        }

        var r1 = script.match(aOffset, x); // Prefix

        if (r1 !== SUCCESS) {
            return r1;
        }

        var r2;

        if (x === u) {
            // Nothing left of a: the remainder of b is a pure insert.
            r2 = script.insert(bOffset + y, v - y);
        } else if (y === v) {
            // Nothing left of b: the remainder of a is a pure delete. Pass
            // the deleted range so scripts that use it see real values.
            r2 = script.delete(aOffset + x, u - x);
        } else {
            r2 = self.middleSnake(aOffset + x, u - x, bOffset + y, v - y);
        }

        if (r2 !== SUCCESS) {
            return r2;
        }

        return script.match(aOffset + u, aLength - u); // Suffix
    };

    /**
     * Search forwards from the start and backwards from the end, one extra
     * diagonal per round, until the two searches meet; then recurse on the
     * two halves either side of the meeting run of matches.
     *
     * @returns {Number} a result code; REPLACE if storage cannot be extended
     */
    this.middleSnake = function(aOffset, aLength, bOffset, bLength) {
        var delta = aLength - bLength;
        var odd = delta & 1;

        var vec = storage.initialise(1);

        setF(vec, 0, 0);
        setR(vec, 0, aLength);

        var d = 1;

        for (;;) {
            var cornerA = corner(d, aLength);
            var cornerB = corner(d, bLength);

            // Forward pass: k1 is the forward diagonal, y = x + k1.
            var k1 = -cornerA;

            for (; k1 <= cornerB; k1 += 2) {
                var x1 = nextF(vec, k1);
                var u1 = x1;

                while (u1 < aLength && u1 + k1 < bLength && a[aOffset + u1] === b[bOffset + u1 + k1]) {
                    ++u1;
                }

                // Short circuit if d == 1. Suppose d == 1. There's at least
                // one difference, so either u < n or u < m - k. The next
                // test guarantees k == -delta == m-n. So u < n. We've not
                // made any reverse steps yet, so reverse(k + delta) is n.
                if (odd && d > 1 && Math.abs(k1 + delta) <= d - 1 && u1 >= getR(vec, k1 + delta)) {
                    return self.recurse(aOffset, aLength, bOffset, bLength, x1, u1, k1);
                }

                setF(vec, k1, u1);
            }

            // Reverse pass: k2 is the reverse key; kd = k2 - delta is the
            // corresponding forward diagonal, so y = x + kd.
            var k2 = -cornerB;

            for (; k2 <= cornerA; k2 += 2) {
                var u2 = nextR(vec, k2);
                var x2 = u2;
                var kd = k2 - delta;

                while (x2 > 0 && x2 + kd > 0 && a[aOffset + x2 - 1] === b[bOffset + x2 + kd - 1]) {
                    --x2;
                }

                if (!odd && Math.abs(kd) <= d && x2 <= getF(vec, kd)) {
                    return self.recurse(aOffset, aLength, bOffset, bLength, x2, u2, kd);
                }

                setR(vec, k2, x2);
            }

            if (d > bailOutLimit) {
                return bail(vec, aOffset, aLength, bOffset, bLength, cornerA, cornerB);
            }

            ++d;

            vec = storage.extend(d);

            if (vec === null) {
                return REPLACE;
            }
        }
    };

    /**
     * Split around a run of matches a[x, u) on diagonal k: diff what precedes
     * it, emit the match, then diff what follows it.
     *
     * @returns {Number} a result code
     */
    this.recurse = function(aOffset, aLength, bOffset, bLength, x, u, k) {
        var r1 = self.diff(aOffset, x, bOffset, x + k);

        if (r1 !== SUCCESS) {
            return r1;
        }

        var r2 = script.match(aOffset + x, u - x);

        if (r2 !== SUCCESS) {
            return r2;
        }

        return self.diff(aOffset + u, aLength - u, bOffset + u + k, bLength - u - k);
    };

    /*
     * Give up the exact search: pick a split point (xbest, ybest) from the
     * values recorded in the vector so far and diff the two sides separately.
     */
    function bail(vec, aOffset, aLength, bOffset, bLength, cornerA, cornerB) {
        var xbest = 0;
        var ybest = 0;
        var x;
        var y;

        for (var k1 = -cornerA; k1 <= cornerB; k1 += 2) {
            // Forward x values can exceed n because we don't constrain
            // #getF. Similarly y values can exceed m. Detect
            // and slide back within the rectangle.
            var x1 = Math.min(getF(vec, k1), aLength);

            if (x1 + k1 > bLength) {
                x = bLength - k1;
            } else {
                x = x1;
            }

            y = x + k1;

            // Keep the forward point that has advanced furthest.
            if (x + y > xbest + ybest) {
                xbest = x;
                ybest = y;
            }
        }

        for (var k2 = -cornerB; k2 <= cornerA; k2 += 2) {
            // Similarly reverse x and y values can be less than 0
            var x2 = Math.max(getR(vec, k2), 0);
            var kd = k2 - (aLength - bLength);

            if (x2 + kd < 0) {
                x = -kd;
            } else {
                x = x2;
            }

            y = x + kd;

            // A reverse point is measured by its distance back from the end.
            if (aLength + bLength - x - y > xbest + ybest) {
                xbest = x;
                ybest = y;
            }
        }

        var r = boundedDiff(aOffset, xbest, bOffset, ybest, aLength, bLength);

        if (r !== SUCCESS) {
            return r;
        }

        return boundedDiff(
            aOffset + xbest,
            aLength - xbest,
            bOffset + ybest,
            bLength - ybest,
            aLength,
            bLength);
    }

    /*
     * Variant of #diff used by #bail to ensure we are dividing the problem space
     * sufficiently to prevent stack overflow. If the sub-problem is not
     * sufficiently smaller than the whole, it is halved before diffing.
     */
    function boundedDiff(aOffset, aLength, bOffset, bLength, totalN, totalM) {
        var totalSpace = totalN * totalM;
        var nm = aLength * bLength;

        // We use Math.floor here instead of a bitwise op as totalSpace may be an integer larger than 32 bits
        var threshold = Math.floor((1 << 24) + totalSpace / 2);

        if (nm >= threshold) {
            var x = aLength / 2 | 0;
            var y = bLength / 2 | 0;

            var r1 = self.diff(aOffset, x, bOffset, y);

            if (r1 !== SUCCESS) {
                return r1;
            }

            return self.diff(aOffset + x, aLength - x, bOffset + y, bLength - y);
        } else {
            return self.diff(aOffset, aLength, bOffset, bLength);
        }
    }
}
// Pending-operation handlers. Each takes (script, start, length); which one
// is pending is determined by comparing these function references.

// Forward an insert to the script.
var INSERT = function(target, from, count) {
    return target.insert(from, count);
};

// Forward a match to the script.
var MATCH = function(target, from, count) {
    return target.match(from, count);
};

// Placeholder for "nothing pending": touches no script and reports SUCCESS.
var NOOP = function() {
    return SUCCESS;
};
/**
 * Wrap an edit script so that adjacent operations of the same kind are merged
 * and deletes are dropped before anything reaches the delegate.
 * <p>
 * Starts passed to insert/match are absolute; they are rebased by bOffset and
 * aOffset respectively before being recorded. Zero-length operations are
 * ignored. One operation is held pending at a time and is only forwarded to
 * the delegate when a different kind of operation (or a delete, or close)
 * arrives.
 *
 * @param {Script} delegate - Receives the merged insert / match / close calls
 * @param {Number} aOffset - Subtracted from every match start
 * @param {Number} bOffset - Subtracted from every insert start
 * @returns {Object} a script with insert, match, delete and close methods,
 *          each returning a result code
 */
function coalesce(delegate, aOffset, bOffset) {
    // True until a real (non-NOOP) operation has been forwarded to the delegate.
    var neverFlushed = true;
    var pendingLength = 0;
    var pendingStart = 0;
    var pending = NOOP;

    function flushPending() {
        // Logical rather than bitwise AND, so the flag stays a boolean.
        neverFlushed = neverFlushed && pending === NOOP;
        return pending(delegate, pendingStart, pendingLength);
    }

    function process(op, start, length) {
        if (length > 0) {
            if (pending !== op) {
                var r = flushPending();

                if (r !== SUCCESS) {
                    return r;
                }

                pending = op;
                pendingStart = start;
                pendingLength = length;
            } else {
                // Same kind as the pending operation: extend it.
                pendingLength += length;
            }
        }

        return SUCCESS;
    }

    return {
        insert : function(bStart, length) {
            return process(INSERT, bStart - bOffset, length);
        },

        match : function(aStart, length) {
            return process(MATCH, aStart - aOffset, length);
        },

        delete : function(aStart, length) { // eslint-disable-line no-unused-vars
            // We discard all deletes, but must flush pending matches.
            // INSERT,DELETE,INSERT can be coalesced to drop the DELETE;
            // MATCH,DELETE,MATCH cannot.
            if (pending === INSERT) {
                return SUCCESS;
            }

            var r = flushPending();
            pending = NOOP;
            return r;
        },

        close : function(aLength, bLength) { // eslint-disable-line no-unused-vars
            // If nothing has reached the delegate, the whole result may be
            // expressible as a single code instead of a script.
            if (neverFlushed) {
                if (pending === INSERT) {
                    // Only inserts were seen.
                    return REPLACE;
                } else if (pendingStart === 0 && pendingLength === aLength) {
                    // A single match covers all of a (or both are empty).
                    return NO_CHANGE;
                }
            }

            var r = flushPending();

            if (r !== SUCCESS) {
                return r;
            }

            return delegate.close();
        }
    };
}
/**
* Implementation of Myers' diff with the linear space refinement.
* <P>
* Diff the subset of two buffers, as specified by offset/length parameters,
* with differences written to the provided script.
* <P>
* See E. Myers (1986). "An O(ND) Difference Algorithm and Its Variations".
* Algorithmica 1 (2): 251–266.
* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927
* <P>
* For more documentation, refer to the Java implementation.
* <P>
* The bail out factor determines when to give up in middleSnake and produce a
* more approximate result. Larger values increase the precision at the cost of
* increased CPU.
* <P>
* The number of diagonals to search in each middleSnake call is bounded by
* max(256, bailOutFactor/100 * approximateCubeRoot(totalLength))
* <P>
* For inputs with a lot of small differences, a smaller bailOutFactor often
* has a beneficial effect of moderately reducing the size of the result.
* But it can also increase the size, occasionally dramatically.
* Additionally, the reduced precision affects the quality of JSON
* structural deltas. YMMV, as they say.
*
* @param {Number} [maximumStorage] - Maximum storage limit
* @param {Number} [bailOutFactor] - Bail-out limit factor
*/
module.exports = function MyersBinaryDiff(maximumStorage, bailOutFactor) {
if (maximumStorage === undefined) {
maximumStorage = MAXIMUM_STORAGE;
}
if (bailOutFactor === undefined) {
bailOutFactor = BAIL_OUT_FACTOR;
}
var storage = new Storage(maximumStorage);
/**
* @param {Buffer} a - Source data
* @param {Number} aOffset - Start of source data
* @param {Number} aLength - Length of source data
* @param {Buffer} b - Target data
* @param {Number} bOffset - Start of target data
* @param {Number} bLength - Length of target data
* @param {Script} editScript - The edit script
*/
this.diff = function(a, aOffset, aLength, b, bOffset, bLength, editScript) {
var script = coalesce(editScript, aOffset, bOffset);
var execution = new Execution(storage, a, b, script, calculateBailOutLimit(aLength, bLength, bailOutFactor));
var result = execution.diff(aOffset, aLength, bOffset, bLength);
if (result !== SUCCESS) {
return result;
}
return script.close(aLength, bLength);
};
};
module.exports.SUCCESS = SUCCESS;
module.exports.REPLACE = REPLACE;
module.exports.NO_CHANGE = NO_CHANGE;