node-diff3
Version:
A node.js module for text diffing and three-way-merge.
555 lines (473 loc) • 15.3 kB
JavaScript
export {
LCS,
diffComm,
diffIndices,
diffPatch,
diff3MergeRegions,
diff3Merge,
mergeDiff3,
merge,
mergeDigIn,
patch,
stripPatch,
invertPatch
};
// Text diff algorithm following Hunt and McIlroy 1976.
// J. W. Hunt and M. D. McIlroy, An algorithm for differential buffer
// comparison, Bell Telephone Laboratories CSTR #41 (1976)
// http://www.cs.dartmouth.edu/~doug/
// https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
//
// Expects two arrays, finds longest common sequence.
//
// Returns the LAST matched pair of the longest common subsequence as a
// linked list node `{ buffer1index, buffer2index, chain }`. Following
// `.chain` walks the matches back to front, and the list always ends in the
// sentinel `{ buffer1index: -1, buffer2index: -1, chain: null }`.
//
// Items are matched through their property-key form (they are used as keys
// of a dictionary), so values that coerce to the same key are treated as equal.
function LCS(buffer1, buffer2) {
  // Map each distinct item of buffer2 to the ascending list of positions at
  // which it occurs. The dictionary has a null prototype on purpose: with a
  // plain `{}` an item such as 'constructor', 'toString' or '__proto__' would
  // find an inherited, truthy value and the `.push` below would throw.
  const equivalenceClasses = Object.create(null);
  for (let j = 0; j < buffer2.length; j++) {
    const item = buffer2[j];
    if (equivalenceClasses[item]) {
      equivalenceClasses[item].push(j);
    } else {
      equivalenceClasses[item] = [j];
    }
  }

  const NULLRESULT = { buffer1index: -1, buffer2index: -1, chain: null };

  // candidates[k] is the end of a common subsequence of length k
  // (candidates[0] is the sentinel). The list grows as longer ones are found.
  let candidates = [NULLRESULT];

  for (let i = 0; i < buffer1.length; i++) {
    const item = buffer1[i];
    const buffer2indices = equivalenceClasses[item] || [];
    let r = 0;
    let c = candidates[0];

    for (let jx = 0; jx < buffer2indices.length; jx++) {
      const j = buffer2indices[jx];

      // Find the slot `s` whose candidate ends before `j` in buffer2 while
      // the next slot (if any) ends after `j`.
      let s;
      for (s = r; s < candidates.length; s++) {
        if ((candidates[s].buffer2index < j) && ((s === candidates.length - 1) || (candidates[s + 1].buffer2index > j))) {
          break;
        }
      }

      if (s < candidates.length) {
        const newCandidate = { buffer1index: i, buffer2index: j, chain: candidates[s] };
        // Writing the previously found candidate is deferred by one step so
        // that the lookups above still see the state from before this `i`.
        if (r === candidates.length) {
          candidates.push(c);
        } else {
          candidates[r] = c;
        }
        r = s + 1;
        c = newCandidate;
        if (r === candidates.length) {
          break; // no point in examining further (j)s
        }
      }
    }

    candidates[r] = c;
  }

  // At this point, we know the LCS: it's in the reverse of the
  // linked-list through .chain of candidates[candidates.length - 1].
  return candidates[candidates.length - 1];
}
// Builds a 'comm'-style picture of the differences between buffer1 and
// buffer2 from their LCS.
//
// Returns an array of sections in buffer order. Each section is either
//   { common: [...] }                  items present in both buffers, or
//   { buffer1: [...], buffer2: [...] } items found only on one side each.
function diffComm(buffer1, buffer2) {
  let node = LCS(buffer1, buffer2);
  const sections = [];  // collected back to front, reversed before returning
  let end1 = buffer1.length;  // exclusive end of the unprocessed part of buffer1
  let end2 = buffer2.length;  // exclusive end of the unprocessed part of buffer2
  let shared = [];  // current run of common items, also back to front

  const flushShared = () => {
    if (shared.length === 0) return;
    shared.reverse();
    sections.push({ common: shared });
    shared = [];
  };

  // The chain lists matched pairs from last to first and ends in a sentinel
  // at index -1, so the final iteration picks up any unmatched leading items.
  while (node !== null) {
    // Everything between this match and the previously handled one is
    // unmatched. Items are read by index, in forward order.
    const only1 = [];
    for (let i = node.buffer1index + 1; i < end1; i++) {
      only1.push(buffer1[i]);
    }
    const only2 = [];
    for (let i = node.buffer2index + 1; i < end2; i++) {
      only2.push(buffer2[i]);
    }
    end1 = node.buffer1index;
    end2 = node.buffer2index;

    if (only1.length || only2.length) {
      flushShared();
      sections.push({ buffer1: only1, buffer2: only2 });
    }

    // The sentinel has index -1 and contributes no common item.
    if (end1 >= 0) {
      shared.push(buffer1[end1]);
    }

    node = node.chain;
  }

  flushShared();
  sections.reverse();
  return sections;
}
// Describes the mismatched chunks between buffer1 and buffer2 by offset and
// length, using their LCS. This is used by diff3MergeRegions.
//
// Returns an array, in buffer order, of
//   {
//     buffer1: [offset, length], buffer1Content: buffer1.slice(...),
//     buffer2: [offset, length], buffer2Content: buffer2.slice(...)
//   }
// with one entry per gap between consecutive matches in which at least one
// side has items.
function diffIndices(buffer1, buffer2) {
  let node = LCS(buffer1, buffer2);
  const mismatches = [];  // collected back to front, reversed before returning
  let end1 = buffer1.length;  // exclusive end of the current gap in buffer1
  let end2 = buffer2.length;  // exclusive end of the current gap in buffer2

  while (node !== null) {
    // The gap runs from just after this match up to the previous boundary.
    const start1 = node.buffer1index + 1;
    const start2 = node.buffer2index + 1;
    const length1 = end1 - start1;
    const length2 = end2 - start2;

    if (length1 || length2) {
      mismatches.push({
        buffer1: [start1, length1],
        buffer1Content: buffer1.slice(start1, start1 + length1),
        buffer2: [start2, length2],
        buffer2Content: buffer2.slice(start2, start2 + length2)
      });
    }

    end1 = node.buffer1index;
    end2 = node.buffer2index;
    node = node.chain;
  }

  mismatches.reverse();
  return mismatches;
}
// Builds a JSON representation of a diff(1)-style patch from the LCS of
// buffer1 and buffer2.
//
// Returns an array, in buffer order, of
//   {
//     buffer1: { offset, length, chunk },
//     buffer2: { offset, length, chunk }
//   }
// where each `chunk` is an array holding the `length` items that start at
// `offset` in the corresponding buffer.
function diffPatch(buffer1, buffer2) {
  let node = LCS(buffer1, buffer2);
  const chunks = [];  // collected back to front, reversed before returning
  let end1 = buffer1.length;  // exclusive end of the current gap in buffer1
  let end2 = buffer2.length;  // exclusive end of the current gap in buffer2

  // Copies `length` items of `buffer` starting at `offset`, read by index.
  const describe = (buffer, offset, length) => {
    const chunk = [];
    for (let k = 0; k < length; k++) {
      chunk.push(buffer[offset + k]);
    }
    return { offset, length, chunk };
  };

  while (node !== null) {
    const length1 = end1 - node.buffer1index - 1;
    const length2 = end2 - node.buffer2index - 1;
    end1 = node.buffer1index;
    end2 = node.buffer2index;

    if (length1 || length2) {
      chunks.push({
        buffer1: describe(buffer1, end1 + 1, length1),
        buffer2: describe(buffer2, end2 + 1, length2)
      });
    }

    node = node.chain;
  }

  chunks.reverse();
  return chunks;
}
// Given three buffers, A, O, and B, where both A and B are
// independently derived from O, returns a fairly complicated
// internal representation of merge decisions it's taken. The
// interested reader may wish to consult
//
// Sanjeev Khanna, Keshav Kunal, and Benjamin C. Pierce.
// 'A Formal Investigation of Diff3.' In Arvind and Prasad,
// editors, Foundations of Software Technology and Theoretical
// Computer Science (FSTTCS), December 2007.
//
// (http://www.cis.upenn.edu/~bcpierce/papers/diff3-short.pdf)
//
// The result is an array of regions in `o` order. A stable region is
//   { stable: true, buffer: 'o'|'a'|'b', bufferStart, bufferLength, bufferContent }
// and an unstable (conflicting) region is
//   { stable: false, aStart, aLength, aContent,
//                    oStart, oLength, oContent,
//                    bStart, bLength, bContent }
function diff3MergeRegions(a, o, b) {
  // "hunks" are array subsets where `a` or `b` are different from `o`
  // https://www.gnu.org/software/diffutils/manual/html_node/diff3-Hunks.html
  const toHunk = (side) => (item) => ({
    ab: side,
    oStart: item.buffer1[0],
    oLength: item.buffer1[1],   // length of o to remove
    abStart: item.buffer2[0],
    abLength: item.buffer2[1]   // length of a/b to insert
  });
  const hunks = diffIndices(o, a).map(toHunk('a'))
    .concat(diffIndices(o, b).map(toHunk('b')));
  hunks.sort((x, y) => x.oStart - y.oStart);

  const regions = [];
  let cursor = 0;  // offset in `o` up to which regions have been emitted

  // Emits the untouched part of `o` between the cursor and `upTo`, if any.
  const emitUnchanged = (upTo) => {
    if (upTo > cursor) {
      regions.push({
        stable: true,
        buffer: 'o',
        bufferStart: cursor,
        bufferLength: upTo - cursor,
        bufferContent: o.slice(cursor, upTo)
      });
      cursor = upTo;
    }
  };

  let next = 0;  // index of the next hunk not yet assigned to a region
  while (next < hunks.length) {
    const first = hunks[next++];
    const regionStart = first.oStart;
    let regionEnd = first.oStart + first.oLength;
    const group = [first];
    emitUnchanged(regionStart);

    // Pull every following hunk that starts at or before the current end of
    // the region into it; a hunk that merely touches the end counts too.
    while (next < hunks.length) {
      const candidate = hunks[next];
      if (candidate.oStart > regionEnd) break;  // no overlap
      regionEnd = Math.max(regionEnd, candidate.oStart + candidate.oLength);
      group.push(candidate);
      next++;
    }

    if (group.length === 1) {
      // Only one hunk touches this region, so there is no conflict: either
      // `a` or `b` changed a part of `o` that the other left alone. A pure
      // deletion (nothing to insert) emits no region at all.
      if (first.abLength > 0) {
        const source = (first.ab === 'a' ? a : b);
        regions.push({
          stable: true,
          buffer: first.ab,
          bufferStart: first.abStart,
          bufferLength: first.abLength,
          bufferContent: source.slice(first.abStart, first.abStart + first.abLength)
        });
      }
    } else {
      // A true a/b conflict. Merge all `a` hunks into one span and all `b`
      // hunks into another; each entry is [abMin, abMax, oMin, oMax].
      const bounds = {
        a: [a.length, -1, o.length, -1],
        b: [b.length, -1, o.length, -1]
      };
      for (const member of group) {
        const span = bounds[member.ab];
        span[0] = Math.min(member.abStart, span[0]);
        span[1] = Math.max(member.abStart + member.abLength, span[1]);
        span[2] = Math.min(member.oStart, span[2]);
        span[3] = Math.max(member.oStart + member.oLength, span[3]);
      }

      // Correct for skew: widen each side's span by however far the region
      // in `o` extends beyond the part of `o` that this side changed.
      const aStart = bounds.a[0] + (regionStart - bounds.a[2]);
      const aEnd = bounds.a[1] + (regionEnd - bounds.a[3]);
      const bStart = bounds.b[0] + (regionStart - bounds.b[2]);
      const bEnd = bounds.b[1] + (regionEnd - bounds.b[3]);

      regions.push({
        stable: false,
        aStart: aStart,
        aLength: aEnd - aStart,
        aContent: a.slice(aStart, aEnd),
        oStart: regionStart,
        oLength: regionEnd - regionStart,
        oContent: o.slice(regionStart, regionEnd),
        bStart: bStart,
        bLength: bEnd - bStart,
        bContent: b.slice(bStart, bEnd)
      });
    }

    cursor = regionEnd;
  }

  emitUnchanged(o.length);
  return regions;
}
// Applies the output of diff3MergeRegions to actually
// construct the merged buffer; the returned result alternates
// between 'ok' and 'conflict' blocks.
// A "false conflict" is where `a` and `b` both change the same from `o`
//
// Parameters:
//   a, o, b  - the two derived buffers and their common origin. A string is
//              split into an array with `options.stringSeparator` first.
//   options  - optional overrides:
//                excludeFalseConflicts (default true): treat a region that
//                  `a` and `b` changed identically as merged, not conflicting
//                stringSeparator (default /\s+/): separator for string input
//
// Returns an array whose entries are either
//   { ok: [...] }   merged items, or
//   { conflict: { a, aIndex, o, oIndex, b, bIndex } }
function diff3Merge(a, o, b, options) {
  const defaults = {
    excludeFalseConflicts: true,
    stringSeparator: /\s+/
  };
  options = Object.assign(defaults, options);

  if (typeof a === 'string') a = a.split(options.stringSeparator);
  if (typeof o === 'string') o = o.split(options.stringSeparator);
  if (typeof b === 'string') b = b.split(options.stringSeparator);

  const results = [];
  const regions = diff3MergeRegions(a, o, b);

  // Consecutive non-conflicting regions are accumulated here so that they
  // come out as a single 'ok' block.
  let okBuffer = [];

  function flushOk() {
    if (okBuffer.length) {
      results.push({ ok: okBuffer });
    }
    okBuffer = [];
  }

  // Appends items one at a time. `okBuffer.push(...items)` would pass every
  // item as a separate call argument, which throws a RangeError once a
  // region is large enough to exceed the engine's argument/stack limit.
  function appendOk(items) {
    for (const item of items) {
      okBuffer.push(item);
    }
  }

  function isFalseConflict(a, b) {
    if (a.length !== b.length) return false;
    for (let i = 0; i < a.length; i++) {
      if (a[i] !== b[i]) return false;
    }
    return true;
  }

  regions.forEach(region => {
    if (region.stable) {
      appendOk(region.bufferContent);
    } else if (options.excludeFalseConflicts && isFalseConflict(region.aContent, region.bContent)) {
      // Both sides made the same change; either side's content will do.
      appendOk(region.aContent);
    } else {
      flushOk();
      results.push({
        conflict: {
          a: region.aContent,
          aIndex: region.aStart,
          o: region.oContent,
          oIndex: region.oStart,
          b: region.bContent,
          bIndex: region.bStart
        }
      });
    }
  });

  flushOk();
  return results;
}
// Three-way merge that renders conflicts in diff3 style, i.e. with the
// original (`o`) content shown between the two sides:
//
//   <<<<<<< [label.a]
//   ...a...
//   ||||||| [label.o]
//   ...o...
//   =======
//   ...b...
//   >>>>>>> [label.b]
//
// `options` accepts everything diff3Merge does, plus `label` ({ a, o, b })
// whose entries are appended to the corresponding marker lines when set.
// Returns { conflict: boolean, result: [...] }.
function mergeDiff3(a, o, b, options) {
  options = Object.assign(
    { excludeFalseConflicts: true, stringSeparator: /\s+/, label: {} },
    options
  );

  const { label } = options;
  const aSection = label.a ? `<<<<<<< ${label.a}` : '<<<<<<<';
  const oSection = label.o ? `||||||| ${label.o}` : '|||||||';
  const xSection = '=======';
  const bSection = label.b ? `>>>>>>> ${label.b}` : '>>>>>>>';

  let conflict = false;
  let result = [];

  for (const region of diff3Merge(a, o, b, options)) {
    if (region.ok) {
      result = result.concat(region.ok);
      continue;
    }
    if (!region.conflict) continue;

    conflict = true;
    const { a: ours, o: base, b: theirs } = region.conflict;
    result = result.concat([aSection], ours, [oSection], base, [xSection], theirs, [bSection]);
  }

  return { conflict, result };
}
// Three-way merge that renders conflicts with the two sides only:
//
//   <<<<<<< [label.a]
//   ...a...
//   =======
//   ...b...
//   >>>>>>> [label.b]
//
// `options` accepts everything diff3Merge does, plus `label` ({ a, b })
// whose entries are appended to the corresponding marker lines when set.
// Returns { conflict: boolean, result: [...] }.
function merge(a, o, b, options) {
  options = Object.assign(
    { excludeFalseConflicts: true, stringSeparator: /\s+/, label: {} },
    options
  );

  const { label } = options;
  const aSection = label.a ? `<<<<<<< ${label.a}` : '<<<<<<<';
  const xSection = '=======';
  const bSection = label.b ? `>>>>>>> ${label.b}` : '>>>>>>>';

  let conflict = false;
  let result = [];

  for (const region of diff3Merge(a, o, b, options)) {
    if (region.ok) {
      result = result.concat(region.ok);
      continue;
    }
    if (!region.conflict) continue;

    conflict = true;
    const { a: ours, b: theirs } = region.conflict;
    result = result.concat([aSection], ours, [xSection], theirs, [bSection]);
  }

  return { conflict, result };
}
// Three-way merge that "digs in" to each conflict: the two conflicting
// sides are compared with diffComm, the items they share are emitted as
// merged, and only the parts that really differ are wrapped in markers:
//
//   <<<<<<< [label.a]
//   ...a...
//   =======
//   ...b...
//   >>>>>>> [label.b]
//
// `options` accepts everything diff3Merge does, plus `label` ({ a, b })
// whose entries are appended to the corresponding marker lines when set.
// Returns { conflict: boolean, result: [...] }; `conflict` is set only when
// some differing part was actually emitted.
function mergeDigIn(a, o, b, options) {
  options = Object.assign(
    { excludeFalseConflicts: true, stringSeparator: /\s+/, label: {} },
    options
  );

  const { label } = options;
  const aSection = label.a ? `<<<<<<< ${label.a}` : '<<<<<<<';
  const xSection = '=======';
  const bSection = label.b ? `>>>>>>> ${label.b}` : '>>>>>>>';

  let conflict = false;
  let result = [];

  for (const region of diff3Merge(a, o, b, options)) {
    if (region.ok) {
      result = result.concat(region.ok);
      continue;
    }

    // Anything that is not an 'ok' block is taken to be a conflict block.
    for (const inner of diffComm(region.conflict.a, region.conflict.b)) {
      if (inner.common) {
        result = result.concat(inner.common);
      } else {
        conflict = true;
        result = result.concat([aSection], inner.buffer1, [xSection], inner.buffer2, [bSection]);
      }
    }
  }

  return { conflict, result };
}
// Applies a patch to a buffer and returns the patched copy as a new array.
// Given buffer1 and buffer2, `patch(buffer1, diffPatch(buffer1, buffer2))` should give buffer2.
//
// Each patch entry is read as
//   { buffer1: { offset, length }, buffer2: { chunk } }
// meaning: at `offset` in the input, drop `length` items and put `chunk` in
// their place. Entries are applied in the order given.
function patch(buffer, patch) {
  const output = [];
  let position = 0;  // next index of `buffer` not yet copied or skipped

  // Copies input items verbatim until `position` reaches `limit`.
  const copyUpTo = (limit) => {
    for (; position < limit; position++) {
      output.push(buffer[position]);
    }
  };

  for (let n = 0; n < patch.length; n++) {
    const { buffer1: removed, buffer2: inserted } = patch[n];
    copyUpTo(removed.offset);

    const replacement = inserted.chunk;
    for (let k = 0; k < replacement.length; k++) {
      output.push(replacement[k]);
    }

    // Skip over the items this entry replaces.
    position += removed.length;
  }

  copyUpTo(buffer.length);
  return output;
}
// Takes the output of diffPatch(), and removes extra information from it:
// only the offset and length of each buffer1 chunk and the content of each
// buffer2 chunk are kept. The result can still be used by patch(), but can
// no longer be inverted, because the removed buffer1 content is gone.
function stripPatch(patch) {
  return patch.map(({ buffer1, buffer2 }) => {
    const { offset, length } = buffer1;
    const { chunk } = buffer2;
    return {
      buffer1: { offset, length },
      buffer2: { chunk }
    };
  });
}
// Takes the output of diffPatch(), and inverts the sense of it, so that it
// can be applied to buffer2 to give buffer1 rather than the other way around.
// Each entry simply has its `buffer1` and `buffer2` descriptions swapped;
// the descriptions themselves are reused, not copied.
function invertPatch(patch) {
  return patch.map(({ buffer1: before, buffer2: after }) => ({
    buffer1: after,
    buffer2: before
  }));
}