UNPKG

diff

Version:

A JavaScript text diff implementation.

254 lines (253 loc) 12 kB
export default class Diff { diff(oldStr, newStr, // Type below is not accurate/complete - see above for full possibilities - but it compiles options = {}) { let callback; if (typeof options === 'function') { callback = options; options = {}; } else if ('callback' in options) { callback = options.callback; } // Allow subclasses to massage the input prior to running const oldString = this.castInput(oldStr, options); const newString = this.castInput(newStr, options); const oldTokens = this.removeEmpty(this.tokenize(oldString, options)); const newTokens = this.removeEmpty(this.tokenize(newString, options)); return this.diffWithOptionsObj(oldTokens, newTokens, options, callback); } diffWithOptionsObj(oldTokens, newTokens, options, callback) { var _a; const done = (value) => { value = this.postProcess(value, options); if (callback) { setTimeout(function () { callback(value); }, 0); return undefined; } else { return value; } }; const newLen = newTokens.length, oldLen = oldTokens.length; let editLength = 1; let maxEditLength = newLen + oldLen; if (options.maxEditLength != null) { maxEditLength = Math.min(maxEditLength, options.maxEditLength); } const maxExecutionTime = (_a = options.timeout) !== null && _a !== void 0 ? _a : Infinity; const abortAfterTimestamp = Date.now() + maxExecutionTime; const bestPath = [{ oldPos: -1, lastComponent: undefined }]; // Seed editLength = 0, i.e. the content starts with the same values let newPos = this.extractCommon(bestPath[0], newTokens, oldTokens, 0, options); if (bestPath[0].oldPos + 1 >= oldLen && newPos + 1 >= newLen) { // Identity per the equality and tokenizer return done(this.buildValues(bestPath[0].lastComponent, newTokens, oldTokens)); } // Once we hit the right edge of the edit graph on some diagonal k, we can // definitely reach the end of the edit graph in no more than k edits, so // there's no point in considering any moves to diagonal k+1 any more (from // which we're guaranteed to need at least k+1 more edits). // Similarly, once we've reached the bottom of the edit graph, there's no // point considering moves to lower diagonals. // We record this fact by setting minDiagonalToConsider and // maxDiagonalToConsider to some finite value once we've hit the edge of // the edit graph. // This optimization is not faithful to the original algorithm presented in // Myers's paper, which instead pointlessly extends D-paths off the end of // the edit graph - see page 7 of Myers's paper which notes this point // explicitly and illustrates it with a diagram. This has major performance // implications for some common scenarios. For instance, to compute a diff // where the new text simply appends d characters on the end of the // original text of length n, the true Myers algorithm will take O(n+d^2) // time while this optimization needs only O(n+d) time. let minDiagonalToConsider = -Infinity, maxDiagonalToConsider = Infinity; // Main worker method. checks all permutations of a given edit length for acceptance. const execEditLength = () => { for (let diagonalPath = Math.max(minDiagonalToConsider, -editLength); diagonalPath <= Math.min(maxDiagonalToConsider, editLength); diagonalPath += 2) { let basePath; const removePath = bestPath[diagonalPath - 1], addPath = bestPath[diagonalPath + 1]; if (removePath) { // No one else is going to attempt to use this value, clear it // @ts-expect-error - perf optimisation. This type-violating value will never be read. bestPath[diagonalPath - 1] = undefined; } let canAdd = false; if (addPath) { // what newPos will be after we do an insertion: const addPathNewPos = addPath.oldPos - diagonalPath; canAdd = addPath && 0 <= addPathNewPos && addPathNewPos < newLen; } const canRemove = removePath && removePath.oldPos + 1 < oldLen; if (!canAdd && !canRemove) { // If this path is a terminal then prune // @ts-expect-error - perf optimisation. This type-violating value will never be read. bestPath[diagonalPath] = undefined; continue; } // Select the diagonal that we want to branch from. We select the prior // path whose position in the old string is the farthest from the origin // and does not pass the bounds of the diff graph if (!canRemove || (canAdd && removePath.oldPos < addPath.oldPos)) { basePath = this.addToPath(addPath, true, false, 0, options); } else { basePath = this.addToPath(removePath, false, true, 1, options); } newPos = this.extractCommon(basePath, newTokens, oldTokens, diagonalPath, options); if (basePath.oldPos + 1 >= oldLen && newPos + 1 >= newLen) { // If we have hit the end of both strings, then we are done return done(this.buildValues(basePath.lastComponent, newTokens, oldTokens)) || true; } else { bestPath[diagonalPath] = basePath; if (basePath.oldPos + 1 >= oldLen) { maxDiagonalToConsider = Math.min(maxDiagonalToConsider, diagonalPath - 1); } if (newPos + 1 >= newLen) { minDiagonalToConsider = Math.max(minDiagonalToConsider, diagonalPath + 1); } } } editLength++; }; // Performs the length of edit iteration. Is a bit fugly as this has to support the // sync and async mode which is never fun. Loops over execEditLength until a value // is produced, or until the edit length exceeds options.maxEditLength (if given), // in which case it will return undefined. if (callback) { (function exec() { setTimeout(function () { if (editLength > maxEditLength || Date.now() > abortAfterTimestamp) { return callback(undefined); } if (!execEditLength()) { exec(); } }, 0); }()); } else { while (editLength <= maxEditLength && Date.now() <= abortAfterTimestamp) { const ret = execEditLength(); if (ret) { return ret; } } } } addToPath(path, added, removed, oldPosInc, options) { const last = path.lastComponent; if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) { return { oldPos: path.oldPos + oldPosInc, lastComponent: { count: last.count + 1, added: added, removed: removed, previousComponent: last.previousComponent } }; } else { return { oldPos: path.oldPos + oldPosInc, lastComponent: { count: 1, added: added, removed: removed, previousComponent: last } }; } } extractCommon(basePath, newTokens, oldTokens, diagonalPath, options) { const newLen = newTokens.length, oldLen = oldTokens.length; let oldPos = basePath.oldPos, newPos = oldPos - diagonalPath, commonCount = 0; while (newPos + 1 < newLen && oldPos + 1 < oldLen && this.equals(oldTokens[oldPos + 1], newTokens[newPos + 1], options)) { newPos++; oldPos++; commonCount++; if (options.oneChangePerToken) { basePath.lastComponent = { count: 1, previousComponent: basePath.lastComponent, added: false, removed: false }; } } if (commonCount && !options.oneChangePerToken) { basePath.lastComponent = { count: commonCount, previousComponent: basePath.lastComponent, added: false, removed: false }; } basePath.oldPos = oldPos; return newPos; } equals(left, right, options) { if (options.comparator) { return options.comparator(left, right); } else { return left === right || (!!options.ignoreCase && left.toLowerCase() === right.toLowerCase()); } } removeEmpty(array) { const ret = []; for (let i = 0; i < array.length; i++) { if (array[i]) { ret.push(array[i]); } } return ret; } // eslint-disable-next-line @typescript-eslint/no-unused-vars castInput(value, options) { return value; } // eslint-disable-next-line @typescript-eslint/no-unused-vars tokenize(value, options) { return Array.from(value); } join(chars) { // Assumes ValueT is string, which is the case for most subclasses. // When it's false, e.g. in diffArrays, this method needs to be overridden (e.g. with a no-op) // Yes, the casts are verbose and ugly, because this pattern - of having the base class SORT OF // assume tokens and values are strings, but not completely - is weird and janky. return chars.join(''); } postProcess(changeObjects, // eslint-disable-next-line @typescript-eslint/no-unused-vars options) { return changeObjects; } get useLongestToken() { return false; } buildValues(lastComponent, newTokens, oldTokens) { // First we convert our linked list of components in reverse order to an // array in the right order: const components = []; let nextComponent; while (lastComponent) { components.push(lastComponent); nextComponent = lastComponent.previousComponent; delete lastComponent.previousComponent; lastComponent = nextComponent; } components.reverse(); const componentLen = components.length; let componentPos = 0, newPos = 0, oldPos = 0; for (; componentPos < componentLen; componentPos++) { const component = components[componentPos]; if (!component.removed) { if (!component.added && this.useLongestToken) { let value = newTokens.slice(newPos, newPos + component.count); value = value.map(function (value, i) { const oldValue = oldTokens[oldPos + i]; return oldValue.length > value.length ? oldValue : value; }); component.value = this.join(value); } else { component.value = this.join(newTokens.slice(newPos, newPos + component.count)); } newPos += component.count; // Common case if (!component.added) { oldPos += component.count; } } else { component.value = this.join(oldTokens.slice(oldPos, oldPos + component.count)); oldPos += component.count; } } return components; } }