UNPKG

diff

Version:

A JavaScript text diff implementation.

kpdecker/jsdiff

1,074 lines (1,061 loc) • 78.3 kB

JavaScript

(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) : typeof define === 'function' && define.amd ? define(['exports'], factory) : (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.Diff = {})); })(this, (function (exports) { 'use strict'; class Diff { diff(oldStr, newStr, // Type below is not accurate/complete - see above for full possibilities - but it compiles options = {}) { let callback; if (typeof options === 'function') { callback = options; options = {}; } else if ('callback' in options) { callback = options.callback; } // Allow subclasses to massage the input prior to running const oldString = this.castInput(oldStr, options); const newString = this.castInput(newStr, options); const oldTokens = this.removeEmpty(this.tokenize(oldString, options)); const newTokens = this.removeEmpty(this.tokenize(newString, options)); return this.diffWithOptionsObj(oldTokens, newTokens, options, callback); } diffWithOptionsObj(oldTokens, newTokens, options, callback) { var _a; const done = (value) => { value = this.postProcess(value, options); if (callback) { setTimeout(function () { callback(value); }, 0); return undefined; } else { return value; } }; const newLen = newTokens.length, oldLen = oldTokens.length; let editLength = 1; let maxEditLength = newLen + oldLen; if (options.maxEditLength != null) { maxEditLength = Math.min(maxEditLength, options.maxEditLength); } const maxExecutionTime = (_a = options.timeout) !== null && _a !== void 0 ? _a : Infinity; const abortAfterTimestamp = Date.now() + maxExecutionTime; const bestPath = [{ oldPos: -1, lastComponent: undefined }]; // Seed editLength = 0, i.e. the content starts with the same values let newPos = this.extractCommon(bestPath[0], newTokens, oldTokens, 0, options); if (bestPath[0].oldPos + 1 >= oldLen && newPos + 1 >= newLen) { // Identity per the equality and tokenizer return done(this.buildValues(bestPath[0].lastComponent, newTokens, oldTokens)); } // Once we hit the right edge of the edit graph on some diagonal k, we can // definitely reach the end of the edit graph in no more than k edits, so // there's no point in considering any moves to diagonal k+1 any more (from // which we're guaranteed to need at least k+1 more edits). // Similarly, once we've reached the bottom of the edit graph, there's no // point considering moves to lower diagonals. // We record this fact by setting minDiagonalToConsider and // maxDiagonalToConsider to some finite value once we've hit the edge of // the edit graph. // This optimization is not faithful to the original algorithm presented in // Myers's paper, which instead pointlessly extends D-paths off the end of // the edit graph - see page 7 of Myers's paper which notes this point // explicitly and illustrates it with a diagram. This has major performance // implications for some common scenarios. For instance, to compute a diff // where the new text simply appends d characters on the end of the // original text of length n, the true Myers algorithm will take O(n+d^2) // time while this optimization needs only O(n+d) time. let minDiagonalToConsider = -Infinity, maxDiagonalToConsider = Infinity; // Main worker method. checks all permutations of a given edit length for acceptance. const execEditLength = () => { for (let diagonalPath = Math.max(minDiagonalToConsider, -editLength); diagonalPath <= Math.min(maxDiagonalToConsider, editLength); diagonalPath += 2) { let basePath; const removePath = bestPath[diagonalPath - 1], addPath = bestPath[diagonalPath + 1]; if (removePath) { // No one else is going to attempt to use this value, clear it // @ts-expect-error - perf optimisation. This type-violating value will never be read. bestPath[diagonalPath - 1] = undefined; } let canAdd = false; if (addPath) { // what newPos will be after we do an insertion: const addPathNewPos = addPath.oldPos - diagonalPath; canAdd = addPath && 0 <= addPathNewPos && addPathNewPos < newLen; } const canRemove = removePath && removePath.oldPos + 1 < oldLen; if (!canAdd && !canRemove) { // If this path is a terminal then prune // @ts-expect-error - perf optimisation. This type-violating value will never be read. bestPath[diagonalPath] = undefined; continue; } // Select the diagonal that we want to branch from. We select the prior // path whose position in the old string is the farthest from the origin // and does not pass the bounds of the diff graph if (!canRemove || (canAdd && removePath.oldPos < addPath.oldPos)) { basePath = this.addToPath(addPath, true, false, 0, options); } else { basePath = this.addToPath(removePath, false, true, 1, options); } newPos = this.extractCommon(basePath, newTokens, oldTokens, diagonalPath, options); if (basePath.oldPos + 1 >= oldLen && newPos + 1 >= newLen) { // If we have hit the end of both strings, then we are done return done(this.buildValues(basePath.lastComponent, newTokens, oldTokens)) || true; } else { bestPath[diagonalPath] = basePath; if (basePath.oldPos + 1 >= oldLen) { maxDiagonalToConsider = Math.min(maxDiagonalToConsider, diagonalPath - 1); } if (newPos + 1 >= newLen) { minDiagonalToConsider = Math.max(minDiagonalToConsider, diagonalPath + 1); } } } editLength++; }; // Performs the length of edit iteration. Is a bit fugly as this has to support the // sync and async mode which is never fun. Loops over execEditLength until a value // is produced, or until the edit length exceeds options.maxEditLength (if given), // in which case it will return undefined. if (callback) { (function exec() { setTimeout(function () { if (editLength > maxEditLength || Date.now() > abortAfterTimestamp) { return callback(undefined); } if (!execEditLength()) { exec(); } }, 0); }()); } else { while (editLength <= maxEditLength && Date.now() <= abortAfterTimestamp) { const ret = execEditLength(); if (ret) { return ret; } } } } addToPath(path, added, removed, oldPosInc, options) { const last = path.lastComponent; if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) { return { oldPos: path.oldPos + oldPosInc, lastComponent: { count: last.count + 1, added: added, removed: removed, previousComponent: last.previousComponent } }; } else { return { oldPos: path.oldPos + oldPosInc, lastComponent: { count: 1, added: added, removed: removed, previousComponent: last } }; } } extractCommon(basePath, newTokens, oldTokens, diagonalPath, options) { const newLen = newTokens.length, oldLen = oldTokens.length; let oldPos = basePath.oldPos, newPos = oldPos - diagonalPath, commonCount = 0; while (newPos + 1 < newLen && oldPos + 1 < oldLen && this.equals(oldTokens[oldPos + 1], newTokens[newPos + 1], options)) { newPos++; oldPos++; commonCount++; if (options.oneChangePerToken) { basePath.lastComponent = { count: 1, previousComponent: basePath.lastComponent, added: false, removed: false }; } } if (commonCount && !options.oneChangePerToken) { basePath.lastComponent = { count: commonCount, previousComponent: basePath.lastComponent, added: false, removed: false }; } basePath.oldPos = oldPos; return newPos; } equals(left, right, options) { if (options.comparator) { return options.comparator(left, right); } else { return left === right || (!!options.ignoreCase && left.toLowerCase() === right.toLowerCase()); } } removeEmpty(array) { const ret = []; for (let i = 0; i < array.length; i++) { if (array[i]) { ret.push(array[i]); } } return ret; } // eslint-disable-next-line @typescript-eslint/no-unused-vars castInput(value, options) { return value; } // eslint-disable-next-line @typescript-eslint/no-unused-vars tokenize(value, options) { return Array.from(value); } join(chars) { // Assumes ValueT is string, which is the case for most subclasses. // When it's false, e.g. in diffArrays, this method needs to be overridden (e.g. with a no-op) // Yes, the casts are verbose and ugly, because this pattern - of having the base class SORT OF // assume tokens and values are strings, but not completely - is weird and janky. return chars.join(''); } postProcess(changeObjects, // eslint-disable-next-line @typescript-eslint/no-unused-vars options) { return changeObjects; } get useLongestToken() { return false; } buildValues(lastComponent, newTokens, oldTokens) { // First we convert our linked list of components in reverse order to an // array in the right order: const components = []; let nextComponent; while (lastComponent) { components.push(lastComponent); nextComponent = lastComponent.previousComponent; delete lastComponent.previousComponent; lastComponent = nextComponent; } components.reverse(); const componentLen = components.length; let componentPos = 0, newPos = 0, oldPos = 0; for (; componentPos < componentLen; componentPos++) { const component = components[componentPos]; if (!component.removed) { if (!component.added && this.useLongestToken) { let value = newTokens.slice(newPos, newPos + component.count); value = value.map(function (value, i) { const oldValue = oldTokens[oldPos + i]; return oldValue.length > value.length ? oldValue : value; }); component.value = this.join(value); } else { component.value = this.join(newTokens.slice(newPos, newPos + component.count)); } newPos += component.count; // Common case if (!component.added) { oldPos += component.count; } } else { component.value = this.join(oldTokens.slice(oldPos, oldPos + component.count)); oldPos += component.count; } } return components; } } class CharacterDiff extends Diff { } const characterDiff = new CharacterDiff(); function diffChars(oldStr, newStr, options) { return characterDiff.diff(oldStr, newStr, options); } function longestCommonPrefix(str1, str2) { let i; for (i = 0; i < str1.length && i < str2.length; i++) { if (str1[i] != str2[i]) { return str1.slice(0, i); } } return str1.slice(0, i); } function longestCommonSuffix(str1, str2) { let i; // Unlike longestCommonPrefix, we need a special case to handle all scenarios // where we return the empty string since str1.slice(-0) will return the // entire string. if (!str1 || !str2 || str1[str1.length - 1] != str2[str2.length - 1]) { return ''; } for (i = 0; i < str1.length && i < str2.length; i++) { if (str1[str1.length - (i + 1)] != str2[str2.length - (i + 1)]) { return str1.slice(-i); } } return str1.slice(-i); } function replacePrefix(string, oldPrefix, newPrefix) { if (string.slice(0, oldPrefix.length) != oldPrefix) { throw Error(`string ${JSON.stringify(string)} doesn't start with prefix ${JSON.stringify(oldPrefix)}; this is a bug`); } return newPrefix + string.slice(oldPrefix.length); } function replaceSuffix(string, oldSuffix, newSuffix) { if (!oldSuffix) { return string + newSuffix; } if (string.slice(-oldSuffix.length) != oldSuffix) { throw Error(`string ${JSON.stringify(string)} doesn't end with suffix ${JSON.stringify(oldSuffix)}; this is a bug`); } return string.slice(0, -oldSuffix.length) + newSuffix; } function removePrefix(string, oldPrefix) { return replacePrefix(string, oldPrefix, ''); } function removeSuffix(string, oldSuffix) { return replaceSuffix(string, oldSuffix, ''); } function maximumOverlap(string1, string2) { return string2.slice(0, overlapCount(string1, string2)); } // Nicked from https://stackoverflow.com/a/60422853/1709587 function overlapCount(a, b) { // Deal with cases where the strings differ in length let startA = 0; if (a.length > b.length) { startA = a.length - b.length; } let endB = b.length; if (a.length < b.length) { endB = a.length; } // Create a back-reference for each index // that should be followed in case of a mismatch. // We only need B to make these references: const map = Array(endB); let k = 0; // Index that lags behind j map[0] = 0; for (let j = 1; j < endB; j++) { if (b[j] == b[k]) { map[j] = map[k]; // skip over the same character (optional optimisation) } else { map[j] = k; } while (k > 0 && b[j] != b[k]) { k = map[k]; } if (b[j] == b[k]) { k++; } } // Phase 2: use these references while iterating over A k = 0; for (let i = startA; i < a.length; i++) { while (k > 0 && a[i] != b[k]) { k = map[k]; } if (a[i] == b[k]) { k++; } } return k; } /** * Returns true if the string consistently uses Windows line endings. */ function hasOnlyWinLineEndings(string) { return string.includes('\r\n') && !string.startsWith('\n') && !string.match(/[^\r]\n/); } /** * Returns true if the string consistently uses Unix line endings. */ function hasOnlyUnixLineEndings(string) { return !string.includes('\r\n') && string.includes('\n'); } function trailingWs(string) { // Yes, this looks overcomplicated and dumb - why not replace the whole function with // return string match(/\s*$/)[0] // you ask? Because: // 1. the trap described at https://markamery.com/blog/quadratic-time-regexes/ would mean doing // this would cause this function to take O(n²) time in the worst case (specifically when // there is a massive run of NON-TRAILING whitespace in `string`), and // 2. the fix proposed in the same blog post, of using a negative lookbehind, is incompatible // with old Safari versions that we'd like to not break if possible (see // https://github.com/kpdecker/jsdiff/pull/550) // It feels absurd to do this with an explicit loop instead of a regex, but I really can't see a // better way that doesn't result in broken behaviour. let i; for (i = string.length - 1; i >= 0; i--) { if (!string[i].match(/\s/)) { break; } } return string.substring(i + 1); } function leadingWs(string) { // Thankfully the annoying considerations described in trailingWs don't apply here: const match = string.match(/^\s*/); return match ? match[0] : ''; } // Based on https://en.wikipedia.org/wiki/Latin_script_in_Unicode // // Ranges and exceptions: // Latin-1 Supplement, 0080–00FF // - U+00D7 × Multiplication sign // - U+00F7 ÷ Division sign // Latin Extended-A, 0100–017F // Latin Extended-B, 0180–024F // IPA Extensions, 0250–02AF // Spacing Modifier Letters, 02B0–02FF // - U+02C7 ˇ ˇ Caron // - U+02D8 ˘ ˘ Breve // - U+02D9 ˙ ˙ Dot Above // - U+02DA ˚ ˚ Ring Above // - U+02DB ˛ ˛ Ogonek // - U+02DC ˜ ˜ Small Tilde // - U+02DD ˝ ˝ Double Acute Accent // Latin Extended Additional, 1E00–1EFF const extendedWordChars = 'a-zA-Z0-9_\\u{C0}-\\u{FF}\\u{D8}-\\u{F6}\\u{F8}-\\u{2C6}\\u{2C8}-\\u{2D7}\\u{2DE}-\\u{2FF}\\u{1E00}-\\u{1EFF}'; // Each token is one of the following: // - A punctuation mark plus the surrounding whitespace // - A word plus the surrounding whitespace // - Pure whitespace (but only in the special case where this the entire text // is just whitespace) // // We have to include surrounding whitespace in the tokens because the two // alternative approaches produce horribly broken results: // * If we just discard the whitespace, we can't fully reproduce the original // text from the sequence of tokens and any attempt to render the diff will // get the whitespace wrong. // * If we have separate tokens for whitespace, then in a typical text every // second token will be a single space character. But this often results in // the optimal diff between two texts being a perverse one that preserves // the spaces between words but deletes and reinserts actual common words. // See https://github.com/kpdecker/jsdiff/issues/160#issuecomment-1866099640 // for an example. // // Keeping the surrounding whitespace of course has implications for .equals // and .join, not just .tokenize. // This regex does NOT fully implement the tokenization rules described above. // Instead, it gives runs of whitespace their own "token". The tokenize method // then handles stitching whitespace tokens onto adjacent word or punctuation // tokens. const tokenizeIncludingWhitespace = new RegExp(`[${extendedWordChars}]+|\\s+|[^${extendedWordChars}]`, 'ug'); class WordDiff extends Diff { equals(left, right, options) { if (options.ignoreCase) { left = left.toLowerCase(); right = right.toLowerCase(); } return left.trim() === right.trim(); } tokenize(value, options = {}) { let parts; if (options.intlSegmenter) { const segmenter = options.intlSegmenter; if (segmenter.resolvedOptions().granularity != 'word') { throw new Error('The segmenter passed must have a granularity of "word"'); } parts = Array.from(segmenter.segment(value), segment => segment.segment); } else { parts = value.match(tokenizeIncludingWhitespace) || []; } const tokens = []; let prevPart = null; parts.forEach(part => { if ((/\s/).test(part)) { if (prevPart == null) { tokens.push(part); } else { tokens.push(tokens.pop() + part); } } else if (prevPart != null && (/\s/).test(prevPart)) { if (tokens[tokens.length - 1] == prevPart) { tokens.push(tokens.pop() + part); } else { tokens.push(prevPart + part); } } else { tokens.push(part); } prevPart = part; }); return tokens; } join(tokens) { // Tokens being joined here will always have appeared consecutively in the // same text, so we can simply strip off the leading whitespace from all the // tokens except the first (and except any whitespace-only tokens - but such // a token will always be the first and only token anyway) and then join them // and the whitespace around words and punctuation will end up correct. return tokens.map((token, i) => { if (i == 0) { return token; } else { return token.replace((/^\s+/), ''); } }).join(''); } postProcess(changes, options) { if (!changes || options.oneChangePerToken) { return changes; } let lastKeep = null; // Change objects representing any insertion or deletion since the last // "keep" change object. There can be at most one of each. let insertion = null; let deletion = null; changes.forEach(change => { if (change.added) { insertion = change; } else if (change.removed) { deletion = change; } else { if (insertion || deletion) { // May be false at start of text dedupeWhitespaceInChangeObjects(lastKeep, deletion, insertion, change); } lastKeep = change; insertion = null; deletion = null; } }); if (insertion || deletion) { dedupeWhitespaceInChangeObjects(lastKeep, deletion, insertion, null); } return changes; } } const wordDiff = new WordDiff(); function diffWords(oldStr, newStr, options) { // This option has never been documented and never will be (it's clearer to // just call `diffWordsWithSpace` directly if you need that behavior), but // has existed in jsdiff for a long time, so we retain support for it here // for the sake of backwards compatibility. if ((options === null || options === void 0 ? void 0 : options.ignoreWhitespace) != null && !options.ignoreWhitespace) { return diffWordsWithSpace(oldStr, newStr, options); } return wordDiff.diff(oldStr, newStr, options); } function dedupeWhitespaceInChangeObjects(startKeep, deletion, insertion, endKeep) { // Before returning, we tidy up the leading and trailing whitespace of the // change objects to eliminate cases where trailing whitespace in one object // is repeated as leading whitespace in the next. // Below are examples of the outcomes we want here to explain the code. // I=insert, K=keep, D=delete // 1. diffing 'foo bar baz' vs 'foo baz' // Prior to cleanup, we have K:'foo ' D:' bar ' K:' baz' // After cleanup, we want: K:'foo ' D:'bar ' K:'baz' // // 2. Diffing 'foo bar baz' vs 'foo qux baz' // Prior to cleanup, we have K:'foo ' D:' bar ' I:' qux ' K:' baz' // After cleanup, we want K:'foo ' D:'bar' I:'qux' K:' baz' // // 3. Diffing 'foo\nbar baz' vs 'foo baz' // Prior to cleanup, we have K:'foo ' D:'\nbar ' K:' baz' // After cleanup, we want K'foo' D:'\nbar' K:' baz' // // 4. Diffing 'foo baz' vs 'foo\nbar baz' // Prior to cleanup, we have K:'foo\n' I:'\nbar ' K:' baz' // After cleanup, we ideally want K'foo' I:'\nbar' K:' baz' // but don't actually manage this currently (the pre-cleanup change // objects don't contain enough information to make it possible). // // 5. Diffing 'foo bar baz' vs 'foo baz' // Prior to cleanup, we have K:'foo ' D:' bar ' K:' baz' // After cleanup, we want K:'foo ' D:' bar ' K:'baz' // // Our handling is unavoidably imperfect in the case where there's a single // indel between keeps and the whitespace has changed. For instance, consider // diffing 'foo\tbar\nbaz' vs 'foo baz'. Unless we create an extra change // object to represent the insertion of the space character (which isn't even // a token), we have no way to avoid losing information about the texts' // original whitespace in the result we return. Still, we do our best to // output something that will look sensible if we e.g. print it with // insertions in green and deletions in red. // Between two "keep" change objects (or before the first or after the last // change object), we can have either: // * A "delete" followed by an "insert" // * Just an "insert" // * Just a "delete" // We handle the three cases separately. if (deletion && insertion) { const oldWsPrefix = leadingWs(deletion.value); const oldWsSuffix = trailingWs(deletion.value); const newWsPrefix = leadingWs(insertion.value); const newWsSuffix = trailingWs(insertion.value); if (startKeep) { const commonWsPrefix = longestCommonPrefix(oldWsPrefix, newWsPrefix); startKeep.value = replaceSuffix(startKeep.value, newWsPrefix, commonWsPrefix); deletion.value = removePrefix(deletion.value, commonWsPrefix); insertion.value = removePrefix(insertion.value, commonWsPrefix); } if (endKeep) { const commonWsSuffix = longestCommonSuffix(oldWsSuffix, newWsSuffix); endKeep.value = replacePrefix(endKeep.value, newWsSuffix, commonWsSuffix); deletion.value = removeSuffix(deletion.value, commonWsSuffix); insertion.value = removeSuffix(insertion.value, commonWsSuffix); } } else if (insertion) { // The whitespaces all reflect what was in the new text rather than // the old, so we essentially have no information about whitespace // insertion or deletion. We just want to dedupe the whitespace. // We do that by having each change object keep its trailing // whitespace and deleting duplicate leading whitespace where // present. if (startKeep) { const ws = leadingWs(insertion.value); insertion.value = insertion.value.substring(ws.length); } if (endKeep) { const ws = leadingWs(endKeep.value); endKeep.value = endKeep.value.substring(ws.length); } // otherwise we've got a deletion and no insertion } else if (startKeep && endKeep) { const newWsFull = leadingWs(endKeep.value), delWsStart = leadingWs(deletion.value), delWsEnd = trailingWs(deletion.value); // Any whitespace that comes straight after startKeep in both the old and // new texts, assign to startKeep and remove from the deletion. const newWsStart = longestCommonPrefix(newWsFull, delWsStart); deletion.value = removePrefix(deletion.value, newWsStart); // Any whitespace that comes straight before endKeep in both the old and // new texts, and hasn't already been assigned to startKeep, assign to // endKeep and remove from the deletion. const newWsEnd = longestCommonSuffix(removePrefix(newWsFull, newWsStart), delWsEnd); deletion.value = removeSuffix(deletion.value, newWsEnd); endKeep.value = replacePrefix(endKeep.value, newWsFull, newWsEnd); // If there's any whitespace from the new text that HASN'T already been // assigned, assign it to the start: startKeep.value = replaceSuffix(startKeep.value, newWsFull, newWsFull.slice(0, newWsFull.length - newWsEnd.length)); } else if (endKeep) { // We are at the start of the text. Preserve all the whitespace on // endKeep, and just remove whitespace from the end of deletion to the // extent that it overlaps with the start of endKeep. const endKeepWsPrefix = leadingWs(endKeep.value); const deletionWsSuffix = trailingWs(deletion.value); const overlap = maximumOverlap(deletionWsSuffix, endKeepWsPrefix); deletion.value = removeSuffix(deletion.value, overlap); } else if (startKeep) { // We are at the END of the text. Preserve all the whitespace on // startKeep, and just remove whitespace from the start of deletion to // the extent that it overlaps with the end of startKeep. const startKeepWsSuffix = trailingWs(startKeep.value); const deletionWsPrefix = leadingWs(deletion.value); const overlap = maximumOverlap(startKeepWsSuffix, deletionWsPrefix); deletion.value = removePrefix(deletion.value, overlap); } } class WordsWithSpaceDiff extends Diff { tokenize(value) { // Slightly different to the tokenizeIncludingWhitespace regex used above in // that this one treats each individual newline as a distinct tokens, rather // than merging them into other surrounding whitespace. This was requested // in https://github.com/kpdecker/jsdiff/issues/180 & // https://github.com/kpdecker/jsdiff/issues/211 const regex = new RegExp(`(\\r?\\n)|[${extendedWordChars}]+|[^\\S\\n\\r]+|[^${extendedWordChars}]`, 'ug'); return value.match(regex) || []; } } const wordsWithSpaceDiff = new WordsWithSpaceDiff(); function diffWordsWithSpace(oldStr, newStr, options) { return wordsWithSpaceDiff.diff(oldStr, newStr, options); } function generateOptions(options, defaults) { if (typeof options === 'function') { defaults.callback = options; } else if (options) { for (const name in options) { /* istanbul ignore else */ if (Object.prototype.hasOwnProperty.call(options, name)) { defaults[name] = options[name]; } } } return defaults; } class LineDiff extends Diff { constructor() { super(...arguments); this.tokenize = tokenize; } equals(left, right, options) { // If we're ignoring whitespace, we need to normalise lines by stripping // whitespace before checking equality. (This has an annoying interaction // with newlineIsToken that requires special handling: if newlines get their // own token, then we DON'T want to trim the *newline* tokens down to empty // strings, since this would cause us to treat whitespace-only line content // as equal to a separator between lines, which would be weird and // inconsistent with the documented behavior of the options.) if (options.ignoreWhitespace) { if (!options.newlineIsToken || !left.includes('\n')) { left = left.trim(); } if (!options.newlineIsToken || !right.includes('\n')) { right = right.trim(); } } else if (options.ignoreNewlineAtEof && !options.newlineIsToken) { if (left.endsWith('\n')) { left = left.slice(0, -1); } if (right.endsWith('\n')) { right = right.slice(0, -1); } } return super.equals(left, right, options); } } const lineDiff = new LineDiff(); function diffLines(oldStr, newStr, options) { return lineDiff.diff(oldStr, newStr, options); } function diffTrimmedLines(oldStr, newStr, options) { options = generateOptions(options, { ignoreWhitespace: true }); return lineDiff.diff(oldStr, newStr, options); } // Exported standalone so it can be used from jsonDiff too. function tokenize(value, options) { if (options.stripTrailingCr) { // remove one \r before \n to match GNU diff's --strip-trailing-cr behavior value = value.replace(/\r\n/g, '\n'); } const retLines = [], linesAndNewlines = value.split(/(\n|\r\n)/); // Ignore the final empty token that occurs if the string ends with a new line if (!linesAndNewlines[linesAndNewlines.length - 1]) { linesAndNewlines.pop(); } // Merge the content and line separators into single tokens for (let i = 0; i < linesAndNewlines.length; i++) { const line = linesAndNewlines[i]; if (i % 2 && !options.newlineIsToken) { retLines[retLines.length - 1] += line; } else { retLines.push(line); } } return retLines; } function isSentenceEndPunct(char) { return char == '.' || char == '!' || char == '?'; } class SentenceDiff extends Diff { tokenize(value) { var _a; // If in future we drop support for environments that don't support lookbehinds, we can replace // this entire function with: // return value.split(/(?<=[.!?])(\s+|$)/); // but until then, for similar reasons to the trailingWs function in string.ts, we are forced // to do this verbosely "by hand" instead of using a regex. const result = []; let tokenStartI = 0; for (let i = 0; i < value.length; i++) { if (i == value.length - 1) { result.push(value.slice(tokenStartI)); break; } if (isSentenceEndPunct(value[i]) && value[i + 1].match(/\s/)) { // We've hit a sentence break - i.e. a punctuation mark followed by whitespace. // We now want to push TWO tokens to the result: // 1. the sentence result.push(value.slice(tokenStartI, i + 1)); // 2. the whitespace i = tokenStartI = i + 1; while ((_a = value[i + 1]) === null || _a === void 0 ? void 0 : _a.match(/\s/)) { i++; } result.push(value.slice(tokenStartI, i + 1)); // Then the next token (a sentence) starts on the character after the whitespace. // (It's okay if this is off the end of the string - then the outer loop will terminate // here anyway.) tokenStartI = i + 1; } } return result; } } const sentenceDiff = new SentenceDiff(); function diffSentences(oldStr, newStr, options) { return sentenceDiff.diff(oldStr, newStr, options); } class CssDiff extends Diff { tokenize(value) { return value.split(/([{}:;,]|\s+)/); } } const cssDiff = new CssDiff(); function diffCss(oldStr, newStr, options) { return cssDiff.diff(oldStr, newStr, options); } class JsonDiff extends Diff { constructor() { super(...arguments); this.tokenize = tokenize; } get useLongestToken() { // Discriminate between two lines of pretty-printed, serialized JSON where one of them has a // dangling comma and the other doesn't. Turns out including the dangling comma yields the nicest output: return true; } castInput(value, options) { const { undefinedReplacement, stringifyReplacer = (k, v) => typeof v === 'undefined' ? undefinedReplacement : v } = options; return typeof value === 'string' ? value : JSON.stringify(canonicalize(value, null, null, stringifyReplacer), null, ' '); } equals(left, right, options) { return super.equals(left.replace(/,([\r\n])/g, '$1'), right.replace(/,([\r\n])/g, '$1'), options); } } const jsonDiff = new JsonDiff(); function diffJson(oldStr, newStr, options) { return jsonDiff.diff(oldStr, newStr, options); } // This function handles the presence of circular references by bailing out when encountering an // object that is already on the "stack" of items being processed. Accepts an optional replacer function canonicalize(obj, stack, replacementStack, replacer, key) { stack = stack || []; replacementStack = replacementStack || []; if (replacer) { obj = replacer(key === undefined ? '' : key, obj); } let i; for (i = 0; i < stack.length; i += 1) { if (stack[i] === obj) { return replacementStack[i]; } } let canonicalizedObj; if ('[object Array]' === Object.prototype.toString.call(obj)) { stack.push(obj); canonicalizedObj = new Array(obj.length); replacementStack.push(canonicalizedObj); for (i = 0; i < obj.length; i += 1) { canonicalizedObj[i] = canonicalize(obj[i], stack, replacementStack, replacer, String(i)); } stack.pop(); replacementStack.pop(); return canonicalizedObj; } if (obj && obj.toJSON) { obj = obj.toJSON(); } if (typeof obj === 'object' && obj !== null) { stack.push(obj); canonicalizedObj = {}; replacementStack.push(canonicalizedObj); const sortedKeys = []; let key; for (key in obj) { /* istanbul ignore else */ if (Object.prototype.hasOwnProperty.call(obj, key)) { sortedKeys.push(key); } } sortedKeys.sort(); for (i = 0; i < sortedKeys.length; i += 1) { key = sortedKeys[i]; canonicalizedObj[key] = canonicalize(obj[key], stack, replacementStack, replacer, key); } stack.pop(); replacementStack.pop(); } else { canonicalizedObj = obj; } return canonicalizedObj; } class ArrayDiff extends Diff { tokenize(value) { return value.slice(); } join(value) { return value; } removeEmpty(value) { return value; } } const arrayDiff = new ArrayDiff(); function diffArrays(oldArr, newArr, options) { return arrayDiff.diff(oldArr, newArr, options); } function unixToWin(patch) { if (Array.isArray(patch)) { // It would be cleaner if instead of the line below we could just write // return patch.map(unixToWin) // but mysteriously TypeScript (v5.7.3 at the time of writing) does not like this and it will // refuse to compile, thinking that unixToWin could then return StructuredPatch[][] and the // result would be incompatible with the overload signatures. // See bug report at https://github.com/microsoft/TypeScript/issues/61398. return patch.map(p => unixToWin(p)); } return Object.assign(Object.assign({}, patch), { hunks: patch.hunks.map(hunk => (Object.assign(Object.assign({}, hunk), { lines: hunk.lines.map((line, i) => { var _a; return (line.startsWith('\\') || line.endsWith('\r') || ((_a = hunk.lines[i + 1]) === null || _a === void 0 ? void 0 : _a.startsWith('\\'))) ? line : line + '\r'; }) }))) }); } function winToUnix(patch) { if (Array.isArray(patch)) { // (See comment above equivalent line in unixToWin) return patch.map(p => winToUnix(p)); } return Object.assign(Object.assign({}, patch), { hunks: patch.hunks.map(hunk => (Object.assign(Object.assign({}, hunk), { lines: hunk.lines.map(line => line.endsWith('\r') ? line.substring(0, line.length - 1) : line) }))) }); } /** * Returns true if the patch consistently uses Unix line endings (or only involves one line and has * no line endings). */ function isUnix(patch) { if (!Array.isArray(patch)) { patch = [patch]; } return !patch.some(index => index.hunks.some(hunk => hunk.lines.some(line => !line.startsWith('\\') && line.endsWith('\r')))); } /** * Returns true if the patch uses Windows line endings and only Windows line endings. */ function isWin(patch) { if (!Array.isArray(patch)) { patch = [patch]; } return patch.some(index => index.hunks.some(hunk => hunk.lines.some(line => line.endsWith('\r')))) && patch.every(index => index.hunks.every(hunk => hunk.lines.every((line, i) => { var _a; return line.startsWith('\\') || line.endsWith('\r') || ((_a = hunk.lines[i + 1]) === null || _a === void 0 ? void 0 : _a.startsWith('\\')); }))); } /** * Parses a patch into structured data, in the same structure returned by `structuredPatch`. * * @return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. */ function parsePatch(uniDiff) { const diffstr = uniDiff.split(/\n/), list = []; let i = 0; function parseIndex() { const index = {}; list.push(index); // Parse diff metadata while (i < diffstr.length) { const line = diffstr[i]; // File header found, end parsing diff metadata if ((/^(---|\+\+\+|@@)\s/).test(line)) { break; } // Diff index const header = (/^(?:Index:|diff(?: -r \w+)+)\s+(.+?)\s*$/).exec(line); if (header) { index.index = header[1]; } i++; } // Parse file headers if they are defined. Unified diff requires them, but // there's no technical issues to have an isolated hunk without file header parseFileHeader(index); parseFileHeader(index); // Parse hunks index.hunks = []; while (i < diffstr.length) { const line = diffstr[i]; if ((/^(Index:\s|diff\s|---\s|\+\+\+\s|===================================================================)/).test(line)) { break; } else if ((/^@@/).test(line)) { index.hunks.push(parseHunk()); } else if (line) { throw new Error('Unknown line ' + (i + 1) + ' ' + JSON.stringify(line)); } else { i++; } } } // Parses the --- and +++ headers, if none are found, no lines // are consumed. function parseFileHeader(index) { const fileHeader = (/^(---|\+\+\+)\s+(.*)\r?$/).exec(diffstr[i]); if (fileHeader) { const data = fileHeader[2].split('\t', 2), header = (data[1] || '').trim(); let fileName = data[0].replace(/\\\\/g, '\\'); if ((/^".*"$/).test(fileName)) { fileName = fileName.substr(1, fileName.length - 2); } if (fileHeader[1] === '---') { index.oldFileName = fileName; index.oldHeader = header; } else { index.newFileName = fileName; index.newHeader = header; } i++; } } // Parses a hunk // This assumes that we are at the start of a hunk. function parseHunk() { var _a; const chunkHeaderIndex = i, chunkHeaderLine = diffstr[i++], chunkHeader = chunkHeaderLine.split(/@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/); const hunk = { oldStart: +chunkHeader[1], oldLines: typeof chunkHeader[2] === 'undefined' ? 1 : +chunkHeader[2], newStart: +chunkHeader[3], newLines: typeof chunkHeader[4] === 'undefined' ? 1 : +chunkHeader[4], lines: [] }; // Unified Diff Format quirk: If the chunk size is 0, // the first number is one lower than one would expect. // https://www.artima.com/weblogs/viewpost.jsp?thread=164293 if (hunk.oldLines === 0) { hunk.oldStart += 1; } if (hunk.newLines === 0) { hunk.newStart += 1; } let addCount = 0, removeCount = 0; for (; i < diffstr.length && (removeCount < hunk.oldLines || addCount < hunk.newLines || ((_a = diffstr[i]) === null || _a === void 0 ? void 0 : _a.startsWith('\\'))); i++) { const operation = (diffstr[i].length == 0 && i != (diffstr.length - 1)) ? ' ' : diffstr[i][0]; if (operation === '+' || operation === '-' || operation === ' ' || operation === '\\') { hunk.lines.push(diffstr[i]); if (operation === '+') { addCount++; } else if (operation === '-') { removeCount++; } else if (operation === ' ') { addCount++; removeCount++; } } else { throw new Error(`Hunk at line ${chunkHeaderIndex + 1} contained invalid line ${diffstr[i]}`); } } // Handle the empty block count case if (!addCount && hunk.newLines === 1) { hunk.newLines = 0; } if (!removeCount && hunk.oldLines === 1) { hunk.oldLines = 0; } // Perform sanity checking if (addCount !== hunk.newLines) { throw new Error('Added line count did not match for hunk at line ' + (chunkHeaderIndex + 1)); } if (removeCount !== hunk.oldLines) { throw new Error('Removed line count did not match for hunk at line ' + (chunkHeaderIndex + 1)); } return hunk; } while (i < diffstr.length) { parseIndex(); } return list; } // Iterator that traverses in the range of [min, max], stepping // by distance fro