UNPKG

@sanity/diff-match-patch

Version:

Robust diff, match and patch algorithms to perform operations required for synchronizing plain text

296 lines (260 loc) 8.36 kB
import {isHighSurrogate, isLowSurrogate} from '../utils/surrogatePairs.js'
import {cleanupMerge} from './cleanup.js'
import {getCommonPrefix} from './commonPrefix.js'
import {getCommonSuffix} from './commonSuffix.js'
import {computeDiff} from './compute.js'

/**
 * Diff type for deleted text.
 *
 * @public
 */
export const DIFF_DELETE = -1

/**
 * Diff type for inserted text.
 *
 * @public
 */
export const DIFF_INSERT = 1

/**
 * Diff type for text that is equal.
 *
 * @public
 */
export const DIFF_EQUAL = 0

/**
 * The three different types of changes possible in a diff:
 * - `DIFF_DELETE`: a deletion of text
 * - `DIFF_INSERT`: an insertion of text
 * - `DIFF_EQUAL` : an equal text
 *
 * @public
 */
export type DiffType = typeof DIFF_DELETE | typeof DIFF_INSERT | typeof DIFF_EQUAL

/**
 * The data structure representing a diff is an array of tuples:
 * [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']]
 * which means: delete 'Hello', add 'Goodbye' and keep ' world.'
 *
 * @public
 */
export type Diff = [DiffType, string]

/**
 * Options for generating a diff.
 *
 * @public
 */
export interface DiffOptions {
  /**
   * Forwarded unchanged to the diff computation. Defaults to `true`.
   * NOTE(review): presumably enables a line-level pre-pass - confirm in `computeDiff`.
   */
  checkLines: boolean

  /**
   * Time budget in seconds. Defaults to 1. A value of zero or less disables the deadline.
   */
  timeout: number
}

/**
 * @internal
 */
export interface InternalDiffOptions {
  checkLines: boolean

  /**
   * Time when the diff should be complete by (same clock and unit as `Date.now()`).
   */
  deadline: number
}

/**
 * Find the differences between two texts. Simplifies the problem by stripping
 * any common prefix or suffix off the texts before diffing. The resulting diff
 * is post-processed in place so that no EQUAL entry starts with a low surrogate
 * or ends with a high surrogate.
 *
 * @param textA - Old string to be diffed.
 * @param textB - New string to be diffed.
 * @param opts - Optional overrides for `checkLines` and `timeout`.
 * @returns Array of diff tuples.
 * @throws Error if either text is `null`.
 * @public
 */
export function diff(
  textA: null | string,
  textB: null | string,
  opts?: Partial<DiffOptions>,
): Diff[] {
  // Check for null inputs.
  if (textA === null || textB === null) {
    throw new Error('Null input. (diff)')
  }

  const diffs = doDiff(textA, textB, createInternalOpts(opts || {}))
  adjustDiffForSurrogatePairs(diffs)
  return diffs
}

/**
 * Find the differences between two texts. Simplifies the problem by stripping
 * any common prefix or suffix off the texts before diffing.
 *
 * @param textA - Old string to be diffed.
 * @param textB - New string to be diffed.
 * @param options - Internal options (absolute deadline instead of a timeout).
 * @returns Array of diff tuples.
 * @internal
 */
export function doDiff(textA: string, textB: string, options: InternalDiffOptions): Diff[] {
  // Don't reassign fn params
  let text1 = textA
  let text2 = textB

  // Check for equality (speedup).
  if (text1 === text2) {
    return text1 ? [[DIFF_EQUAL, text1]] : []
  }

  // Trim off common prefix (speedup).
  let commonlength = getCommonPrefix(text1, text2)
  const commonprefix = text1.substring(0, commonlength)
  text1 = text1.substring(commonlength)
  text2 = text2.substring(commonlength)

  // Trim off common suffix (speedup).
  commonlength = getCommonSuffix(text1, text2)
  const commonsuffix = text1.substring(text1.length - commonlength)
  text1 = text1.substring(0, text1.length - commonlength)
  text2 = text2.substring(0, text2.length - commonlength)

  // Compute the diff on the middle block.
  let diffs = computeDiff(text1, text2, options)

  // Restore the prefix and suffix.
  if (commonprefix) {
    diffs.unshift([DIFF_EQUAL, commonprefix])
  }
  if (commonsuffix) {
    diffs.push([DIFF_EQUAL, commonsuffix])
  }

  diffs = cleanupMerge(diffs)
  return diffs
}

/**
 * Converts a timeout in seconds into an absolute deadline on the `Date.now()` clock.
 *
 * - missing (or non-numeric / NaN) timeout: one second from now
 * - timeout of zero or less: no effective deadline (the sum overflows to `Infinity`)
 */
function createDeadLine(timeout: undefined | number): number {
  let t = 1
  if (typeof timeout === 'number' && !Number.isNaN(timeout)) {
    t = timeout <= 0 ? Number.MAX_VALUE : timeout
  }
  return Date.now() + t * 1000
}

/**
 * Builds the internal options from the public ones, applying defaults.
 *
 * The timeout is passed through untouched so that an explicit `0` reaches
 * `createDeadLine` and disables the deadline (a `||` default would turn it into 1 second).
 * Fields are picked explicitly so that an explicit `checkLines: undefined` does not
 * override the default and `timeout` does not leak into the internal options.
 */
function createInternalOpts(opts: Partial<DiffOptions>): InternalDiffOptions {
  return {
    checkLines: typeof opts.checkLines === 'boolean' ? opts.checkLines : true,
    deadline: createDeadLine(opts.timeout),
  }
}

/**
 * Attaches `char` to `data` at the end (`dir === 1`) or at the start (`dir === -1`).
 */
function combineChar(data: string, char: string, dir: 1 | -1) {
  return dir === 1 ? data + char : char + data
}

/**
 * Splits out a character in a given direction: the last one for `dir === 1`,
 * the first one for `dir === -1`. Returns `[remainingText, char]`.
 */
function splitChar(data: string, dir: 1 | -1): [string, string] {
  return dir === 1
    ? [data.substring(0, data.length - 1), data[data.length - 1]]
    : [data.substring(1), data[0]]
}

/**
 * Checks if two entries of the diff has the same character in the same "direction":
 * the last character for `dir === 1`, the first character for `dir === -1`.
 */
function hasSharedChar(diffs: Diff[], i: number, j: number, dir: 1 | -1): boolean {
  return dir === 1
    ? diffs[i][1][diffs[i][1].length - 1] === diffs[j][1][diffs[j][1].length - 1]
    : diffs[i][1][0] === diffs[j][1][0]
}

/**
 * Takes in a position of an EQUAL diff-type and attempts to "deisolate" the character for a given direction.
 * By this we mean that we attempt to either "shift" it to the later diffs, or bring another character next into this one.
 *
 * It's easier to understand with an example:
 *   [INSERT a, DELETE b, EQUAL cde, INSERT f, DELETE g]
 * shifting this forward will produce
 *   [INSERT a, DELETE b, EQUAL cd, INSERT ef, DELETE eg]
 *
 * This behavior is useful when `e` is actually a high surrogate character.
 *
 * Shifting it backwards produces
 *   [INSERT ac, DELETE bc, EQUAL de, INSERT f, DELETE g]
 * which is useful when `c` is a low surrogate character.
 *
 * Note that these diffs are 100% semantically equal.
 *
 * If there's not a matching INSERT/DELETE then it's forced to insert an additional entry:
 *   [EQUAL abc, INSERT d, EQUAL e]
 * shifted forward becomes:
 *   [EQUAL ab, INSERT cd, DELETE c, EQUAL e]
 *
 * If the INSERT and DELETE share the character adjacent to this EQUAL (their first
 * character when shifting forward, their last when shifting backward) it will instead
 * deisolate it by bringing that character into _this_ equal:
 *   [EQUAL abc, INSERT de, DELETE df]
 * shifted forward actually becomes
 *   [EQUAL abcd, INSERT e, DELETE f]
 *
 * The original diff here is typically never produced by the diff algorithm directly,
 * but they occur when we isolate characters in other places.
 *
 * Mutates `diffs` in place and may insert new entries (which shifts later indices).
 */
function deisolateChar(diffs: Diff[], i: number, dir: 1 | -1) {
  const inv = dir === 1 ? -1 : 1
  let insertIdx: null | number = null
  let deleteIdx: null | number = null

  // Scan away from `i` for the nearest non-empty INSERT and DELETE, stopping at the next EQUAL.
  let j = i + dir
  for (; j >= 0 && j < diffs.length && (insertIdx === null || deleteIdx === null); j += dir) {
    const [op, text] = diffs[j]
    if (text.length === 0) {
      continue
    }

    if (op === DIFF_INSERT) {
      if (insertIdx === null) {
        insertIdx = j
      }
      continue
    } else if (op === DIFF_DELETE) {
      if (deleteIdx === null) {
        deleteIdx = j
      }
      continue
    } else if (op === DIFF_EQUAL) {
      if (insertIdx === null && deleteIdx === null) {
        // This means that there was two consecutive EQUAL. Kinda weird, but easy to handle.
        const [rest, char] = splitChar(diffs[i][1], dir)
        diffs[i][1] = rest
        diffs[j][1] = combineChar(diffs[j][1], char, inv)
        return
      }

      break
    }
  }

  // The shared character must sit on the side of INSERT/DELETE that faces this EQUAL,
  // i.e. the same side `splitChar(..., inv)` removes it from below.
  if (insertIdx !== null && deleteIdx !== null && hasSharedChar(diffs, insertIdx, deleteIdx, inv)) {
    // Special case.
    const [insertText, insertChar] = splitChar(diffs[insertIdx][1], inv)
    const [deleteText] = splitChar(diffs[deleteIdx][1], inv)
    diffs[insertIdx][1] = insertText
    diffs[deleteIdx][1] = deleteText
    diffs[i][1] = combineChar(diffs[i][1], insertChar, dir)
    return
  }

  const [text, char] = splitChar(diffs[i][1], dir)
  diffs[i][1] = text

  // Where a missing INSERT/DELETE entry has to go. Scanning forward, `j` is the blocking
  // EQUAL (or `diffs.length`), so the entry goes right before it. Scanning backward, `j`
  // is the blocking EQUAL (or -1), so the entry goes right after it; using `j` itself
  // would put it on the wrong side (and `splice(-1, ...)` counts from the end).
  const spliceAt = dir === 1 ? j : j + 1

  if (insertIdx === null) {
    diffs.splice(spliceAt, 0, [DIFF_INSERT, char])

    // We need to adjust deleteIdx here since it's been shifted
    if (deleteIdx !== null && deleteIdx >= spliceAt) deleteIdx++
  } else {
    diffs[insertIdx][1] = combineChar(diffs[insertIdx][1], char, inv)
  }

  if (deleteIdx === null) {
    diffs.splice(spliceAt, 0, [DIFF_DELETE, char])
  } else {
    diffs[deleteIdx][1] = combineChar(diffs[deleteIdx][1], char, inv)
  }
}

/**
 * Post-processes a diff in place so that EQUAL entries neither end with a high
 * surrogate nor start with a low surrogate, then drops entries left with empty text.
 */
function adjustDiffForSurrogatePairs(diffs: Diff[]) {
  // Go over each pair of diffs and see if there was a split at a surrogate pair
  for (let i = 0; i < diffs.length; i++) {
    const [diffType, diffText] = diffs[i]

    if (diffText.length === 0) continue

    const firstChar = diffText[0]
    const lastChar = diffText[diffText.length - 1]

    if (isHighSurrogate(lastChar) && diffType === DIFF_EQUAL) {
      deisolateChar(diffs, i, 1)
    }

    if (isLowSurrogate(firstChar) && diffType === DIFF_EQUAL) {
      deisolateChar(diffs, i, -1)
    }
  }

  // Remove any empty diffs. Walk backwards so that removing an entry does not
  // make the loop skip the entry that slides into its place.
  for (let i = diffs.length - 1; i >= 0; i--) {
    if (diffs[i][1].length === 0) {
      diffs.splice(i, 1)
    }
  }
}