UNPKG

@sanity/diff-match-patch

Version:

Robust diff, match and patch algorithms to perform operations required for synchronizing plain text

262 lines (230 loc) 8.23 kB
import {cleanupEfficiency, cleanupSemantic} from '../diff/cleanup.js'
import {type Diff, diff, DIFF_DELETE, DIFF_EQUAL, DIFF_INSERT} from '../diff/diff.js'
import {diffText1} from '../diff/diffText.js'
import {isLowSurrogate} from '../utils/surrogatePairs.js'
import {countUtf8Bytes} from '../utils/utf8Indices.js'
import {MAX_BITS} from './constants.js'
import {createPatchObject, type Patch} from './createPatchObject.js'

/**
 * Options for patch generation.
 *
 * @public
 */
export interface MakePatchOptions {
  /** Chunk size for context length. */
  margin: number
}

const DEFAULT_OPTS: MakePatchOptions = {
  margin: 4,
}

/**
 * Merge caller-provided options on top of the defaults.
 *
 * @param opts - Partial options; omitted keys fall back to `DEFAULT_OPTS`.
 * @returns A complete options object.
 */
function getDefaultOpts(opts: Partial<MakePatchOptions> = {}): MakePatchOptions {
  return {
    ...DEFAULT_OPTS,
    ...opts,
  }
}

/**
 * Compute a list of patches from the passed diffs.
 * The original text is reconstructed from the diffs themselves.
 *
 * @param diffs - Array of diff tuples.
 * @param options - Options for the patch generation.
 * @returns Array of Patch objects.
 * @public
 */
export function make(diffs: Diff[], options?: Partial<MakePatchOptions>): Patch[]

/**
 * Compute a list of patches to turn textA into textB.
 *
 * @param textA - Original text.
 * @param textB - New text.
 * @param options - Options for the patch generation.
 * @returns Array of Patch objects.
 * @public
 */
export function make(textA: string, textB: string, options?: Partial<MakePatchOptions>): Patch[]

/**
 * Compute a list of patches to turn textA into textB using provided diff tuples.
 *
 * @param textA - Original text.
 * @param diffs - Diff tuples to use as base.
 * @param options - Options for the patch generation.
 * @returns Array of Patch objects.
 * @public
 */
export function make(textA: string, diffs: Diff[], options?: Partial<MakePatchOptions>): Patch[]

export function make(
  a: Diff[] | string,
  b?: Partial<MakePatchOptions> | string | Diff[],
  options?: Partial<MakePatchOptions>,
): Patch[] {
  if (typeof a === 'string' && typeof b === 'string') {
    // Method 1: textA, textB
    // Compute diffs from textA and textB.
    let diffs = diff(a, b, {checkLines: true})
    if (diffs.length > 2) {
      diffs = cleanupSemantic(diffs)
      diffs = cleanupEfficiency(diffs)
    }
    return _make(a, diffs, getDefaultOpts(options))
  }

  if (a && Array.isArray(a) && (b == null || (typeof b === 'object' && !Array.isArray(b)))) {
    // Method 2: diffs[, options]
    // In this call format the options (if any) arrive as the SECOND argument,
    // so read them from `b`; fall back to `options` when `b` is absent.
    // Compute textA from diffs.
    return _make(diffText1(a), a, getDefaultOpts(b ?? options))
  }

  if (typeof a === 'string' && b && Array.isArray(b)) {
    // Method 3: textA, diffs
    return _make(a, b, getDefaultOpts(options))
  }

  throw new Error('Unknown call format to make()')
}

/**
 * Build the patch list for an already-computed set of diffs.
 *
 * Walks the diffs in order, grouping nearby insertions/deletions (and the small
 * equalities between them) into patches, and tracks offsets both in string
 * length units and in UTF-8 bytes (as reported by `countUtf8Bytes`).
 *
 * @param textA - Original text the diffs apply to.
 * @param diffs - Diff tuples describing how to get from textA to textB.
 * @param options - Fully resolved patch options.
 * @returns Array of Patch objects; empty when there are no diffs.
 * @throws Error if a diff tuple has an unknown diff type.
 */
function _make(textA: string, diffs: Diff[], options: MakePatchOptions): Patch[] {
  if (diffs.length === 0) {
    return [] // Get rid of the null case.
  }

  const patches: Patch[] = []

  let patch = createPatchObject(0, 0)
  let patchDiffLength = 0 // Keeping our own length var is faster in JS.
  let charCount1 = 0 // Number of characters into the textA string.
  let charCount2 = 0 // Number of characters into the textB string.
  let utf8Count1 = 0 // Number of utf-8 bytes into the textA string.
  let utf8Count2 = 0 // Number of utf-8 bytes into the textB string.

  // Start with textA (prepatchText) and apply the diffs until we arrive at
  // textB (postpatchText). We recreate the patches one by one to determine
  // context info.
  let prepatchText = textA
  let postpatchText = textA
  for (let x = 0; x < diffs.length; x++) {
    const currentDiff = diffs[x]
    const [diffType, diffText] = currentDiff
    const diffTextLength = diffText.length
    const diffByteLength = countUtf8Bytes(diffText)

    if (!patchDiffLength && diffType !== DIFF_EQUAL) {
      // A new patch starts here.
      patch.start1 = charCount1
      patch.start2 = charCount2
      patch.utf8Start1 = utf8Count1
      patch.utf8Start2 = utf8Count2
    }

    switch (diffType) {
      case DIFF_INSERT:
        patch.diffs[patchDiffLength++] = currentDiff
        patch.length2 += diffTextLength
        patch.utf8Length2 += diffByteLength
        postpatchText =
          postpatchText.substring(0, charCount2) + diffText + postpatchText.substring(charCount2)
        break
      case DIFF_DELETE:
        patch.length1 += diffTextLength
        patch.utf8Length1 += diffByteLength
        patch.diffs[patchDiffLength++] = currentDiff
        postpatchText =
          postpatchText.substring(0, charCount2) +
          postpatchText.substring(charCount2 + diffTextLength)
        break
      case DIFF_EQUAL:
        if (diffTextLength <= 2 * options.margin && patchDiffLength && diffs.length !== x + 1) {
          // Small equality inside a patch.
          patch.diffs[patchDiffLength++] = currentDiff
          patch.length1 += diffTextLength
          patch.length2 += diffTextLength
          patch.utf8Length1 += diffByteLength
          patch.utf8Length2 += diffByteLength
        } else if (diffTextLength >= 2 * options.margin) {
          // Time for a new patch.
          if (patchDiffLength) {
            addContext(patch, prepatchText, options)
            patches.push(patch)
            patch = createPatchObject(-1, -1)
            patchDiffLength = 0
            // Unlike Unidiff, our patch lists have a rolling context.
            // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
            // Update prepatch text & pos to reflect the application of the
            // just completed patch.
            prepatchText = postpatchText
            charCount1 = charCount2
            utf8Count1 = utf8Count2
          }
        }
        break
      default:
        throw new Error('Unknown diff type')
    }

    // Update the current character count.
    if (diffType !== DIFF_INSERT) {
      charCount1 += diffTextLength
      utf8Count1 += diffByteLength
    }
    if (diffType !== DIFF_DELETE) {
      charCount2 += diffTextLength
      utf8Count2 += diffByteLength
    }
  }

  // Pick up the leftover patch if not empty.
  if (patchDiffLength) {
    addContext(patch, prepatchText, options)
    patches.push(patch)
  }

  return patches
}

/**
 * Increase the context until it is unique,
 * but don't let the pattern expand beyond MAX_BITS.
 *
 * Mutates `patch` in place: surrounding text is added as leading/trailing
 * DIFF_EQUAL diffs, the start offsets are rolled back by the prefix size and
 * the lengths are extended by the prefix and suffix sizes. Does nothing when
 * `text` is empty.
 *
 * @param patch - The patch to grow.
 * @param text - Source text.
 * @param opts - Patch options; `margin` is the step by which context is grown.
 * @internal
 */
export function addContext(patch: Patch, text: string, opts: MakePatchOptions): void {
  if (text.length === 0) {
    return
  }

  let pattern = text.substring(patch.start2, patch.start2 + patch.length1)
  let padding = 0

  // Look for the first and last matches of pattern in text. If two different
  // matches are found, increase the pattern length.
  while (
    text.indexOf(pattern) !== text.lastIndexOf(pattern) &&
    pattern.length < MAX_BITS - opts.margin - opts.margin
  ) {
    padding += opts.margin
    pattern = text.substring(patch.start2 - padding, patch.start2 + patch.length1 + padding)
  }
  // Add one chunk for good luck.
  padding += opts.margin

  // Add the prefix.
  // Avoid splitting inside a surrogate.
  let prefixStart = patch.start2 - padding
  if (prefixStart >= 1 && isLowSurrogate(text[prefixStart])) {
    prefixStart--
  }
  const prefix = text.substring(prefixStart, patch.start2)
  if (prefix) {
    patch.diffs.unshift([DIFF_EQUAL, prefix])
  }
  const prefixLength = prefix.length
  const prefixUtf8Length = countUtf8Bytes(prefix)

  // Add the suffix.
  // Avoid splitting inside a surrogate.
  let suffixEnd = patch.start2 + patch.length1 + padding
  if (suffixEnd < text.length && isLowSurrogate(text[suffixEnd])) {
    suffixEnd++
  }
  const suffix = text.substring(patch.start2 + patch.length1, suffixEnd)
  if (suffix) {
    patch.diffs.push([DIFF_EQUAL, suffix])
  }
  const suffixLength = suffix.length
  const suffixUtf8Length = countUtf8Bytes(suffix)

  // Roll back the start points.
  patch.start1 -= prefixLength
  patch.start2 -= prefixLength
  patch.utf8Start1 -= prefixUtf8Length
  patch.utf8Start2 -= prefixUtf8Length

  // Extend the lengths.
  patch.length1 += prefixLength + suffixLength
  patch.length2 += prefixLength + suffixLength
  patch.utf8Length1 += prefixUtf8Length + suffixUtf8Length
  patch.utf8Length2 += prefixUtf8Length + suffixUtf8Length
}