UNPKG

@sanity/diff-patch

Version:

Generates a set of Sanity patches needed to change an item (usually a document) from one shape to another

677 lines (589 loc) 23.1 kB
import {makePatches, stringifyPatches} from '@sanity/diff-match-patch'
import {DiffError} from './diffError.js'
import {isKeyedObject, type KeyedSanityObject, type Path, pathToString} from './paths.js'
import {validateProperty} from './validate.js'
import {
  type Patch,
  type UnsetPatch,
  type DiffMatchPatch,
  type SanityPatchMutation,
  type SanityPatchOperations,
  type SanitySetPatchOperation,
  type SanityUnsetPatchOperation,
  type SanityInsertPatchOperation,
  type SanityDiffMatchPatchOperation,
} from './patches.js'
import {difference, intersection} from './setOperations.js'

/**
 * Document keys that are ignored during diff operations.
 * These are system-managed fields that should not be included in patches on
 * top-level documents and should not be diffed with diff-match-patch.
 */
const SYSTEM_KEYS = ['_id', '_type', '_createdAt', '_updatedAt', '_rev']

/**
 * Maximum size of strings to consider for diff-match-patch (1MB)
 * Based on testing showing consistently good performance up to this size
 */
const DMP_MAX_STRING_SIZE = 1_000_000

/**
 * Maximum difference in string length before falling back to set operations (40%)
 * Above this threshold, likely indicates text replacement which can be slow
 */
const DMP_MAX_STRING_LENGTH_CHANGE_RATIO = 0.4

/**
 * Minimum string size to apply change ratio check (10KB)
 * Small strings are always fast regardless of change ratio
 */
const DMP_MIN_SIZE_FOR_RATIO_CHECK = 10_000

/**
 * An object (record) that _may_ have a `_key` property
 *
 * @internal
 */
export type SanityObject = KeyedSanityObject | Partial<KeyedSanityObject>

/**
 * Represents a partial Sanity document (eg a "stub").
 *
 * @public
 */
export interface DocumentStub {
  _id?: string
  _type?: string
  _rev?: string
  _createdAt?: string
  _updatedAt?: string
  [key: string]: unknown
}

/**
 * Options for the patch generator
 *
 * @public
 */
export interface PatchOptions {
  /**
   * Document ID to apply the patch to.
   *
   * @defaultValue `undefined` - tries to extract `_id` from passed document
   */
  id?: string

  /**
   * Base path to apply the patch to - useful if diffing sub-branches of a document.
   *
   * @defaultValue `[]` - eg root of the document
   */
  basePath?: Path

  /**
   * Only apply the patch if the document revision matches this value.
   * If the property is the boolean value `true`, it will attempt to extract
   * the revision from the document `_rev` property.
   *
   * @defaultValue `undefined` (do not apply revision check)
   */
  ifRevisionID?: string | true
}

/**
 * Generates an array of mutations for Sanity, based on the differences between
 * the two passed documents/trees.
 *
 * @param source - The first document/tree to compare
 * @param target - The second document/tree to compare
 * @param options - Options for the diff generation
 * @returns Array of mutations
 * @throws Error if no document ID can be determined, if `ifRevisionID` is `true`
 *   but `source` has no `_rev`, or if `_type` differs when diffing at the root
 * @public
 */
export function diffPatch(
  source: DocumentStub,
  target: DocumentStub,
  options: PatchOptions = {},
): SanityPatchMutation[] {
  const id = options.id || (source._id === target._id && source._id)
  const revisionLocked = options.ifRevisionID
  const ifRevisionID = typeof revisionLocked === 'boolean' ? source._rev : revisionLocked
  const basePath = options.basePath || []

  if (!id) {
    throw new Error(
      '_id on source and target not present or differs, specify document id the mutations should be applied to',
    )
  }

  if (revisionLocked === true && !ifRevisionID) {
    throw new Error(
      '`ifRevisionID` is set to `true`, but no `_rev` was passed in item A. Either explicitly set `ifRevisionID` to a revision, or pass `_rev` as part of item A.',
    )
  }

  // `_type` is only compared when diffing whole documents (empty base path)
  if (basePath.length === 0 && source._type !== target._type) {
    throw new Error(`_type is immutable and cannot be changed (${source._type} => ${target._type})`)
  }

  const operations = diffItem(source, target, basePath, [])
  return serializePatches(operations).map((patchOperations, i) => ({
    patch: {
      id,
      // only add `ifRevisionID` to the first patch
      ...(i === 0 && ifRevisionID && {ifRevisionID}),
      ...patchOperations,
    },
  }))
}

/**
 * Generates an array of patch operation objects for Sanity, based on the
 * differences between the two passed values
 *
 * @param source - The source value to start off with
 * @param target - The target value that the patch operations will aim to create
 * @param basePath - An optional path that will be prefixed to all subsequent patch operations
 * @returns Array of patch operations
 * @public
 */
export function diffValue(
  source: unknown,
  target: unknown,
  basePath: Path = [],
): SanityPatchOperations[] {
  return serializePatches(diffItem(source, target, basePath))
}

/**
 * Diffs two arbitrary values and appends the resulting patches to `patches`.
 *
 * Dispatches on the runtime type of both values: strings, arrays and records
 * are diffed structurally; anything else (including a type change) results in
 * an `unset` when `target` is `undefined`, otherwise a `set` of the target.
 *
 * @param source - The value to start from
 * @param target - The value to end up with
 * @param path - Path of the value within the document
 * @param patches - Accumulator that new patches are pushed onto
 * @returns The `patches` accumulator
 */
function diffItem(
  source: unknown,
  target: unknown,
  path: Path = [],
  patches: Patch[] = [],
): Patch[] {
  if (source === target) {
    return patches
  }

  if (typeof source === 'string' && typeof target === 'string') {
    diffString(source, target, path, patches)
    return patches
  }

  if (Array.isArray(source) && Array.isArray(target)) {
    diffArray(source, target, path, patches)
    return patches
  }

  if (isRecord(source) && isRecord(target)) {
    diffObject(source, target, path, patches)
    return patches
  }

  if (target === undefined) {
    patches.push({op: 'unset', path})
    return patches
  }

  patches.push({op: 'set', path, value: target})
  return patches
}

/**
 * Diffs two records property by property.
 *
 * Properties only present in `source` are unset; every property of `target`
 * is diffed recursively against its counterpart in `source`. At the document
 * root (empty `path`) the keys listed in `SYSTEM_KEYS` are skipped.
 *
 * @param source - The record to start from
 * @param target - The record to end up with
 * @param path - Path of the record within the document
 * @param patches - Accumulator that new patches are pushed onto
 * @returns The `patches` accumulator
 */
function diffObject(
  source: Record<string, unknown>,
  target: Record<string, unknown>,
  path: Path,
  patches: Patch[],
) {
  const atRoot = path.length === 0
  const keyFilter = atRoot ? isNotIgnoredKey : yes

  // Both key lists are validated up front, before any patch is emitted
  const sourceKeys = Object.keys(source)
    .filter(keyFilter)
    .map((key) => validateProperty(key, source[key], path))
  const targetKeys = Object.keys(target)
    .filter(keyFilter)
    .map((key) => validateProperty(key, target[key], path))

  // Check for deleted items.
  // An own-property check is required here: the `in` operator also matches
  // inherited members (`constructor`, `toString`, ...), which would cause a
  // removed field with such a name to never be unset.
  for (const key of sourceKeys) {
    if (!Object.prototype.hasOwnProperty.call(target, key)) {
      patches.push({op: 'unset', path: path.concat(key)})
    }
  }

  // Check for changed items
  for (const key of targetKeys) {
    diffItem(source[key], target[key], path.concat([key]), patches)
  }

  return patches
}

/**
 * Diffs two arrays, by `_key` when both arrays consist solely of uniquely
 * keyed objects, otherwise by index.
 *
 * @param source - The array to start from
 * @param target - The array to end up with
 * @param path - Path of the array within the document
 * @param patches - Accumulator that new patches are pushed onto
 * @returns The `patches` accumulator
 */
function diffArray(source: unknown[], target: unknown[], path: Path, patches: Patch[]) {
  if (isUniquelyKeyed(source) && isUniquelyKeyed(target)) {
    return diffArrayByKey(source, target, path, patches)
  }

  return diffArrayByIndex(source, target, path, patches)
}

/**
 * Diffs two arrays position by position.
 *
 * Surplus target items are appended with a single `insert`, surplus source
 * items are unset (by key when possible, otherwise by index/range), and the
 * overlapping positions are diffed recursively. `undefined` array members in
 * `target` are treated as `null`.
 *
 * @param source - The array to start from
 * @param target - The array to end up with
 * @param path - Path of the array within the document
 * @param patches - Accumulator that new patches are pushed onto
 * @returns The `patches` accumulator
 * @throws DiffError if `target` directly contains another array
 */
function diffArrayByIndex(source: unknown[], target: unknown[], path: Path, patches: Patch[]) {
  // Check for new items
  if (target.length > source.length) {
    patches.push({
      op: 'insert',
      position: 'after',
      path: path.concat([-1]),
      items: target.slice(source.length).map(nullifyUndefined),
    })
  }

  // Check for deleted items
  if (target.length < source.length) {
    const isSingle = source.length - target.length === 1
    const unsetItems = source.slice(target.length)

    // If we have unique array keys, we'll want to unset by key, as this is
    // safer in a realtime, collaborative setting
    if (isUniquelyKeyed(unsetItems)) {
      // Pushed one at a time: spreading a very large array into `push()` can
      // exceed the engine's argument limit
      for (const item of unsetItems) {
        const unsetPatch: UnsetPatch = {op: 'unset', path: path.concat({_key: item._key})}
        patches.push(unsetPatch)
      }
    } else {
      patches.push({
        op: 'unset',
        // A single index, or an open-ended range starting at the first removed index
        path: path.concat([isSingle ? target.length : [target.length, '']]),
      })
    }
  }

  // Check for illegal array contents
  for (let i = 0; i < target.length; i++) {
    if (Array.isArray(target[i])) {
      throw new DiffError('Multi-dimensional arrays not supported', path.concat(i), target[i])
    }
  }

  // Diff the positions that exist in both arrays
  const overlapping = Math.min(source.length, target.length)
  for (let i = 0; i < overlapping; i++) {
    diffItem(source[i], nullifyUndefined(target[i]), path.concat(i), patches)
  }

  return patches
}

/**
 * Diffs two arrays of keyed objects by their `_key` properties.
 *
 * This approach is preferred over index-based diffing for collaborative editing scenarios
 * because it generates patches that are more resilient to concurrent modifications.
 * When multiple users edit the same array simultaneously, key-based patches have better
 * conflict resolution characteristics than index-based patches.
 *
 * The function handles three main operations:
 * 1. **Reordering**: When existing items change positions within the array
 * 2. **Content changes**: When the content of existing items is modified
 * 3. **Insertions/Deletions**: When items are added or removed from the array
 *
 * @param source - The original array with keyed objects
 * @param target - The target array with keyed objects
 * @param path - The path to this array within the document
 * @param patches - Array to accumulate generated patches
 * @returns The patches array with new patches appended
 */
function diffArrayByKey(
  source: KeyedSanityObject[],
  target: KeyedSanityObject[],
  path: Path,
  patches: Patch[],
) {
  // Create lookup maps for efficient key-based access to array items
  const sourceItemsByKey = new Map(source.map((item) => [item._key, item]))
  const targetItemsByKey = new Map(target.map((item) => [item._key, item]))

  // Categorize keys by their presence in source vs target arrays
  const sourceKeys = new Set(sourceItemsByKey.keys())
  const targetKeys = new Set(targetItemsByKey.keys())
  const keysRemovedFromSource = difference(sourceKeys, targetKeys)
  const keysAddedToTarget = difference(targetKeys, sourceKeys)
  const keysInBothArrays = intersection(sourceKeys, targetKeys)

  // Handle reordering of existing items within the array.
  // We detect reordering by comparing the relative positions of keys that exist in both arrays,
  // excluding keys that were added or removed (since they don't participate in reordering).
  const sourceKeysStillPresent = Array.from(difference(sourceKeys, keysRemovedFromSource))
  const targetKeysAlreadyPresent = Array.from(difference(targetKeys, keysAddedToTarget))

  // Track which keys need to be reordered by comparing their relative positions
  const keyReorderOperations: {sourceKey: string; targetKey: string}[] = []

  for (let i = 0; i < keysInBothArrays.size; i++) {
    const keyAtPositionInSource = sourceKeysStillPresent[i]
    const keyAtPositionInTarget = targetKeysAlreadyPresent[i]

    // If different keys occupy the same relative position, a reorder is needed
    if (keyAtPositionInSource !== keyAtPositionInTarget) {
      keyReorderOperations.push({
        sourceKey: keyAtPositionInSource,
        targetKey: keyAtPositionInTarget,
      })
    }
  }

  // Generate reorder patch if any items changed positions
  if (keyReorderOperations.length) {
    patches.push({
      op: 'reorder',
      path,
      snapshot: source,
      reorders: keyReorderOperations,
    })
  }

  // Process content changes for items that exist in both arrays
  for (const key of keysInBothArrays) {
    diffItem(sourceItemsByKey.get(key), targetItemsByKey.get(key), [...path, {_key: key}], patches)
  }

  // Remove items that no longer exist in the target array
  for (const keyToRemove of keysRemovedFromSource) {
    patches.push({op: 'unset', path: [...path, {_key: keyToRemove}]})
  }

  // Insert new items that were added to the target array
  // We batch consecutive insertions for efficiency and insert them at the correct positions
  if (keysAddedToTarget.size) {
    // The key after which we'll insert pending items. `undefined` until the
    // first pre-existing item has been seen while walking the target.
    let insertionAnchorKey: string | undefined
    let itemsPendingInsertion: unknown[] = []

    // Emits the current batch (if any) and starts a new, empty one
    const flushPendingInsertions = () => {
      if (itemsPendingInsertion.length === 0) return

      patches.push({
        op: 'insert',
        // Insert after the anchor key if we have one, otherwise insert at the beginning.
        // Compared against `undefined` (not truthiness) so that an empty-string
        // key is still honoured as an anchor.
        ...(insertionAnchorKey !== undefined
          ? {position: 'after', path: [...path, {_key: insertionAnchorKey}]}
          : {position: 'before', path: [...path, 0]}),
        items: itemsPendingInsertion,
      })
      itemsPendingInsertion = []
    }

    // Walk through the target array to determine where new items should be inserted
    for (const key of targetKeys) {
      if (keysAddedToTarget.has(key)) {
        // This is a new item - add it to the pending insertion batch
        itemsPendingInsertion.push(targetItemsByKey.get(key)!)
      } else if (keysInBothArrays.has(key)) {
        // This is an existing item - flush any pending insertions before it
        flushPendingInsertions()
        insertionAnchorKey = key
      }
    }

    // Flush any remaining insertions at the end
    flushPendingInsertions()
  }

  return patches
}

/**
 * Determines whether to use diff-match-patch or fallback to a `set` operation
 * when creating a patch to transform a `source` string to `target` string.
 *
 * `diffMatchPatch` patches are typically preferred to `set` operations because
 * they handle conflicts better (when multiple editors work simultaneously) by
 * preserving the user's intent and allowing for 3-way merges.
 *
 * **Heuristic rationale:**
 *
 * Perf analysis revealed that string length has minimal impact on small,
 * keystroke-level changes, but large text replacements (high change ratio) can
 * trigger worst-case algorithm behavior. The 40% change ratio threshold is a
 * simple heuristic that catches problematic replacement scenarios while
 * allowing the algorithm to excel at insertions and deletions.
 *
 * **Performance characteristics (tested on M2 MacBook Pro):**
 *
 * *Keystroke-level editing (most common use case):*
 * - Small strings (1KB-10KB): 0ms for 1-5 keystrokes, consistently sub-millisecond
 * - Medium strings (50KB-200KB): 0ms for 1-5 keystrokes, consistently sub-millisecond
 * - 10 simultaneous keystrokes: ~12ms on 100KB strings
 *
 * *Copy-paste operations (less frequent):*
 * - Small copy-paste operations (<50KB): 0-10ms regardless of string length
 * - Large insertions/deletions (50KB+): 0-50ms (excellent performance)
 * - Large text replacements (50KB+): 70ms-2s+ (can be slow due to algorithm complexity)
 *
 * **Algorithm details:**
 * Uses Myers' diff algorithm with O(ND) time complexity where N=text length and D=edit distance.
 * Includes optimizations: common prefix/suffix removal, line-mode processing, and timeout protection.
 *
 * **Test methodology:**
 * - Generated realistic word-based text patterns
 * - Simulated actual editing behaviors (keystrokes vs copy-paste)
 * - Measured performance across string sizes from 1KB to 10MB
 * - Validated against edge cases including repetitive text and scattered changes
 *
 * @param source - The previous version of the text
 * @param target - The new version of the text
 * @returns true if diff-match-patch should be used, false if fallback to set operation
 *
 * @example
 * ```typescript
 * // Keystroke editing - always fast
 * shouldUseDiffMatchPatch(largeDoc, largeDocWithTypo) // true, ~0ms
 *
 * // Small paste - always fast
 * shouldUseDiffMatchPatch(doc, docWithSmallInsertion) // true, ~0ms
 *
 * // Large replacement - potentially slow
 * shouldUseDiffMatchPatch(article, completelyDifferentArticle) // false, use set
 * ```
 *
 * Compatible with @sanity/diff-match-patch@3.2.0
 */
export function shouldUseDiffMatchPatch(source: string, target: string): boolean {
  const maxLength = Math.max(source.length, target.length)

  // Always reject strings larger than our tested size limit
  if (maxLength > DMP_MAX_STRING_SIZE) {
    return false
  }

  // For small strings, always use diff-match-patch regardless of change ratio
  // Performance testing showed these are always fast (<10ms)
  if (maxLength < DMP_MIN_SIZE_FOR_RATIO_CHECK) {
    return true
  }

  // Calculate the change ratio to detect large text replacements
  // High ratios indicate replacement scenarios which can trigger slow algorithm paths
  const lengthDifference = Math.abs(target.length - source.length)
  const changeRatio = lengthDifference / maxLength

  // If change ratio is high, likely a replacement operation that could be slow
  // Fall back to set operation for better user experience
  if (changeRatio > DMP_MAX_STRING_LENGTH_CHANGE_RATIO) {
    return false
  }

  // All other cases: use diff-match-patch
  // This covers keystroke editing and insertion/deletion scenarios which perform excellently
  return true
}

/**
 * Builds a `diffMatchPatch` patch that turns `source` into `target`.
 *
 * @param source - The previous version of the text
 * @param target - The new version of the text
 * @param path - Path of the string within the document
 * @returns The patch, or `undefined` when the caller should fall back to a
 *   plain `set`: the last path segment is a string starting with `_`, the
 *   heuristic in `shouldUseDiffMatchPatch` rejects the pair, or patch
 *   generation throws
 */
function getDiffMatchPatch(source: string, target: string, path: Path): DiffMatchPatch | undefined {
  const last = path.at(-1)
  // don't use diff-match-patch for system keys
  if (typeof last === 'string' && last.startsWith('_')) return undefined
  if (!shouldUseDiffMatchPatch(source, target)) return undefined

  try {
    // Using `makePatches(string, string)` directly instead of the multi-step approach e.g.
    // `stringifyPatches(makePatches(cleanupEfficiency(makeDiff(source, target))))`.
    // this is because `makePatches` internally handles diff generation and
    // automatically applies both `cleanupSemantic()` and `cleanupEfficiency()`
    // when beneficial, resulting in cleaner code with near identical performance and
    // better error handling.
    // [source](https://github.com/sanity-io/diff-match-patch/blob/v3.2.0/src/patch/make.ts#L67-L76)
    //
    // Performance validation (M2 MacBook Pro):
    // Both approaches measured at identical performance:
    // - 10KB strings: 0-1ms total processing time
    // - 100KB strings: 0-1ms total processing time
    // - Individual step breakdown: makeDiff(0ms) + cleanup(0ms) + makePatches(0ms) + stringify(~1ms)
    const strPatch = stringifyPatches(makePatches(source, target))
    return {op: 'diffMatchPatch', path, value: strPatch}
  } catch {
    // Deliberate best-effort: fall back to using regular set patch
    return undefined
  }
}

/**
 * Appends the patch needed to turn `source` into `target`: a `diffMatchPatch`
 * when one can be produced, otherwise a `set` of the full target string.
 *
 * @param source - The previous version of the text
 * @param target - The new version of the text
 * @param path - Path of the string within the document
 * @param patches - Accumulator that the new patch is pushed onto
 * @returns The `patches` accumulator
 */
function diffString(source: string, target: string, path: Path, patches: Patch[]) {
  const dmp = getDiffMatchPatch(source, target, path)
  patches.push(dmp ?? {op: 'set', path, value: target})
  return patches
}

/**
 * Returns `true` for keys that are not listed in `SYSTEM_KEYS`.
 */
function isNotIgnoredKey(key: string) {
  return !SYSTEM_KEYS.includes(key)
}

// mutually exclusive operations
type SanityPatchOperation =
  | SanitySetPatchOperation
  | SanityUnsetPatchOperation
  | SanityInsertPatchOperation
  | SanityDiffMatchPatchOperation

/**
 * Converts the internal patch list into Sanity patch operation objects.
 *
 * Consecutive `set` patches are merged into one operation object, as are
 * consecutive `diffMatchPatch` and consecutive `unset` patches. Every `insert`
 * becomes its own operation object and every `reorder` becomes two `set`
 * operation objects. The relative order of patches is preserved.
 *
 * Implemented as a single loop: recursing once per patch would copy the
 * remaining list at every step and use stack depth proportional to the number
 * of patches.
 *
 * @param patches - The patches to serialize, in application order
 * @param curr - Optional operation object to start accumulating into
 * @returns The serialized operation objects
 */
function serializePatches(patches: Patch[], curr?: SanityPatchOperation): SanityPatchOperations[] {
  const serialized: SanityPatchOperations[] = []
  // The operation object currently being accumulated into, if any
  let current: SanityPatchOperation | undefined = curr

  for (const patch of patches) {
    switch (patch.op) {
      case 'set':
      case 'diffMatchPatch': {
        // TODO: reconfigure eslint to use @typescript-eslint/no-unused-vars
        // eslint-disable-next-line no-unused-vars
        type CurrentOp = Extract<SanityPatchOperation, {[K in typeof patch.op]: {}}>

        // Start a new operation object unless the current one is of the same kind
        if (!current || !(patch.op in current)) {
          if (current) serialized.push(current)
          current = {[patch.op]: {}} as CurrentOp
        }

        Object.assign((current as CurrentOp)[patch.op], {[pathToString(patch.path)]: patch.value})
        break
      }

      case 'unset': {
        let unsetOperation: SanityUnsetPatchOperation
        if (current && 'unset' in current) {
          unsetOperation = current
        } else {
          if (current) serialized.push(current)
          unsetOperation = {unset: []}
          current = unsetOperation
        }

        unsetOperation.unset.push(pathToString(patch.path))
        break
      }

      case 'insert': {
        // Inserts are never merged: emit whatever was accumulating first
        if (current) serialized.push(current)
        current = undefined

        serialized.push({
          insert: {
            [patch.position]: pathToString(patch.path),
            items: patch.items,
          },
        } as SanityInsertPatchOperation)
        break
      }

      case 'reorder': {
        if (current) serialized.push(current)
        current = undefined

        serialized.push(...serializeReorder(patch))
        break
      }

      default: {
        // Unrecognized operation: stop here, discarding the operation being
        // accumulated and all remaining patches (long-standing behavior).
        return serialized
      }
    }
  }

  if (current) serialized.push(current)
  return serialized
}

/**
 * Serializes a single `reorder` patch into two `set` operation objects.
 *
 * REORDER STRATEGY: Two-phase approach to avoid key collisions
 *
 * Problem: Direct key swaps can cause collisions. For example, swapping A↔B:
 * - Set A's content to B: ✓
 * - Set B's content to A: ✗ (A's content was already overwritten)
 *
 * Solution: Use temporary keys as an intermediate step
 * Phase 1: Move all items to temporary keys with their final content
 * Phase 2: Update just the _key property to restore the final keys
 *
 * @param patch - The reorder patch to serialize
 * @returns The phase 1 and phase 2 operation objects, in that order
 */
function serializeReorder(patch: Extract<Patch, {op: 'reorder'}>): SanityPatchOperations[] {
  // Phase 1: Move items to collision-safe temporary keys
  const tempKeyOperations: SanityPatchOperations = {}
  tempKeyOperations.set = {}

  // Phase 2: Update _key properties to restore the intended final keys
  const finalKeyOperations: SanityPatchOperations = {}
  finalKeyOperations.set = {}

  for (const {sourceKey, targetKey} of patch.reorders) {
    const temporaryKey = `__temp_reorder_${sourceKey}__`
    const targetIndex = getIndexForKey(patch.snapshot, targetKey)
    const finalContentForThisPosition =
      targetIndex === undefined ? undefined : patch.snapshot[targetIndex]

    Object.assign(tempKeyOperations.set, {
      [pathToString([...patch.path, {_key: sourceKey}])]: {
        ...finalContentForThisPosition,
        _key: temporaryKey,
      },
    })

    Object.assign(finalKeyOperations.set, {
      [pathToString([...patch.path, {_key: temporaryKey}, '_key'])]: targetKey,
    })
  }

  return [tempKeyOperations, finalKeyOperations]
}

/**
 * Type guard: `true` when every item in `arr` passes `isKeyedObject` and no
 * `_key` occurs more than once. An empty array is considered uniquely keyed.
 */
function isUniquelyKeyed(arr: unknown[]): arr is KeyedSanityObject[] {
  const seenKeys = new Set<string>()

  for (const item of arr) {
    // Each item must be a keyed object with a _key property
    if (!isKeyedObject(item)) return false

    // Each _key must be unique within the array
    if (seenKeys.has(item._key)) return false

    seenKeys.add(item._key)
  }

  return true
}

// Cache to avoid recomputing key-to-index mappings for the same array.
// A `Map` is used rather than a plain object so that keys such as `__proto__`
// or `constructor` cannot collide with inherited object members.
const keyToIndexCache = new WeakMap<KeyedSanityObject[], Map<string, number>>()

/**
 * Looks up the index of the item with the given `_key` in `keyedArray`.
 *
 * The key-to-index mapping is built once per array instance and cached, so
 * the array must not be mutated between calls.
 *
 * @param keyedArray - Array of keyed objects to search
 * @param targetKey - The `_key` to look for
 * @returns The index of the item, or `undefined` if no item has that key
 */
function getIndexForKey(keyedArray: KeyedSanityObject[], targetKey: string): number | undefined {
  let keyToIndexMapping = keyToIndexCache.get(keyedArray)

  if (!keyToIndexMapping) {
    // Build a mapping from _key to array index
    keyToIndexMapping = new Map(
      keyedArray.map(({_key}, arrayIndex) => [_key, arrayIndex] as const),
    )
    keyToIndexCache.set(keyedArray, keyToIndexMapping)
  }

  return keyToIndexMapping.get(targetKey)
}

/**
 * Type guard for non-null, non-array objects.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && !!value && !Array.isArray(value)
}

/**
 * Simply returns `null` if the value given was `undefined`. This behavior
 * is the same as how `JSON.stringify` works so this is relatively expected
 * behavior.
 */
function nullifyUndefined(item: unknown) {
  if (item === undefined) return null
  return item
}

/**
 * Filter predicate that accepts every value.
 */
function yes(_: unknown) {
  return true
}