UNPKG

@asmartbear/diff-merge

Version:

Text and arbitrary-array diff and merge, fast, multiple algorithms

510 lines (509 loc) 24.6 kB
/**
 * Returns the number of characters that both strings have in common at their start (i.e. the length of
 * their common prefix, per the function name), or 0 if there is none or if at least one argument is null
 * or undefined.
 *
 * Contains a few optimizations (that benchmarks indicate are, in fact, useful), and is Unicode-safe.
 *
 * @param a first string; may be null or undefined
 * @param b second string; optional, and may be null or undefined
 * @returns the common length, or 0 as described above
 */
export declare function getCommonPrefixLength(a: string | null | undefined, b?: string | null | undefined): number;

/**
 * Holds an array with offset and length, so we can create a pseudo-array "view" into an array without
 * making large-array copies or allocations. The "end" is inclusive!
 */
export declare class ArrayView<T> {
    /** The underlying array that this view looks into. */
    readonly a: T[];
    /** Offset into the underlying array at which this view starts. */
    readonly start: number;
    /** Offset into the underlying array at which this view ends. Inclusive! */
    readonly end: number;

    /**
     * Creates a view onto `a`.
     *
     * NOTE(review): `start` and `end` are optional; their defaults are not visible in this declaration --
     * presumably they default to covering the whole array. Confirm against the implementation.
     */
    constructor(a: T[], start?: number, end?: number);

    /**
     * Creates a view with no elements (per the name; see also the `empty` getter).
     */
    static createEmpty<T>(): ArrayView<T>;

    /**
     * The number of elements in this view.
     */
    get length(): number;

    /**
     * True if this view is empty, i.e. represents the empty array, and has length 0.
     */
    get empty(): boolean;

    /**
     * Like Array.indexOf(), but includes start (relative to the actual start of the underlying array) and end
     * (inclusive and relative to the actual underlying array).
     */
    indexOf(target: T, start?: number, end?: number): number;

    /**
     * Same behavior as String#indexOf: Find another ArrayView as a consecutive substring of this array, returning
     * the (absolute) index of the first such position, or -1 if not found.
     */
    indexOfSubstring(target: ArrayView<T>, start?: number, end?: number): number;

    /**
     * Creates a copy of this subsection of the array, which is therefore modifiable, and allocates memory.
     */
    getCopy(): T[];

    /**
     * Like Array.forEach(), but on this view of the array, and with more restrictions on the callback function:
     * the callback receives only the element (no index or array arguments).
     */
    forEach(f: (el: T) => void): void;

    /**
     * Like Array.map(), but on this view of the array, and with more restrictions on the callback function:
     * the callback receives only the element, and must return the same element type `T`.
     */
    map(f: (el: T) => T): T[];

    /**
     * Like Array.map(), but on this view of the array, and the mapping function converts to a string.
     */
    mapToString(f: (el: T) => string): string[];

    /**
     * Pushes all of the elements from this view onto the end of the given array.
     */
    pushAll(arry: T[]): void;

    /**
     * Retrieves an element from the array, relative to the start of this view.
     */
    getElement(relative_index: number): T;

    /**
     * Returns an ArrayView that is a subsequence, starting at an offset relative to `this`, and with a given length.
     * If the result would be identical to `this`, then `this` itself is returned without allocating a new view.
     * If length is missing, the subsequence goes to the end.
     */
    getSubsequence(relative_start: number, relative_length?: number): ArrayView<T>;

    /**
     * Gets an array in reverse order, just the elements inside this view, as a new view.
     * This allocates memory.
     */
    getReverse(): ArrayView<T>;

    /**
     * Returns a string representation of this view.
     * NOTE(review): the exact format is not visible in this declaration.
     */
    toString(): string;

    /**
     * True if this subsequence is equal to the other subsequence, false otherwise.
     */
    equals(that: ArrayView<T>): boolean;

    /**
     * Returns -1, 0, or 1, indicating whether `this` is less than, equal to, or greater than `that`, comparing
     * like strings, i.e. one element at a time, returning the answer as soon as elements are unequal; if the
     * shorter is equal to the prefix of the longer, the shorter is deemed smaller.
     */
    compare(that: ArrayView<T>): -1 | 0 | 1;

    /**
     * Concatenates a sequence onto this one, NOT changing the current one, but rather returning a new result.
     * This is akin to Array#concat() with just one parameter.
     * Various optimizations prevent allocating memory whenever possible.
     */
    concat(that: ArrayView<T>): ArrayView<T>;

    /**
     * Returns the number of elements that are common to the first elements in both arrays.
     * Each array can be any size, including empty, but cannot be null or undefined.
     */
    getLengthOfCommonPrefix(that: ArrayView<T>): number;

    /**
     * Returns the number of elements that are common to the last elements in both arrays.
     * Each array can be any size, including empty, but cannot be null or undefined.
     */
    getLengthOfCommonSuffix(that: ArrayView<T>): number;

    /**
     * Finds the longest length L of a suffix of `this` which overlaps with a prefix of `that` also of length L.
     * Each array can be any size, including empty, but not null or undefined. Returns 0 if there is no commonality.
     */
    getLengthOfOverlapAtMyEnd(that: ArrayView<T>): number;

    /**
     * Returns a pair of ArrayViews containing the longest common substring in `ths` and `tht` respectively.
     * Both views in the pair will be empty if there's no common substring.
     *
     * This particular algorithm is slow -- O(N*M) -- but always finds the right answer, is simple, approaches
     * O(N) if the longest substring is long, and uses only O(1) additional memory.
     * It uses a few techniques to speed up finding the first element of a substring,
     * and anticipates that a found substring might be extended in-place to speed up the best-so-far length.
     * In short, the N-loop is slow but the M-loop is fast, and further, it orders strings such that N <= M.
     */
    static getLongestCommonSubstringGrowingLinearScan<T>(ths: ArrayView<T>, tht: ArrayView<T>): [ArrayView<T>, ArrayView<T>];

    /**
     * Returns a pair of ArrayViews containing the longest common substring in `ths` and `tht` respectively.
     * Both views in the pair will be empty if there's no common substring.
     *
     * This algorithm uses an "optimistic" method, in which if there are long common substrings (relative to the
     * shortest string), one will be found quickly, in O(M) comparisons. It is still O(NM) in the worst case,
     * which is the lack of a common substring, or a common substring of 1 character.
     */
    static getLongestCommonSubstringOptimisticBisect<T>(ths: ArrayView<T>, tht: ArrayView<T>): [ArrayView<T>, ArrayView<T>];

    /**
     * Finds the longest common substring between two strings, returning the offset in each of the two input
     * strings (`a_offset`, `b_offset`), and the length (`len`).
     *
     * This algorithm uses an "optimistic" method, in which if there are long common substrings (relative to the
     * shortest string), one will be found quickly, in O(M) comparisons. It is still O(NM) in the worst case,
     * which is the lack of a common substring, or a common substring of 1 character.
     */
    static getLongestCommonSubstringOptimisticBisectString(a: string, b: string): {
        a_offset: number;
        b_offset: number;
        len: number;
    };

    /**
     * Returns the index coordinates of the middle of the longest common subsequence between the two arrays.
     * The longest subsequence is the longest set of ordered (but not necessarily consecutive) equal elements
     * from both arrays. The middle is often but not always along a common substring. Even when it is a common
     * substring, it is often not the globally-longest common substring.
     *
     * The index is relative to the start of each ArrayView, not relative to the absolute position inside the
     * underlying array.
     *
     * If there is no common subsequence whatsoever, `null` is returned.
     */
    getLongestCommonSubsequenceMiddleMyers(that: ArrayView<T>): [number, number] | null;

    /**
     * Breaks a string into individual characters, and returns an array view of those characters.
     */
    static fromCharacters(str: string): ArrayView<string>;

    /**
     * Breaks a string into tokens based on any regular expression, and returns an array view of those tokens.
     */
    static fromTokens(str: string, re: RegExp): ArrayView<string>;

    /**
     * Tokenizes a string assuming the use-case is plain-text prose.
     */
    static fromPlainProse(str: string): ArrayView<string>;

    /**
     * Tokenizes a string by lines. Includes the line-ending character as well.
     */
    static fromLines(str: string): ArrayView<string>;
}

/**
 * Per-symbol record kept by `Histogram`: the symbol's ID, the number of times it appears (`count`), and
 * the location of its first appearance (`first_offset`), as described in the `Histogram` class comment.
 */
declare type SymbolInformation = {
    id: string | number;
    count: number;
    first_offset: number;
};

/**
 * Generates a histogram from a set of tokens, counting the number of times each appears, and the location
 * at which the first instance of that token appears.
 */
export declare class Histogram<T> {
    /**
     * Map of each unique symbol's ID to its information record.
     */
    readonly histogram: Record<string | number, SymbolInformation>;

    /**
     * Map of the relative position in the original input to the symbol ID, plus an optional `other_offset`
     * field that can be used by algorithms.
     *
     * NOTE(review): the original comment was truncated mid-sentence ("...can be used in algorithms to");
     * the intended use of `other_offset` is not visible here -- presumably a matching offset in the other
     * input. Confirm against the implementation.
     */
    readonly ordered: {
        id: string | number;
        other_offset?: number;
    }[];

    /**
     * Builds the histogram from the elements of `input`.
     */
    constructor(input: ArrayView<T>);

    /**
     * Given an element from the original input, returns its histogram record, or `undefined` if it's not
     * present in the original input.
     */
    lookupElement(el: T): SymbolInformation | undefined;

    /**
     * Converts any type of element into a unique ID that is used as a key in the histogram map.
     */
    private getIdForElement;
}

/**
 * A single edit, expressed as a pair of subsequences: `prev` (the content before the edit) and `next`
 * (the content after it). The `is...()` methods classify the edit as an equality, a pure insertion,
 * a pure deletion, or a modification.
 */
export declare class Edit<T> {
    /** The subsequence on the "previous" side of this edit. */
    readonly prev: ArrayView<T>;
    /** The subsequence on the "next" side of this edit. */
    readonly next: ArrayView<T>;

    constructor(prev: ArrayView<T>, next: ArrayView<T>);

    /**
     * Creates an edit representing content that is kept unchanged.
     * NOTE(review): presumably `keep` is used for both `prev` and `next` -- confirm.
     */
    static createEquality<T>(keep: ArrayView<T>): Edit<T>;

    /**
     * Creates an edit that inserts `ins` with no deletion.
     * NOTE(review): presumably `prev` is an empty view -- confirm.
     */
    static createPureInsertion<T>(ins: ArrayView<T>): Edit<T>;

    /**
     * Creates an edit that deletes `del` with no insertion.
     * NOTE(review): presumably `next` is an empty view -- confirm.
     */
    static createPureDeletion<T>(del: ArrayView<T>): Edit<T>;

    /**
     * True if this edit represents equal subsequences in the previous and next.
     *
     * @readonly
     */
    isEquality(): boolean;

    /**
     * True if this edit is an insertion, with no deletion.
     *
     * @readonly
     */
    isPureInsertion(): boolean;

    /**
     * True if this edit is a deletion, with no insertion.
     *
     * @readonly
     */
    isPureDeletion(): boolean;

    /**
     * True if this edit is a modification, i.e. both a non-trivial delete and a non-trivial insert at the
     * same location.
     *
     * @readonly
     */
    isModification(): boolean;

    /**
     * Creates and returns a new Edit object, that is the same as this one, but the opposite.
     * Inserts become deletes. Equalities are returned without creating a new Edit object.
     *
     * @readonly
     */
    getConverse(): Edit<T>;

    /**
     * Returns a human-readable, but not machine-usable, representation of this edit.
     *
     * @param matched if true, "equality" operations are surrounded by parentheses to be explicit, otherwise
     *                they are plain for easier readability
     */
    toString(matched?: boolean): string;
}

/**
 * An ordered list of `Edit` objects, with methods to inspect, traverse, and rewrite the list in place.
 */
export declare class EditScript<T> {
    /** The edits making up this script. */
    private readonly edits;

    constructor();

    /**
     * NOTE(review): presumably the number of edits in the script -- confirm.
     */
    get length(): number;

    /**
     * NOTE(review): presumably true when the script contains no edits -- confirm.
     */
    get empty(): boolean;

    /**
     * Adds an edit at the end of the script.
     * NOTE(review): the returned EditScript is presumably `this`, for chaining -- confirm.
     */
    append(e: Edit<T>): EditScript<T>;

    /**
     * Adds an edit at the start of the script.
     * NOTE(review): the returned EditScript is presumably `this`, for chaining -- confirm.
     */
    prepend(e: Edit<T>): EditScript<T>;

    /**
     * Returns a human-readable, but not machine-usable, representation of the entire edit script.
     *
     * @param matched if true, "equality" operations are surrounded by parentheses to be explicit, otherwise
     *                they are plain for easier readability
     */
    toString(matched?: boolean): string;

    /**
     * Given the output of `EditScript<T>.toString()`, parses and returns the result as a string-typed EditScript.
     */
    static fromString(s: string): EditScript<string>;

    /** NOTE(review): presumably the patterns used by `fromString()` for each kind of edit -- confirm. */
    private static re_delete;
    private static re_insert;
    private static re_equal;

    /**
     * Recreates the "previous" sequence using only edits, concatenating back to an array.
     */
    getPrev(): T[];

    /**
     * Recreates the "next" sequence using only edits, concatenating back to an array.
     */
    getNext(): T[];

    /**
     * Creates and returns a new EditScript, that is the same as this one, but goes the opposite direction.
     * Inserts become deletes.
     */
    getConverse(): EditScript<T>;

    /**
     * Creates a string, assuming the script represents strings of text, emitting lines with a gutter
     * of '+' for insertion, '-' for deletion, or ' ' for equality.
     */
    getScriptAsFormattedLines(): string;

    /**
     * Visits all Edits, in forward order (which disallows changing the edit list while iterating).
     */
    visitEditsForward(f_visit: (edit: Edit<T>) => void): void;

    /**
     * Visits all Edits, in reverse order (which allows for changes to the underlying edit script).
     *
     * The callback function can return null to indicate that no change should be made in the edit script,
     * or it can return an array which replaces the visited edit completely. If the edit should be
     * preserved, just include it in the array. It is legal for the array to be any length, including empty.
     *
     * NOTE(review): the original comment spoke of replacing "the two edits", which appears to be copied from
     * `visitEditPairs()`; since the callback receives a single edit, it presumably replaces just that one.
     */
    visitEdits(f_visit: (edit: Edit<T>) => Edit<T>[] | null): void;

    /**
     * Visits all pairs of Edits, in reverse order (which allows for changes to the underlying edit script).
     * Won't visit anything if there's just one Edit.
     *
     * The callback function can return null to indicate that no change should be made in the edit script,
     * or it can return an array which replaces the two edits completely. If some of the edits should be
     * preserved, just include them in the array. It is legal for the array to be any length, including empty.
     */
    visitEditPairs(f_visit: (left: Edit<T>, right: Edit<T>) => Edit<T>[] | null): void;

    /**
     * Visits all trios of Edits, in reverse order (which allows for changes to the underlying edit script).
     * Won't visit anything if there are fewer than three Edits.
     *
     * The callback function can return null to indicate that no change should be made in the edit script,
     * or it can return an array which replaces the three edits completely. If some of the edits should be
     * preserved, just include them in the array. It is legal for the array to be any length, including empty.
     */
    visitEditTrios(f_visit: (left: Edit<T>, middle: Edit<T>, right: Edit<T>) => Edit<T>[] | null): void;

    /**
     * Scans for an Equality edit (the "middle"), surrounded on both sides by either two insertions, two deletions,
     * or one modification and any change on the other side. These are the conditions in which it is
     * legal to join the left, middle, and right edits into a single edit. While the resulting script is
     * equivalent, it reduces the total number of edits in a way that might be preferable, usually for semantic
     * reasons. For example, a single space separating changes to words on either side probably should be
     * folded into a single semantically-meaningful change.
     *
     * A callback function is consulted to determine whether this transformation should actually be executed.
     * The callback is provided the Middle content, and is called only if the surrounding Edits are valid for
     * this transformation.
     */
    collapseMiddleEquality(f_should_collapse: (middle: ArrayView<T>) => boolean): void;

    /**
     * Attempts to further simplify all non-pure modifications, changing the script in-place.
     * Returns true if any were transformed, otherwise returns false.
     *
     * Supply a preconfigured engine to execute simplifications.
     */
    reeditModifications(simplification_engine: Engine<T>): boolean;

    /**
     * Reduces edits until they alternate between equality and modification.
     * Returns true if there was at least one coalescing of a modification-style edit.
     */
    coalesce(): boolean;

    /**
     * Rewrites Edit records to shift rightward when it is possible to do so without changing the result.
     * This is a way to normalize output, as well as often semantically better, as it makes insertions closer
     * to an append. For example, with "Hello." vs "Hello...", you could report "Hello{..}." but "Hello.{..}"
     * is more likely to be what is intended.
     *
     * It is possible that an equality is completely eliminated as a result of this. If that happens, this will
     * automatically run a coalesce() to restore the system to its normal state.
     *
     * This will also shift edits leftward in the special case that doing so would cause one "equality" Edit to
     * completely disappear, thus collecting into fewer total edits. In all other cases, shifting is rightward-only.
     *
     * Returns `true` if any changes were made, `false` otherwise.
     */
    shiftEdits(): boolean;
}

/**
 * Strategy for handling inserts that both sides of a merge make at exactly the same place
 * (see `Merge.opt_combine_overlapping_inserts`).
 *
 * NOTE(review): the precise behavior of each member is not visible in this declaration; going by the names,
 * presumably KEEP_STATUS_QUO keeps only the status-quo side's insert, KEEP_BOTH keeps both inserts, and
 * MERGE combines them (see `Merge.takeBoth()`). Confirm against the implementation.
 */
export declare enum MergeConflictingInsertsAlgorithm {
    KEEP_STATUS_QUO = 0,
    KEEP_BOTH = 1,
    MERGE = 2
}

/**
 * An engine that can perform two-way or three-way merge, with configuration settings.
 */
export declare class Merge<T> {
    /**
     * Controls how to examine inserts that happen at exactly the same place, e.g. combining a common prefix,
     * a common suffix, or the case when a prefix of one matches a suffix of the other.
     *
     * When diffs might have already been applied, or in the case where the alphabet contains unique items,
     * combining will merge better. Otherwise it can incorrectly merge things that are truly separate, for example
     * if both sides append a new bullet point to a section, combining will incorrectly "merge" the two bullet
     * points, when in fact they are logically completely separate in the minds of the authors. On the other hand,
     * if one person pasted part of a change that another person made, combining will resolve that properly,
     * instead of actually having two copies of the pasted part.
     *
     * NOTE(review): the original comment described this as a boolean ("If true, ...") with "Default: False.",
     * but the declared type is the `MergeConflictingInsertsAlgorithm` enum. The actual default is not visible
     * here -- presumably `KEEP_STATUS_QUO` (value 0). Confirm against the implementation.
     */
    opt_combine_overlapping_inserts: MergeConflictingInsertsAlgorithm;

    /**
     * If we are combining overlapping inserts (see other options), this is the algorithm used to merge.
     */
    opt_algorithm_combined_overlapping_inserts: DiffAlgorithm;

    /**
     * When one side makes an insertion, but the other side deletes the region surrounding the insertion point,
     * the default behavior is to silently ignore the insertion. Reason: Imagine that Alice makes a typo correction
     * within a sentence, while Bob deletes the entire sentence. It makes sense for Alice's change to be ignored.
     *
     * However, it can be useful to preserve the insertion, either in a separate "exceptions" list, or in-line in the
     * content, perhaps formatted in a special way to indicate that something exceptional happened.
     *
     * This function will be called at the moment that an insertion is being rejected. The output array will have been
     * fully populated to this point, and it can be altered; not just appended to, but altered in any way. Of course
     * the callback can also update some arbitrary external state.
     */
    opt_f_handle_deleted_insert: (output: T[], insertion: ArrayView<T>) => void;

    /** NOTE(review): presumably the diff engine used internally by `takeBoth()` -- confirm. */
    private take_both_diff_engine;

    constructor();

    /**
     * Given two arrays, which are theoretically "matched up" (e.g. two insertions at the same location), creates a
     * new array that "merges" both, meaning taking content from both, but including duplicated content only once.
     * There is no such thing as a "deletion." All changes are assumed to be content that needs to be kept.
     *
     * For example, ("a","b") => "ab", but ("hi there","hi you") => "hi thereyou".
     */
    takeBoth(a: ArrayView<T>, b: ArrayView<T>, algorithm: DiffAlgorithm): T[];

    /**
     * Three-way merge between a common state and the "status quo" (which is the default "winner," when we need a
     * tie-breaker), and a set of differences between the same common state and a state to "apply."
     *
     * Most situations are symmetrical, but not all, hence the difference in semantics.
     *
     * If the common state is available, it can be passed in. Otherwise, it is computed from the differences.
     */
    merge3(status_quo: EditScript<T>, apply: EditScript<T>, common?: T[] | null): T[];
}

/**
 * Selects which diff algorithm an `Engine` (or `Merge`) uses.
 *
 * NOTE(review): the mapping of each member to an implementation is not visible in this declaration; the names
 * line up with the private `Engine` methods `executePreprocessor`, `executeSimple`,
 * `executeLongestCommonSubstring`, `executeMyers` and `executeHeckel`. No method matching PATIENCE is
 * declared here. Confirm against the implementation.
 */
export declare enum DiffAlgorithm {
    PREPROCESSOR_ONLY = 0,
    SIMPLE = 1,
    LCS = 2,
    MYERS = 3,
    HECKEL = 4,
    PATIENCE = 5
}

/**
 * A configurable difference engine that computes the `EditScript` transforming one sequence into another.
 */
export declare class Engine<T> {
    /** The diff algorithm to use. NOTE(review): default not visible here. */
    opt_algorithm: DiffAlgorithm;

    /**
     * An optional second algorithm, or null for none.
     * NOTE(review): how and when it is applied is not visible here -- confirm.
     */
    opt_secondary_algorithm: DiffAlgorithm | null;

    /**
     * NOTE(review): presumably enables the rightward shifting described at `EditScript.shiftEdits()` -- confirm.
     */
    opt_shift_rightward: boolean;

    /**
     * Optional predicate on a "middle" equality, or null for none.
     * NOTE(review): presumably passed to `EditScript.collapseMiddleEquality()`, whose callback has the same
     * signature -- confirm.
     */
    opt_f_collapse_equalities: ((middle: ArrayView<T>) => boolean) | null;

    /**
     * Function that, given two views, returns a pair of views; the signature matches the static
     * `ArrayView.getLongestCommonSubstring...()` functions.
     * NOTE(review): presumably the longest-common-substring finder used by the LCS algorithm -- confirm.
     */
    opt_f_lcs: (a: ArrayView<T>, b: ArrayView<T>) => [ArrayView<T>, ArrayView<T>];

    constructor();

    /**
     * Computes and returns the edit sequence that transforms `prev` into `next`, according to the configured options.
     */
    getEdits(prev: ArrayView<T>, next: ArrayView<T>): EditScript<T>;

    /**
     * Computes the edit sequence that transforms `prev` into `next`, using a few fast operations, taking O(1),
     * O(min(N,M)), or O(N) time and O(1) space. In simple cases, the entire edit sequence is determined; in complex
     * cases, the simpler aspects are removed, leaving only the challenging remainder.
     *
     * The processor is called only if there is a remaining non-trivial difference. The arguments will be non-empty,
     * and they will mismatch at both their start and end, and they will be at least 2 elements in length; it can be
     * useful for the subsequence processor to make use of these facts to simplify its algorithm.
     */
    private executePreprocessor;

    /**
     * A trivial execution processor, that just emits a single "modification" edit, sending `prev` to `next`.
     */
    private executeSingleModification;

    /**
     * Attempts to completely evaluate the differences using only simple and fast algorithms, i.e. when the
     * worst case is O(1) or O(N). If those cases are handled, the subprocessor will not be invoked; otherwise it
     * will be invoked as the simpler cases were unsuccessful.
     */
    private executeEasyCases;

    /**
     * Identifies a few straightforward cases, such as when there's an overlap at the start or end, or when the
     * smaller is a subset of the longer. If no straightforward case is successful, the given processor is called
     * to continue working on the problem, otherwise it is not invoked.
     */
    private executeSimple;

    /**
     * Finds the longest common substring between two sides, keeping that as an "equality" edit, and recursively
     * computing the edit script for the two surrounding pieces.
     */
    private executeLongestCommonSubstring;

    /**
     * Retains the longest common subsequence as equalities, producing minimal Edits for insert/delete.
     * This can miss the longest common substring, or "interesting" common elements, but does maximize
     * how much total material is kept.
     */
    private executeMyers;

    /**
     * Runs a version of the algorithm of Heckel 1978, in which a histogram is used to locate the longest substrings
     * that are not only common to both, but appear only once in both, then taking an equality there.
     *
     * This can create suboptimal modifications as it deals with "move" operations. If you want to preserve the
     * concept of a move, then those aren't suboptimal! If you don't, they can be coalesced into something simpler.
     */
    private executeHeckel;
}

/**
 * An instruction to insert the text `txt` at index `idx`; produced by `StringEngine.getStringOffsets()`.
 */
export interface StringInsert {
    op: "insert";
    idx: number;
    txt: string;
}

/**
 * An instruction to delete `len` units of text starting at index `idx`; produced by
 * `StringEngine.getStringOffsets()`.
 * NOTE(review): whether `idx`/`len` count UTF-16 code units or something else is not visible here -- confirm.
 */
export interface StringDelete {
    op: "delete";
    idx: number;
    len: number;
}

/**
 * Either kind of string instruction, discriminated by the `op` field.
 */
export declare type StringOp = StringInsert | StringDelete;

/**
 * An `Engine` specialized for strings, with convenience methods that tokenize the input strings
 * (by character, by token, by line, or for prose) before computing the edits.
 */
export declare class StringEngine extends Engine<string> {
    /**
     * Returns edits, with differences on a per-character basis. This is most-detailed, but often not what a
     * human might expect to see.
     */
    getEditsByCharacter(prev: string, next: string): EditScript<string>;

    /**
     * Returns edits, first splitting the string into tokens, treating anything not matching a token as an
     * individual character.
     */
    getEditsByToken(prev: string, next: string, re: RegExp): EditScript<string>;

    /**
     * Returns edits, using the line as the unit of comparison.
     */
    getEditsByLine(prev: string, next: string): EditScript<string>;

    /**
     * Returns edits, assuming configuration that is better for prose.
     */
    getEditsByProse(prev: string, next: string): EditScript<string>;

    /**
     * Converts an `EditScript<string>` that was generated by a difference engine into a list
     * of `insert` and `delete` instructions relative to the "previous" string used in the
     * original diff.
     *
     * @param script the edit script to convert
     * @param rollingUpdates if true, indexes are listed as if the underlying string is being transformed as we go,
     *                       otherwise they are relative to the original string
     */
    static getStringOffsets(script: EditScript<string>, rollingUpdates: boolean): StringOp[];
}

export {};