lib0
Version:
> Monorepo of isomorphic utility functions
227 lines (217 loc) • 6.78 kB
JavaScript
/**
* A very simple diff algorithm. Slightly adapted to support splitting at different stages (e.g.
* first diff lines, then diff words)
*
* https://bramcohen.livejournal.com/73318.html
*
* @experimental This API will likely change.
*/
import * as map from '../map.js'
import * as math from '../math.js'
import * as array from '../array.js'
/**
 * Patience diff over two pre-split sequences (e.g. the lines of two texts).
 *
 * Entries that both sequences share at their start and end are stripped first. Only the differing
 * middle part is handed to `lcs`, together with the length of the stripped prefix, so that the
 * reported indexes refer to positions in the original `as`.
 *
 * @param {Array<string>} as
 * @param {Array<string>} bs
 * @return {Array<{ index: number, remove: Array<string>, insert: Array<string>}>} changeset @todo should use delta instead
 */
export const diff = (as, bs) => {
  const trimmed = removeCommonPrefixAndSuffix(as, bs)
  return lcs(trimmed.middleAs, trimmed.middleBs, trimmed.commonPrefix)
}
/**
 * Diff two strings after splitting them into tokens.
 *
 * - If `_regexp` is a string, it is treated as a literal separator: both inputs are split at every
 *   occurrence, the separators themselves are not diffed, and the tokens of each change are
 *   re-joined with the separator. Separators adjacent to a change are not part of
 *   `remove` / `insert`.
 * - If `_regexp` is a RegExp (it needs the `g` flag, as required by `String.prototype.matchAll`),
 *   the matches are kept as tokens of their own, so the tokens cover the whole input.
 *
 * @param {string} a
 * @param {string} b
 * @param {RegExp|string} _regexp
 * @return {Array<{ index: number, remove: string, insert: string }>} Changes in ascending order.
 *   `index` is the character offset in `a` at which the first token affected by the change starts,
 *   or `a.length` if the change is located after the last token.
 */
export const diffSplitBy = (a, b, _regexp) => {
  const isStringSeparator = typeof _regexp === 'string'
  const separator = isStringSeparator ? _regexp : ''
  // A string separator is meant literally (it is also used verbatim for re-joining), so regex
  // metacharacters must be escaped before compiling it.
  const regexp = isStringSeparator
    ? new RegExp(_regexp.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g')
    : _regexp
  const as = splitByRegexp(a, regexp, !isStringSeparator)
  const bs = splitByRegexp(b, regexp, !isStringSeparator)
  // Character offset in `a` of every token in `as`. Summing token lengths is not enough in
  // string-separator mode because the separators are missing from `as`; instead, replay the same
  // match scan that produced the tokens and record where each one starts.
  /**
   * @type {Array<number>}
   */
  const tokenOffsets = []
  let scanned = 0
  for (const m of a.matchAll(regexp)) {
    const matchStart = m.index || 0
    if (scanned < matchStart) tokenOffsets.push(scanned) // text between two matches
    if (!isStringSeparator) tokenOffsets.push(matchStart) // the match itself is a token
    scanned = matchStart + m[0].length
  }
  if (scanned < a.length) tokenOffsets.push(scanned) // trailing text after the last match
  return diff(as, bs).map(change => ({
    index: change.index < tokenOffsets.length ? tokenOffsets[change.index] : a.length,
    remove: change.remove.join(separator),
    insert: change.insert.join(separator)
  }))
}
/**
 * Convenience diff for plain text.
 *
 * Runs two passes: the inputs are first diffed line by line (split at '\n'). The removed and
 * inserted text of every line-level change is then diffed again at token level (runs of
 * alphanumerics, ". ", and single spaces / brackets / punctuation characters). The index of each
 * token-level change is shifted by the index of the line-level change it was derived from.
 *
 * @param {string} a
 * @param {string} b
 */
export const diffAuto = (a, b) => {
  const lineChanges = diffSplitBy(a, b, '\n')
  return lineChanges.flatMap(lineChange => {
    const tokenChanges = diffSplitBy(lineChange.remove, lineChange.insert, /\. |[a-zA-Z0-9]+|[. ()[\],;{}]/g)
    return tokenChanges.map(tokenChange => ({
      insert: tokenChange.insert,
      remove: tokenChange.remove,
      index: tokenChange.index + lineChange.index
    }))
  })
}
/**
 * Strip the entries that both sequences share at their start and at their end.
 *
 * The suffix scan is limited to what is left of the shorter sequence after the prefix scan, so an
 * entry is never counted as part of both the prefix and the suffix.
 *
 * @param {Array<string>} as
 * @param {Array<string>} bs
 * @return {{ middleAs: Array<string>, middleBs: Array<string>, commonPrefix: number, commonSuffix: number }}
 *   The remaining middle parts and the number of entries stripped on each side.
 */
const removeCommonPrefixAndSuffix = (as, bs) => {
  const shorterLen = math.min(as.length, bs.length)
  // count matching entries from the front
  let commonPrefix = 0
  while (commonPrefix < shorterLen && as[commonPrefix] === bs[commonPrefix]) {
    commonPrefix++
  }
  // count matching entries from the back, without running into the prefix
  const suffixLimit = shorterLen - commonPrefix
  let commonSuffix = 0
  while (commonSuffix < suffixLimit && as[as.length - 1 - commonSuffix] === bs[bs.length - 1 - commonSuffix]) {
    commonSuffix++
  }
  return {
    middleAs: as.slice(commonPrefix, as.length - commonSuffix),
    middleBs: bs.slice(commonPrefix, bs.length - commonSuffix),
    commonPrefix,
    commonSuffix
  }
}
/**
 * Cut a string into pieces at every match of `regexp`.
 *
 * The text between two matches (and before the first / after the last match) becomes a piece
 * whenever it is non-empty. The matched text itself is emitted as a piece only if
 * `includeSeparator` is set; otherwise it is dropped.
 *
 * @param {string} str
 * @param {RegExp} regexp Must carry the `g` flag (`String.prototype.matchAll` throws otherwise).
 * @param {boolean} includeSeparator
 * @return {Array<string>}
 */
const splitByRegexp = (str, regexp, includeSeparator) => {
  /**
   * @type {Array<string>}
   */
  const pieces = []
  let cursor = 0
  for (const found of str.matchAll(regexp)) {
    const start = found.index || 0
    const matched = found[0]
    if (cursor < start) {
      pieces.push(str.slice(cursor, start))
    }
    if (includeSeparator) {
      pieces.push(matched)
    }
    cursor = start + matched.length
  }
  if (cursor < str.length) {
    pieces.push(str.slice(cursor))
  }
  return pieces
}
/**
* Bookkeeping record for one distinct entry of a partitioned sequence (see `partition`).
*
* An item may have multiple occurrences (not when matching unique entries). While computing a diff
* it can additionally be linked to the item for the same entry on the other side (`match`, from as
* to bs) and to an item on the previous patience-sort pile (`ref`).
*/
class Item {
constructor () {
/**
* Positions at which the entry occurs in the partitioned sequence, in ascending order.
*
* @type {Array<number>}
*/
this.indexes = []
/**
* The matching item from the other side, or null while unset / if there is none.
*
* @type {Item?}
*/
this.match = null
/**
* For patience sort. Reference to the item that was on top of the previous pile when this item
* was placed on its pile. Null for items on the first pile.
*
* @type {Item?}
*/
this.ref = null
}
}
/**
 * Group the positions of a sequence by entry.
 *
 * @param {Array<string>} xs
 * @return {Map<string,Item>} One `Item` per distinct entry, whose `indexes` list every position at
 *   which that entry occurs in `xs`. Keys appear in order of first occurrence.
 */
const partition = xs => {
  /**
   * @type {Map<string,Item>}
   */
  const itemsByEntry = map.create()
  xs.forEach((entry, position) => {
    const item = map.setIfUndefined(itemsByEntry, entry, () => new Item())
    item.indexes.push(position)
  })
  return itemsByEntry
}
/**
* Compute the changeset that turns `as` into `bs`, anchored on entries that are unique on both
* sides (patience sort).
*
* Steps, as implemented below:
* 1. Group both inputs by entry (`partition`).
* 2. Every entry that occurs exactly once in `as` and exactly once in `bs` is an anchor candidate.
*    The candidates are dealt onto piles, compared by their position in `bs`.
* 3. A chain of anchors is read back through the `ref` links, starting at the last pile.
* 4. The stretches between consecutive anchors (and before a terminal pseudo anchor) are emitted
*    as changes, after stripping the entries they share at their start and end.
*
* @param {Array<string>} as
* @param {Array<string>} bs
* @param {number} indexAdjust Added to every reported index (`diff` passes the length of the common prefix it stripped).
* @return {Array<{ index: number, remove: Array<string>, insert: Array<string>}>} Changes in ascending index order.
*/
const lcs = (as, bs, indexAdjust) => {
if (as.length === 0 && bs.length === 0) return []
const aParts = partition(as)
const bParts = partition(bs)
/**
* @type {Array<Array<Item>>} I.e. Array<Pile<Item>>
*/
const piles = []
// Map iteration follows insertion order, so entries are visited in order of first occurrence in `as`.
aParts.forEach((aItem, aKey) => {
// Skip if there is no match or if either item is not unique. Note that the condition also
// assigns `aItem.match` as a side effect (only reached when the entry is unique in `as`).
if (aItem.indexes.length > 1 || (aItem.match = bParts.get(aKey) || null) == null || aItem.match.indexes.length > 1) return
// Put the item on the first pile whose top item sits at a later position in `bs`.
for (let i = 0; i < piles.length; i++) {
const pile = piles[i]
if (aItem.match.indexes[0] < /** @type {Item} */ (pile[pile.length - 1].match).indexes[0]) {
pile.push(aItem)
// remember the current top of the previous pile as predecessor
if (i > 0) aItem.ref = array.last(piles[i - 1])
return
}
}
// No pile qualifies: open a new pile, with the current top of the last pile as predecessor.
piles.length > 0 && (aItem.ref = array.last(piles[piles.length - 1]))
piles.push([aItem])
})
/**
* References to all matched items
*
* @type {Array<Item>}
*/
const matches = []
/**
* Starts at the first item that was placed on the last pile (undefined if there are no piles) and
* then walks the `ref` links back towards the first pile.
*
* @type {Item?}
*/
let currPileItem = piles[piles.length - 1]?.[0]
while (currPileItem != null) {
matches.push(currPileItem)
currPileItem = currPileItem.ref
}
// the chain was collected back to front
matches.reverse()
// add pseudo match (assume the string terminal always matches), so that the loop below also
// handles whatever follows the last real match
const pseudoA = new Item()
const pseudoB = new Item()
pseudoA.match = pseudoB
pseudoA.indexes.push(as.length)
pseudoB.indexes.push(bs.length)
matches.push(pseudoA)
/**
* @type {Array<{ index: number, remove: Array<string>, insert: Array<string>}>}
*/
const changeset = []
// start (in `as` / `bs`) of the stretch that precedes the match currently being processed
let diffAStart = 0
let diffBStart = 0
for (let i = 0; i < matches.length; i++) {
const m = matches[i]
// number of entries between the previous match and this one, on each side
const delLength = m.indexes[0] - diffAStart
const insLength = /** @type {Item} */ (m.match).indexes[0] - diffBStart
if (delLength !== 0 || insLength !== 0) {
// The stretch may still begin or end with identical entries; report only the part that differs.
const stripped = removeCommonPrefixAndSuffix(as.slice(diffAStart, diffAStart + delLength), bs.slice(diffBStart, diffBStart + insLength))
if (stripped.middleAs.length !== 0 || stripped.middleBs.length !== 0) {
changeset.push({ index: diffAStart + indexAdjust + stripped.commonPrefix, remove: stripped.middleAs, insert: stripped.middleBs })
}
}
// continue right after the matched entry
diffAStart = m.indexes[0] + 1
diffBStart = /** @type {Item} */ (m.match).indexes[0] + 1
}
return changeset
}