UNPKG

json-joy

Version:

Collection of libraries for building collaborative editing apps.

577 lines (576 loc) 22.1 kB
/**
 * Checks whether the first UTF-16 code unit of a string is a low (trailing)
 * surrogate, i.e. the second half of a surrogate pair.
 *
 * @param str String to inspect.
 * @return `true` if the first code unit is in the range 0xDC00-0xDFFF.
 */
const startsWithPairEnd = (str) => {
  const code = str.charCodeAt(0);
  return code >= 0xdc00 && code <= 0xdfff;
};

/**
 * Checks whether the last UTF-16 code unit of a string is a high (leading)
 * surrogate, i.e. the first half of a surrogate pair.
 *
 * @param str String to inspect.
 * @return `true` if the last code unit is in the range 0xD800-0xDBFF.
 */
const endsWithPairStart = (str) => {
  const code = str.charCodeAt(str.length - 1);
  return code >= 0xd800 && code <= 0xdbff;
};

/**
 * Reorder and merge like edit sections. Merge equalities.
 * Any edit section can move as long as it doesn't cross an equality.
 *
 * The `diff` array is modified in place.
 *
 * @param diff Array of diff tuples.
 * @param fixUnicode Whether to normalize to a unicode-correct diff
 */
const cleanupMerge = (diff, fixUnicode) => {
  // Dummy trailing equality, so that pending edits are always flushed.
  diff.push([0 /* PATCH_OP_TYPE.EQL */, '']);
  let pointer = 0;
  let delCnt = 0;
  let insCnt = 0;
  let delTxt = '';
  let insTxt = '';
  let commonLength = 0;
  while (pointer < diff.length) {
    // Drop empty tuples, except the one in the last position.
    if (pointer < diff.length - 1 && !diff[pointer][1]) {
      diff.splice(pointer, 1);
      continue;
    }
    const d1 = diff[pointer];
    switch (d1[0]) {
      case 1 /* PATCH_OP_TYPE.INS */:
        insCnt++;
        pointer++;
        insTxt += d1[1];
        break;
      case -1 /* PATCH_OP_TYPE.DEL */:
        delCnt++;
        pointer++;
        delTxt += d1[1];
        break;
      case 0 /* PATCH_OP_TYPE.EQL */: {
        // Index of the equality that precedes the pending run of edits.
        let prevEq = pointer - insCnt - delCnt - 1;
        if (fixUnicode) {
          // Prevent splitting of unicode surrogate pairs. When `fixUnicode` is true,
          // we assume that the old and new text in the diff are complete and correct
          // unicode-encoded JS strings, but the tuple boundaries may fall between
          // surrogate pairs. We fix this by shaving off stray surrogates from the end
          // of the previous equality and the beginning of this equality. This may create
          // empty equalities or a common prefix or suffix. For example, if AB and AC are
          // emojis, `[[0, 'A'], [-1, 'BA'], [0, 'C']]` would turn into deleting 'ABAC' and
          // inserting 'AC', and then the common suffix 'AC' will be eliminated. In this
          // particular case, both equalities go away, we absorb any previous inequalities,
          // and we keep scanning for the next equality before rewriting the tuples.
          const d = diff[prevEq];
          if (prevEq >= 0) {
            let str = d[1];
            if (endsWithPairStart(str)) {
              const stray = str.slice(-1);
              d[1] = str = str.slice(0, -1);
              delTxt = stray + delTxt;
              insTxt = stray + insTxt;
              if (!str) {
                // Emptied out previous equality, so delete it and include previous delete/insert.
                diff.splice(prevEq, 1);
                pointer--;
                let k = prevEq - 1;
                const dk = diff[k];
                if (dk) {
                  const type = dk[0];
                  if (type === 1 /* PATCH_OP_TYPE.INS */) {
                    insCnt++;
                    k--;
                    insTxt = dk[1] + insTxt;
                  } else if (type === -1 /* PATCH_OP_TYPE.DEL */) {
                    delCnt++;
                    k--;
                    delTxt = dk[1] + delTxt;
                  }
                }
                prevEq = k;
              }
            }
          }
          const d1 = diff[pointer];
          const str1 = d1[1];
          if (startsWithPairEnd(str1)) {
            const stray = str1.charAt(0);
            d1[1] = str1.slice(1);
            delTxt += stray;
            insTxt += stray;
          }
        }
        if (pointer < diff.length - 1 && !diff[pointer][1]) {
          // For empty equality not at end, wait for next equality.
          diff.splice(pointer, 1);
          break;
        }
        const hasDelTxt = delTxt.length > 0;
        const hasInsTxt = insTxt.length > 0;
        if (hasDelTxt || hasInsTxt) {
          // Note that `pfx` and `sfx` avoid splitting surrogate pairs.
          if (hasDelTxt && hasInsTxt) {
            // Factor out any common prefixes.
            commonLength = pfx(insTxt, delTxt);
            if (commonLength !== 0) {
              if (prevEq >= 0) {
                diff[prevEq][1] += insTxt.slice(0, commonLength);
              } else {
                diff.splice(0, 0, [0 /* PATCH_OP_TYPE.EQL */, insTxt.slice(0, commonLength)]);
                pointer++;
              }
              insTxt = insTxt.slice(commonLength);
              delTxt = delTxt.slice(commonLength);
            }
            // Factor out any common suffixes.
            commonLength = sfx(insTxt, delTxt);
            if (commonLength !== 0) {
              diff[pointer][1] = insTxt.slice(insTxt.length - commonLength) + diff[pointer][1];
              insTxt = insTxt.slice(0, insTxt.length - commonLength);
              delTxt = delTxt.slice(0, delTxt.length - commonLength);
            }
          }
          // Delete the offending records and add the merged ones.
          const n = insCnt + delCnt;
          const delTxtLen = delTxt.length;
          const insTxtLen = insTxt.length;
          if (delTxtLen === 0 && insTxtLen === 0) {
            diff.splice(pointer - n, n);
            pointer = pointer - n;
          } else if (delTxtLen === 0) {
            diff.splice(pointer - n, n, [1 /* PATCH_OP_TYPE.INS */, insTxt]);
            pointer = pointer - n + 1;
          } else if (insTxtLen === 0) {
            diff.splice(pointer - n, n, [-1 /* PATCH_OP_TYPE.DEL */, delTxt]);
            pointer = pointer - n + 1;
          } else {
            diff.splice(pointer - n, n, [-1 /* PATCH_OP_TYPE.DEL */, delTxt], [1 /* PATCH_OP_TYPE.INS */, insTxt]);
            pointer = pointer - n + 2;
          }
        }
        const d0 = diff[pointer - 1];
        if (pointer !== 0 && d0[0] === 0 /* PATCH_OP_TYPE.EQL */) {
          // Merge this equality with the previous one.
          d0[1] += diff[pointer][1];
          diff.splice(pointer, 1);
        } else pointer++;
        insCnt = 0;
        delCnt = 0;
        delTxt = '';
        insTxt = '';
        break;
      }
    }
  }
  if (diff[diff.length - 1][1] === '') diff.pop(); // Remove the dummy entry at the end.
  // Second pass: look for single edits surrounded on both sides by equalities
  // which can be shifted sideways to eliminate an equality.
  // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
  let changes = false;
  pointer = 1;
  // Intentionally ignore the first and last element (don't need checking).
  while (pointer < diff.length - 1) {
    const d0 = diff[pointer - 1];
    const d2 = diff[pointer + 1];
    if (d0[0] === 0 /* PATCH_OP_TYPE.EQL */ && d2[0] === 0 /* PATCH_OP_TYPE.EQL */) {
      // This is a single edit surrounded by equalities.
      const str0 = d0[1];
      const d1 = diff[pointer];
      const str1 = d1[1];
      const str2 = d2[1];
      if (str1.slice(str1.length - str0.length) === str0) {
        // Shift the edit over the previous equality.
        diff[pointer][1] = str0 + str1.slice(0, str1.length - str0.length);
        d2[1] = str0 + str2;
        diff.splice(pointer - 1, 1);
        changes = true;
      } else if (str1.slice(0, str2.length) === str2) {
        // Shift the edit over the next equality.
        d0[1] += d2[1];
        d1[1] = str1.slice(str2.length) + str2;
        diff.splice(pointer + 1, 1);
        changes = true;
      }
    }
    pointer++;
  }
  // If shifts were made, the diff needs reordering and another shift sweep.
  if (changes) cleanupMerge(diff, fixUnicode);
};

/**
 * Given the location of the 'middle snake', split the diff in two parts
 * and recurse.
 *
 * @param text1 Old string to be diffed.
 * @param text2 New string to be diffed.
 * @param x Index of split point in text1.
 * @param y Index of split point in text2.
 * @return Array of diff tuples.
 */
const bisectSplit = (text1, text2, x, y) => {
  const diffsA = diff_(text1.slice(0, x), text2.slice(0, y), false);
  const diffsB = diff_(text1.slice(x), text2.slice(y), false);
  return diffsA.concat(diffsB);
};

/**
 * Find the 'middle snake' of a diff, split the problem in two
 * and return the recursively constructed diff.
 *
 * This is a port of `diff-patch-match` implementation to TypeScript.
 *
 * @see http://www.xmailserver.org/diff2.pdf EUGENE W. MYERS 1986 paper: An
 *     O(ND) Difference Algorithm and Its Variations.
 *
 * @param text1 Old string to be diffed.
 * @param text2 New string to be diffed.
 * @return A {@link Patch} - an array of patch operations.
 */
const bisect = (text1, text2) => {
  const text1Length = text1.length;
  const text2Length = text2.length;
  const maxD = Math.ceil((text1Length + text2Length) / 2);
  const vOffset = maxD;
  const vLength = 2 * maxD;
  const v1 = new Array(vLength);
  const v2 = new Array(vLength);
  for (let x = 0; x < vLength; x++) {
    v1[x] = -1;
    v2[x] = -1;
  }
  v1[vOffset + 1] = 0;
  v2[vOffset + 1] = 0;
  const delta = text1Length - text2Length;
  // If the total number of characters is odd, then the front path will collide
  // with the reverse path.
  const front = delta % 2 !== 0;
  // Offsets for start and end of k loop.
  // Prevents mapping of space beyond the grid.
  let k1start = 0;
  let k1end = 0;
  let k2start = 0;
  let k2end = 0;
  for (let d = 0; d < maxD; d++) {
    // Walk the front path one step.
    for (let k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
      const k1_offset = vOffset + k1;
      let x1 = 0;
      const v10 = v1[k1_offset - 1];
      const v11 = v1[k1_offset + 1];
      if (k1 === -d || (k1 !== d && v10 < v11)) x1 = v11;
      else x1 = v10 + 1;
      let y1 = x1 - k1;
      while (x1 < text1Length && y1 < text2Length && text1.charAt(x1) === text2.charAt(y1)) {
        x1++;
        y1++;
      }
      v1[k1_offset] = x1;
      if (x1 > text1Length) k1end += 2;
      else if (y1 > text2Length) k1start += 2;
      else if (front) {
        const k2Offset = vOffset + delta - k1;
        const v2Offset = v2[k2Offset];
        if (k2Offset >= 0 && k2Offset < vLength && v2Offset !== -1) {
          // `text1Length - v2Offset` mirrors the reverse-path x onto the front axis.
          if (x1 >= text1Length - v2Offset) return bisectSplit(text1, text2, x1, y1);
        }
      }
    }
    // Walk the reverse path one step.
    for (let k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
      const k2_offset = vOffset + k2;
      let x2 =
        k2 === -d || (k2 !== d && v2[k2_offset - 1] < v2[k2_offset + 1])
          ? v2[k2_offset + 1]
          : v2[k2_offset - 1] + 1;
      let y2 = x2 - k2;
      while (
        x2 < text1Length &&
        y2 < text2Length &&
        text1.charAt(text1Length - x2 - 1) === text2.charAt(text2Length - y2 - 1)
      ) {
        x2++;
        y2++;
      }
      v2[k2_offset] = x2;
      if (x2 > text1Length) k2end += 2;
      else if (y2 > text2Length) k2start += 2;
      else if (!front) {
        const k1_offset = vOffset + delta - k2;
        const x1 = v1[k1_offset];
        if (k1_offset >= 0 && k1_offset < vLength && x1 !== -1) {
          const y1 = vOffset + x1 - k1_offset;
          // Mirror x2 onto the front axis before comparing.
          x2 = text1Length - x2;
          if (x1 >= x2) return bisectSplit(text1, text2, x1, y1);
        }
      }
    }
  }
  // No split point was found: delete all of the old text, insert all of the new.
  return [
    [-1 /* PATCH_OP_TYPE.DEL */, text1],
    [1 /* PATCH_OP_TYPE.INS */, text2],
  ];
};

/**
 * Find the differences between two texts. Assumes that the texts do not
 * have any common prefix or suffix.
 *
 * @param src Old string to be diffed.
 * @param dst New string to be diffed.
 * @return A {@link Patch} - an array of patch operations.
 */
const diffNoCommonAffix = (src, dst) => {
  if (!src) return [[1 /* PATCH_OP_TYPE.INS */, dst]];
  if (!dst) return [[-1 /* PATCH_OP_TYPE.DEL */, src]];
  const text1Length = src.length;
  const text2Length = dst.length;
  const long = text1Length > text2Length ? src : dst;
  const short = text1Length > text2Length ? dst : src;
  const shortTextLength = short.length;
  const indexOfContainedShort = long.indexOf(short);
  if (indexOfContainedShort >= 0) {
    // The shorter text is contained in the longer one: keep it as an equality
    // and insert/delete what surrounds it.
    const start = long.slice(0, indexOfContainedShort);
    const end = long.slice(indexOfContainedShort + shortTextLength);
    return text1Length > text2Length
      ? [
          [-1 /* PATCH_OP_TYPE.DEL */, start],
          [0 /* PATCH_OP_TYPE.EQL */, short],
          [-1 /* PATCH_OP_TYPE.DEL */, end],
        ]
      : [
          [1 /* PATCH_OP_TYPE.INS */, start],
          [0 /* PATCH_OP_TYPE.EQL */, short],
          [1 /* PATCH_OP_TYPE.INS */, end],
        ];
  }
  // A single character that is not contained in the longer text cannot
  // contribute an equality.
  if (shortTextLength === 1)
    return [
      [-1 /* PATCH_OP_TYPE.DEL */, src],
      [1 /* PATCH_OP_TYPE.INS */, dst],
    ];
  return bisect(src, dst);
};

/**
 * Determine the common prefix of two strings.
 *
 * The result is shortened by one when the prefix would otherwise end on a
 * high surrogate, so that a surrogate pair is never split.
 *
 * @param txt1 First string.
 * @param txt2 Second string.
 * @return The number of characters common to the start of each string.
 */
export const pfx = (txt1, txt2) => {
  if (!txt1 || !txt2 || txt1.charAt(0) !== txt2.charAt(0)) return 0;
  // Binary search for the length of the common prefix.
  let min = 0;
  let max = Math.min(txt1.length, txt2.length);
  let mid = max;
  let start = 0;
  while (min < mid) {
    if (txt1.slice(start, mid) === txt2.slice(start, mid)) {
      min = mid;
      start = min;
    } else max = mid;
    mid = Math.floor((max - min) / 2 + min);
  }
  const code = txt1.charCodeAt(mid - 1);
  const isSurrogatePairStart = code >= 0xd800 && code <= 0xdbff;
  if (isSurrogatePairStart) mid--;
  return mid;
};

/**
 * Determine the common suffix of two strings.
 *
 * The result is shortened by one when the suffix would otherwise begin on a
 * low (trailing) surrogate, so that a surrogate pair is never split.
 *
 * @param txt1 First string.
 * @param txt2 Second string.
 * @return The number of characters common to the end of each string.
 */
export const sfx = (txt1, txt2) => {
  if (!txt1 || !txt2 || txt1.slice(-1) !== txt2.slice(-1)) return 0;
  // Binary search for the length of the common suffix.
  let min = 0;
  let max = Math.min(txt1.length, txt2.length);
  let mid = max;
  let end = 0;
  while (min < mid) {
    if (txt1.slice(txt1.length - mid, txt1.length - end) === txt2.slice(txt2.length - mid, txt2.length - end)) {
      min = mid;
      end = min;
    } else max = mid;
    mid = Math.floor((max - min) / 2 + min);
  }
  // The first code unit of the suffix must not be the trailing half of a
  // surrogate pair (0xDC00-0xDFFF); a leading half there is a complete pair.
  const code = txt1.charCodeAt(txt1.length - mid);
  const isSurrogatePairEnd = code >= 0xdc00 && code <= 0xdfff;
  if (isSurrogatePairEnd) mid--;
  return mid;
};

/**
 * Find the differences between two texts. Simplifies the problem by stripping
 * any common prefix or suffix off the texts before diffing.
 *
 * @param src Old string to be diffed.
 * @param dst New string to be diffed.
 * @param fixUnicode Whether the merge cleanup should repair tuple boundaries
 *     that fall inside surrogate pairs.
 * @return A {@link Patch} - an array of patch operations.
 */
const diff_ = (src, dst, fixUnicode) => {
  if (src === dst) return src ? [[0 /* PATCH_OP_TYPE.EQL */, src]] : [];
  // Trim off common prefix (speedup).
  const prefixLength = pfx(src, dst);
  const prefix = src.slice(0, prefixLength);
  src = src.slice(prefixLength);
  dst = dst.slice(prefixLength);
  // Trim off common suffix (speedup).
  const suffixLength = sfx(src, dst);
  const suffix = src.slice(src.length - suffixLength);
  src = src.slice(0, src.length - suffixLength);
  dst = dst.slice(0, dst.length - suffixLength);
  // Compute the diff on the middle block.
  const diff = diffNoCommonAffix(src, dst);
  if (prefix) diff.unshift([0 /* PATCH_OP_TYPE.EQL */, prefix]);
  if (suffix) diff.push([0 /* PATCH_OP_TYPE.EQL */, suffix]);
  cleanupMerge(diff, fixUnicode);
  return diff;
};

/**
 * Find the differences between two texts.
 *
 * @param src Old string to be diffed.
 * @param dst New string to be diffed.
 * @return A {@link Patch} - an array of patch operations.
 */
export const diff = (src, dst) => diff_(src, dst, true);

/**
 * Considers simple insertion and deletion cases around the caret position in
 * the destination string. If the fast patch cannot be constructed, it falls
 * back to the default full implementation.
 *
 * Cases considered:
 *
 * 1. Insertion of a single or multiple characters right before the caret.
 * 2. Deletion of one or more characters right before the caret.
 *
 * @param src Old string to be diffed.
 * @param dst New string to be diffed.
 * @param caret The position of the caret in the new string. Set to -1 to
 *     ignore the caret position.
 * @return A {@link Patch} - an array of patch operations.
 */
export const diffEdit = (src, dst, caret) => {
  edit: {
    if (caret < 0) break edit;
    const srcLen = src.length;
    const dstLen = dst.length;
    if (srcLen === dstLen) break edit;
    // Everything after the caret must be an unchanged suffix of the old text.
    const dstSfx = dst.slice(caret);
    const sfxLen = dstSfx.length;
    if (sfxLen > srcLen) break edit;
    const srcSfx = src.slice(srcLen - sfxLen);
    if (srcSfx !== dstSfx) break edit;
    const isInsert = dstLen > srcLen;
    if (isInsert) {
      const pfxLen = srcLen - sfxLen;
      const srcPfx = src.slice(0, pfxLen);
      const dstPfx = dst.slice(0, pfxLen);
      if (srcPfx !== dstPfx) break edit;
      const insert = dst.slice(pfxLen, caret);
      const patch = [];
      if (srcPfx) patch.push([0 /* PATCH_OP_TYPE.EQL */, srcPfx]);
      if (insert) patch.push([1 /* PATCH_OP_TYPE.INS */, insert]);
      if (dstSfx) patch.push([0 /* PATCH_OP_TYPE.EQL */, dstSfx]);
      return patch;
    } else {
      const pfxLen = dstLen - sfxLen;
      const dstPfx = dst.slice(0, pfxLen);
      const srcPfx = src.slice(0, pfxLen);
      if (srcPfx !== dstPfx) break edit;
      const del = src.slice(pfxLen, srcLen - sfxLen);
      const patch = [];
      if (srcPfx) patch.push([0 /* PATCH_OP_TYPE.EQL */, srcPfx]);
      if (del) patch.push([-1 /* PATCH_OP_TYPE.DEL */, del]);
      if (dstSfx) patch.push([0 /* PATCH_OP_TYPE.EQL */, dstSfx]);
      return patch;
    }
  }
  return diff(src, dst);
};

/**
 * Reconstructs the source (old) text of a patch by concatenating the text of
 * every operation that is not an insertion.
 *
 * @param patch The patch to read.
 * @return The source text.
 */
export const src = (patch) => {
  let txt = '';
  const length = patch.length;
  for (let i = 0; i < length; i++) {
    const op = patch[i];
    if (op[0] !== 1 /* PATCH_OP_TYPE.INS */) txt += op[1];
  }
  return txt;
};

/**
 * Reconstructs the destination (new) text of a patch by concatenating the
 * text of every operation that is not a deletion.
 *
 * @param patch The patch to read.
 * @return The destination text.
 */
export const dst = (patch) => {
  let txt = '';
  const length = patch.length;
  for (let i = 0; i < length; i++) {
    const op = patch[i];
    if (op[0] !== -1 /* PATCH_OP_TYPE.DEL */) txt += op[1];
  }
  return txt;
};

/**
 * Inverts a single operation: insertions become deletions and vice versa.
 * Equalities are returned as-is (the same tuple object).
 *
 * @param op The operation to invert.
 * @return The inverted operation.
 */
const invertOp = (op) => {
  const type = op[0];
  return type === 0 /* PATCH_OP_TYPE.EQL */
    ? op
    : type === 1 /* PATCH_OP_TYPE.INS */
      ? [-1 /* PATCH_OP_TYPE.DEL */, op[1]]
      : [1 /* PATCH_OP_TYPE.INS */, op[1]];
};

/**
 * Inverts patch such that it can be applied to `dst` to get `src` (instead of
 * `src` to get `dst`).
 *
 * @param patch The patch to invert.
 * @returns Inverted patch.
 */
export const invert = (patch) => patch.map(invertOp);

/**
 * Walks the patch from its last operation to its first, tracking a position
 * that starts at `srcLen` and moves back over equal and deleted text, and
 * reports every insertion and deletion through the callbacks.
 *
 * @param patch The patch to apply.
 * @param srcLen The length of the source string.
 * @param onInsert Callback for insert operations, called as
 *     `onInsert(pos, str)`.
 * @param onDelete Callback for delete operations, called as
 *     `onDelete(pos, len, str)`.
 */
export const apply = (patch, srcLen, onInsert, onDelete) => {
  const length = patch.length;
  let pos = srcLen;
  for (let i = length - 1; i >= 0; i--) {
    const [type, str] = patch[i];
    if (type === 0 /* PATCH_OP_TYPE.EQL */) pos -= str.length;
    else if (type === 1 /* PATCH_OP_TYPE.INS */) onInsert(pos, str);
    else {
      const len = str.length;
      pos -= len;
      onDelete(pos, len, str);
    }
  }
};