@atlaskit/editor-plugin-show-diff
Version:
ShowDiff plugin for @atlaskit/editor-core
367 lines (348 loc) • 15.3 kB
JavaScript
import { simplifyChanges, ChangeSet } from 'prosemirror-changeset';
import { Mark } from '@atlaskit/editor-prosemirror/model';
import { Mapping, ReplaceStep } from '@atlaskit/editor-prosemirror/transform';
import { optimizeChanges } from './optimizeChanges';
/**
 * Map a single position through a position mapping.
 *
 * @param mapping - Object exposing a `map(pos)` method; it is invoked as a
 *   method, so `this` inside `map` is the mapping itself.
 * @param pos - Position to map.
 * @returns The value returned by `mapping.map(pos)`.
 */
const mapPosition = (mapping, pos) => {
  const mappedPos = mapping.map(pos);
  return mappedPos;
};
/**
 * Lay a textblock's inline content out as one array slot per content offset.
 *
 * The result has `parent.content.size` slots, all initialised to `null`.
 * For each text child, the slot at `childOffset + i` receives the i-th
 * UTF-16 code unit of the child's text. Slots covered by non-text inline
 * children (hardBreak, mention, emoji, date, etc.) are never written and
 * therefore stay `null`.
 *
 * Indexing `parent.textContent` with doc positions would be wrong:
 * `textContent` leaves non-text inline nodes out, so every such node would
 * shift the lookup by its size. This per-offset view keeps doc positions
 * inside the textblock and characters (or `null`, i.e. "no character here")
 * aligned one to one.
 *
 * @param parent - Textblock node; only `content.size`, `content.forEach`,
 *   and each child's `isText` / `text` are read.
 * @returns Array of single-unit strings and `null` entries.
 */
const buildCharsByOffset = parent => {
  const { content } = parent;
  const charsByOffset = new Array(content.size).fill(null);
  content.forEach((inlineNode, nodeOffset) => {
    if (inlineNode.isText) {
      // A text child without a `text` value contributes no characters.
      const nodeText = inlineNode.text == null ? '' : inlineNode.text;
      // `split('')` yields UTF-16 code units, matching index-based access.
      nodeText.split('').forEach((unit, unitIndex) => {
        charsByOffset[nodeOffset + unitIndex] = unit;
      });
    }
  });
  return charsByOffset;
};
/**
 * Given a ProseMirror doc and a position range [from, to], expand both
 * endpoints outward to the nearest word boundaries.
 *
 * A "word character" is any Unicode letter/number or underscore. Punctuation
 * is treated as part of the same token only when it is sandwiched between two
 * non-whitespace characters, so contractions like "You'll", accented words
 * like "l'été", and punctuation-joined tokens like "deep-sea" or "foo/bar"
 * stay intact without treating standalone punctuation as a general word
 * character. Expansion stops at whitespace, standalone punctuation, the
 * boundary of any non-text inline node (hardBreak, mention, emoji, date,
 * etc.), or the textblock edges.
 *
 * An endpoint is only moved when it sits mid-word, i.e. the characters
 * immediately before and after it are both word characters. A zero-width
 * range (`from === to`) follows the same rule, so it expands in both
 * directions when mid-word and is returned as-is otherwise.
 *
 * The range is returned unchanged when `from` does not resolve into a
 * textblock, or when `to` resolves into a different textblock. "Different"
 * is decided by position (the start of the parent's content) as well as by
 * node identity: the same immutable node instance can occur more than once
 * in a document, and an identity check alone would then treat two separate
 * occurrences as one textblock and index `to` into the wrong text.
 *
 * @param doc - Document node; only `resolve(pos)` is called on it.
 * @param from - Start position of the range.
 * @param to - End position of the range.
 * @returns `{ from, to }` with each endpoint either untouched or moved
 *   outward to a word boundary within the same textblock.
 */
const expandToWordBoundaries = (doc, from, to) => {
  const unchanged = {
    from,
    to
  };
  const $from = doc.resolve(from);
  const parent = $from.parent;
  // Only expand inside a textblock.
  if (!parent.isTextblock) {
    return unchanged;
  }
  // Absolute position of the first character in the textblock.
  const parentStart = $from.start();
  if (from !== to) {
    const $to = doc.resolve(to);
    // Compare the content start as well as the node: a shared node instance
    // placed twice in the doc has equal identity but different positions.
    if ($to.parent !== parent || $to.start() !== parentStart) {
      return unchanged;
    }
  }
  // Per-offset view of the textblock so the inline positions of non-text
  // nodes are not conflated with the characters of `parent.textContent`.
  const chars = buildCharsByOffset(parent);
  // Built once per call rather than once per inspected character. None of
  // them is global/sticky, so `.test` carries no state between calls.
  // @ts-ignore TS1501: This regular expression flag is only available when targeting 'es6' or later.
  const baseWordChar = /[\p{L}\p{N}_]/u;
  // @ts-ignore TS1501: This regular expression flag is only available when targeting 'es6' or later.
  const punctuationChar = /\p{P}/u;
  // @ts-ignore TS1501: This regular expression flag is only available when targeting 'es6' or later.
  const whitespaceChar = /\s/u;
  // `null` (non-text inline node) and out-of-range neighbours are not
  // strings, so they never qualify and act as hard boundaries.
  const isNonWhitespaceChar = value => typeof value === 'string' && !whitespaceChar.test(value);
  const isWordCharAt = idx => {
    if (idx < 0 || idx >= chars.length) {
      return false;
    }
    const ch = chars[idx];
    if (typeof ch !== 'string') {
      return false;
    }
    if (baseWordChar.test(ch)) {
      return true;
    }
    // Punctuation only counts when both neighbours are non-whitespace
    // characters, e.g. in "You'll", "deep-sea", or "foo/bar".
    return punctuationChar.test(ch) && isNonWhitespaceChar(chars[idx - 1]) && isNonWhitespaceChar(chars[idx + 1]);
  };
  // A position is mid-word when there is a word character on both sides.
  const isMidWord = idx => idx > 0 && idx < chars.length && isWordCharAt(idx - 1) && isWordCharAt(idx);
  // Convert absolute doc positions to zero-based content offsets.
  let fromIdx = from - parentStart;
  let toIdx = to - parentStart;
  // Decide both endpoints against their original offsets before moving
  // either of them (matters for zero-width ranges, where they coincide).
  const expandLeft = isMidWord(fromIdx);
  const expandRight = isMidWord(toIdx);
  if (expandLeft) {
    while (fromIdx > 0 && isWordCharAt(fromIdx - 1)) {
      fromIdx--;
    }
  }
  if (expandRight) {
    while (toIdx < chars.length && isWordCharAt(toIdx)) {
      toIdx++;
    }
  }
  return {
    from: parentStart + fromIdx,
    to: parentStart + toIdx
  };
};
/**
 * Report whether two nodes carry the same marks on each corresponding child.
 *
 * Children are compared pairwise because adding a mark splits text into
 * several child nodes; nodes with a different number of children are
 * therefore never considered equal.
 *
 * @param left - First node (`childCount` and `child(index)` are used).
 * @param right - Second node, compared child by child against `left`.
 * @returns `true` when the child counts match and `Mark.sameSet` holds for
 *   every pair of children at the same index.
 */
const hasSameChildMarks = (left, right) => {
  const childTotal = left.childCount;
  if (childTotal !== right.childCount) {
    return false;
  }
  let position = 0;
  while (position < childTotal) {
    const leftMarks = left.child(position).marks;
    const rightMarks = right.child(position).marks;
    if (!Mark.sameSet(leftMarks, rightMarks)) {
      return false;
    }
    position += 1;
  }
  return true;
};
/**
 * Build a single `Mapping` out of a sequence of step maps.
 *
 * @param maps - Step maps, appended in iteration order.
 * @returns A new `Mapping` with every map appended via `appendMap`.
 */
const createMapping = maps => {
  const composed = new Mapping();
  [...maps].forEach(stepMap => {
    composed.appendMap(stepMap);
  });
  return composed;
};
/**
 * Create the span list for a changed range of the given length.
 *
 * @param length - Size of the range.
 * @returns A one-element array `[{ length, data: null }]` when `length` is
 *   greater than zero, otherwise an empty array.
 */
const createSpans = length => {
  if (length > 0) {
    return [{
      length,
      data: null
    }];
  }
  return [];
};
/**
 * Merge changes whose ranges in the new document (`fromB`..`toB`) overlap or
 * touch.
 *
 * Changes are processed in ascending `fromB` order (the input array is not
 * reordered). A change is folded into the previous result when its `fromB`
 * is less than or equal to that result's `toB`; the merged entry spans the
 * union of both A-ranges and both B-ranges and concatenates the `deleted`
 * and `inserted` span lists. Otherwise a shallow copy of the change starts a
 * new result entry.
 *
 * @param changes - Array of changes with `fromA`, `toA`, `fromB`, `toB`,
 *   `deleted`, and `inserted`.
 * @returns The input array itself when it holds at most one change,
 *   otherwise a new array of merged changes ordered by `fromB`.
 */
const mergeOverlappingByNewDocRange = changes => {
  if (changes.length <= 1) {
    return changes;
  }
  const byNewDocStart = Array.from(changes).sort((a, b) => a.fromB - b.fromB);
  return byNewDocStart.reduce((mergedSoFar, change) => {
    const lastIndex = mergedSoFar.length - 1;
    const last = mergedSoFar[lastIndex];
    // First change, or a gap between the previous range and this one.
    if (lastIndex < 0 || !(change.fromB <= last.toB)) {
      mergedSoFar.push({
        ...change
      });
      return mergedSoFar;
    }
    mergedSoFar[lastIndex] = {
      fromA: Math.min(last.fromA, change.fromA),
      toA: Math.max(last.toA, change.toA),
      fromB: Math.min(last.fromB, change.fromB),
      toB: Math.max(last.toB, change.toB),
      deleted: [...last.deleted, ...change.deleted],
      inserted: [...last.inserted, ...change.inserted]
    };
    return mergedSoFar;
  }, []);
};
/**
 * Decide whether a step should be diffed granularly.
 *
 * Granular diffing applies only when all of the following hold:
 * - the step is a `ReplaceStep`;
 * - the step's (replacing) slice is closed on both sides;
 * - the replaced slice, `before.slice(from, to)`, is closed on both sides;
 * - the replaced and the replacing slice each contain exactly one child;
 * - both children have the same node type name and the replaced child is a
 *   textblock;
 * - both children carry the same set of node marks;
 * - either their text content differs, or it is equal and the marks on
 *   their children are the same as well.
 *
 * @param step - The step to inspect.
 * @param before - Document the step is applied to.
 * @param from - Start of the replaced range in `before`.
 * @param to - End of the replaced range in `before`.
 * @returns `true` when the step qualifies for granular diffing.
 */
const shouldCheckGranularDiff = (step, before, from, to) => {
  if (!(step instanceof ReplaceStep)) {
    return false;
  }
  const isClosed = slice => slice.openStart === 0 && slice.openEnd === 0;
  const replacingSlice = step.slice;
  if (!isClosed(replacingSlice)) {
    return false;
  }
  const replacedSlice = before.slice(from, to);
  const hasSingleChildEach = replacedSlice.content.childCount === 1 && replacingSlice.content.childCount === 1;
  if (!isClosed(replacedSlice) || !hasSingleChildEach) {
    return false;
  }
  const replacedNode = replacedSlice.content.firstChild;
  const replacingNode = replacingSlice.content.firstChild;
  // Null-tolerant readers: a missing node yields `undefined` / no marks
  // instead of throwing.
  const typeNameOf = node => node == null ? undefined : node.type.name;
  const textOf = node => node == null ? undefined : node.textContent;
  const marksOf = node => {
    const marks = node == null ? undefined : node.marks;
    return marks == null ? [] : marks;
  };
  if (typeNameOf(replacedNode) !== typeNameOf(replacingNode)) {
    return false;
  }
  if (!(replacedNode != null && replacedNode.type.isTextblock)) {
    return false;
  }
  if (!Mark.sameSet(marksOf(replacedNode), marksOf(replacingNode))) {
    return false;
  }
  if (textOf(replacedNode) !== textOf(replacingNode)) {
    return true;
  }
  // Identical text: only worth a granular diff when the child marks match.
  return hasSameChildMarks(replacedNode, replacingNode);
};
/**
 * Build the list of changes between `originalDoc` and the document that
 * results from applying `steps` in order.
 *
 * Steps are applied one after another; a step whose result reports a failure
 * (or has no doc) is skipped and contributes neither a change nor a step
 * map. Every successful step that exposes numeric `from` / `to` properties
 * yields one or more changes:
 *
 * - When `shouldCheckGranularDiff` accepts the step, the step is diffed with
 *   `ChangeSet`, and each resulting change is widened to word boundaries on
 *   both the pre-step and the post-step side before being mapped back to the
 *   original doc (A side) and forward to the final doc (B side).
 * - Otherwise a single coarse change covering the step's range is emitted.
 *
 * The collected changes are finally passed through
 * `mergeOverlappingByNewDocRange`.
 *
 * @param originalDoc - Document the first step applies to.
 * @param steps - Iterable of steps (`apply(doc)` and `getMap()` are used).
 * @returns Array of `{ fromA, toA, fromB, toB, deleted, inserted }` changes,
 *   with A positions in `originalDoc` and B positions in the final doc.
 */
export const diffBySteps = (originalDoc, steps) => {
  const changes = [];
  let currentDoc = originalDoc;
  const successfulStepMaps = [];
  const rangedSteps = [];
  for (const step of steps) {
    const before = currentDoc;
    const result = step.apply(currentDoc);
    if (result.failed !== null || !result.doc) {
      continue;
    }
    const stepMap = step.getMap();
    const rangeStep = step;
    if (typeof rangeStep.from === 'number' && typeof rangeStep.to === 'number') {
      rangedSteps.push({
        before,
        doc: result.doc,
        from: rangeStep.from,
        to: rangeStep.to,
        // Index of this step's map within `successfulStepMaps`.
        mapIndex: successfulStepMaps.length,
        step,
        stepMap
      });
    }
    successfulStepMaps.push(stepMap);
    currentDoc = result.doc;
  }
  // Keep a nudged position inside the document. Without the upper bound a
  // right-hand nudge could move past `doc.content.size`, a position that
  // `doc.resolve` (called by `expandToWordBoundaries`) does not accept.
  const clampToDoc = (doc, pos) => Math.min(Math.max(pos, 0), doc.content.size);
  for (const rangedStep of rangedSteps) {
    // Mapping from original -> doc before this step, and its inverse.
    const originalToBeforeStep = createMapping(successfulStepMaps.slice(0, rangedStep.mapIndex));
    const beforeStepToOriginal = originalToBeforeStep.invert();
    // Mapping from the doc right after this step -> final doc.
    const afterStepToFinal = createMapping(successfulStepMaps.slice(rangedStep.mapIndex + 1));
    if (shouldCheckGranularDiff(rangedStep.step, rangedStep.before, rangedStep.from, rangedStep.to)) {
      const granularStepChanges = ChangeSet.create(rangedStep.before).addSteps(rangedStep.doc, [rangedStep.stepMap], null);
      // `simplifyChanges` reads text using `Change.fromB`/`toB`, which are
      // positions in the post-step doc (the "B" doc). Passing the pre-step
      // doc misreads characters and produces mid-word cuts (e.g.
      // "deep-s|ea") because word-boundary detection runs against the wrong
      // text/positions. Use the post-step doc here.
      const optimizedGranularStepChanges = optimizeChanges(simplifyChanges(granularStepChanges.changes, rangedStep.doc));
      for (const granularChange of optimizedGranularStepChanges) {
        // Expand each granular change to the nearest word boundaries in
        // both the pre-step doc (A-side) and the post-step doc (B-side).
        // This ensures that a mid-word edit like "sanitised" → "sanitized"
        // shows as deleting the whole original word and inserting the whole
        // new word, rather than a single-character swap.
        const expandedA = expandToWordBoundaries(rangedStep.before, granularChange.fromA, granularChange.toA);
        const expandedB = expandToWordBoundaries(rangedStep.doc, granularChange.fromB, granularChange.toB);
        // When one side expanded further than the other (e.g. a space was
        // inserted mid-word: "altogether" → "all together"), the
        // less-expanded side must grow to match — otherwise the renderer
        // shows a partial word as plain text next to a deletion/insertion.
        // Left and right deltas are compared independently so partial
        // expansion on one side doesn't prevent the other side from being
        // pulled out further.
        const aLeftDelta = granularChange.fromA - expandedA.from;
        const aRightDelta = expandedA.to - granularChange.toA;
        const bLeftDelta = granularChange.fromB - expandedB.from;
        const bRightDelta = expandedB.to - granularChange.toB;
        let finalA = expandedA;
        let finalB = expandedB;
        // If A expanded further on either side, nudge B outward by the
        // excess and re-expand to snap to word boundaries.
        if (aLeftDelta > bLeftDelta || aRightDelta > bRightDelta) {
          const extraLeft = Math.max(0, aLeftDelta - bLeftDelta);
          const extraRight = Math.max(0, aRightDelta - bRightDelta);
          finalB = expandToWordBoundaries(rangedStep.doc, clampToDoc(rangedStep.doc, expandedB.from - extraLeft), clampToDoc(rangedStep.doc, expandedB.to + extraRight));
        }
        // If B expanded further on either side, nudge A outward.
        if (bLeftDelta > aLeftDelta || bRightDelta > aRightDelta) {
          const extraLeft = Math.max(0, bLeftDelta - aLeftDelta);
          const extraRight = Math.max(0, bRightDelta - aRightDelta);
          finalA = expandToWordBoundaries(rangedStep.before, clampToDoc(rangedStep.before, expandedA.from - extraLeft), clampToDoc(rangedStep.before, expandedA.to + extraRight));
        }
        const granularFromA = mapPosition(beforeStepToOriginal, finalA.from);
        const granularToA = mapPosition(beforeStepToOriginal, finalA.to);
        const granularFromB = mapPosition(afterStepToFinal, finalB.from);
        const granularToB = mapPosition(afterStepToFinal, finalB.to);
        changes.push({
          fromA: granularFromA,
          toA: granularToA,
          fromB: granularFromB,
          toB: granularToB,
          deleted: createSpans(Math.max(0, granularToA - granularFromA)),
          inserted: createSpans(Math.max(0, granularToB - granularFromB))
        });
      }
      continue;
    }
    // Coarse path: one change spanning the whole step range.
    const fromA = mapPosition(beforeStepToOriginal, rangedStep.from);
    const toA = mapPosition(beforeStepToOriginal, rangedStep.to);
    // Map the step range into final steppedDoc coordinates; the start is
    // biased backward and the end forward.
    const fromAfterStep = rangedStep.stepMap.map(rangedStep.from, -1);
    const toAfterStep = rangedStep.stepMap.map(rangedStep.to, 1);
    const fromB = mapPosition(afterStepToFinal, fromAfterStep);
    const toB = mapPosition(afterStepToFinal, toAfterStep);
    changes.push({
      fromA,
      toA,
      fromB,
      toB,
      deleted: createSpans(Math.max(0, toA - fromA)),
      inserted: createSpans(Math.max(0, toB - fromB))
    });
  }
  return mergeOverlappingByNewDocRange(changes);
};