UNPKG

@wordpress/editor

Version:
743 lines (677 loc) 21.9 kB
/**
 * External dependencies
 */
import { diffArrays } from 'diff/lib/diff/array';
import { diffWords } from 'diff/lib/diff/word';

/**
 * WordPress dependencies
 */
import { parse as grammarParse } from '@wordpress/block-serialization-default-parser';
import {
	privateApis as blocksPrivateApis,
	getBlockType,
} from '@wordpress/blocks';
import {
	RichTextData,
	create,
	slice,
	concat,
	applyFormat,
} from '@wordpress/rich-text';
import { __, _n, sprintf } from '@wordpress/i18n';

/**
 * Internal dependencies
 */
import { unlock } from '../../lock-unlock';

const { parseRawBlock } = unlock( blocksPrivateApis );

/**
 * Matches a segment that contains at least one letter or number.
 *
 * Safari's Intl.Segmenter returns isWordLike: false for numeric segments,
 * so this Unicode-aware test is used as a second opinion.
 */
const WORD_LIKE_REGEX = /[\p{L}\p{N}]/u;

/**
 * Matches runs of letters/numbers. Only used to extract words when
 * Intl.Segmenter is not available in the current environment.
 */
const WORD_RUN_REGEX = /[\p{L}\p{N}]+/gu;

/**
 * Lazily created word segmenter shared by all similarity computations.
 * `undefined` means "not initialised yet", `null` means "not supported".
 */
let cachedWordSegmenter;

/**
 * Returns the shared word segmenter, creating it on first use.
 *
 * @return {Intl.Segmenter|null} The segmenter, or null when Intl.Segmenter is unavailable.
 */
function getWordSegmenter() {
	if ( cachedWordSegmenter === undefined ) {
		cachedWordSegmenter =
			typeof Intl !== 'undefined' && typeof Intl.Segmenter === 'function'
				? new Intl.Segmenter( undefined, { granularity: 'word' } )
				: null;
	}
	return cachedWordSegmenter;
}

/**
 * Extracts the set of distinct words contained in a text.
 *
 * @param {string} text Text to split into words.
 * @return {Set<string>} Distinct word-like segments of the text.
 */
function extractUniqueWords( text ) {
	const segmenter = getWordSegmenter();
	if ( ! segmenter ) {
		// No Intl.Segmenter: fall back to runs of letters/numbers.
		return new Set( text.match( WORD_RUN_REGEX ) || [] );
	}

	const words = new Set();
	for ( const { segment, isWordLike } of segmenter.segment( text ) ) {
		if ( isWordLike || WORD_LIKE_REGEX.test( segment ) ) {
			words.add( segment );
		}
	}
	return words;
}

/**
 * Safely stringifies a value for display and comparison.
 *
 * @param {*} value The value to stringify.
 * @return {string} The stringified value.
 */
function stringifyValue( value ) {
	if ( value === null || value === undefined ) {
		return '';
	}
	if ( typeof value === 'object' ) {
		return JSON.stringify( value, null, 2 );
	}
	return String( value );
}

/**
 * Calculate text similarity using word-set overlap.
 *
 * The score is the number of distinct words shared by both texts divided by
 * the size of the larger of the two word sets. Dividing by the larger set
 * (rather than the union, as the Jaccard index does) ensures that a small
 * edit to a long paragraph scores high — the few changed words don't dilute
 * the score.
 *
 * Both sides are reduced to sets, so the result is symmetric and repeated
 * words cannot inflate it.
 *
 * Words are extracted using Intl.Segmenter for proper multilingual support
 * (CJK, Thai, etc.) rather than splitting on whitespace. When Intl.Segmenter
 * is unavailable, runs of Unicode letters/numbers are used instead.
 *
 * @param {string} text1 First text to compare.
 * @param {string} text2 Second text to compare.
 * @return {number} Similarity score between 0 and 1.
 */
function textSimilarity( text1, text2 ) {
	if ( ! text1 && ! text2 ) {
		return 1;
	}
	if ( ! text1 || ! text2 ) {
		return 0;
	}

	const words1 = extractUniqueWords( text1 );
	const words2 = extractUniqueWords( text2 );

	// Neither text contains any word (e.g. punctuation only).
	if ( words1.size === 0 && words2.size === 0 ) {
		return 1;
	}

	// Iterate the smaller set and probe the larger one.
	const [ smaller, larger ] =
		words1.size <= words2.size
			? [ words1, words2 ]
			: [ words2, words1 ];

	let intersection = 0;
	for ( const word of smaller ) {
		if ( larger.has( word ) ) {
			intersection++;
		}
	}

	// `larger` is non-empty here because at least one set has words.
	return intersection / larger.size;
}

/**
 * Post-process diff result to pair similar removed/added blocks as modifications.
 *
 * After LCS diffing, a block whose content changed appears as a separate "removed"
 * and "added" entry (since the full block signature differs). This function detects
 * such pairs and merges them into a single "modified" block with inline diff.
 *
 * Two pairing strategies are used:
 * 1. When exactly one block of a given type was removed and exactly one
 *    added block of that type is still available, they are paired
 *    directly — no similarity check needed.
 * 2. Otherwise, textSimilarity is used to find the best match. Blocks must
 *    share more than 50% of their words to be paired, preventing unrelated
 *    paragraphs from being merged.
 *
 * In both strategies a removed/added pair whose innerHTML and attrs are
 * exactly identical is never paired: that is a position swap, and it is
 * left as separate "removed" + "added" entries.
 *
 * @param {Array} blocks Raw blocks with diff status.
 * @return {Array} Blocks with similar pairs converted to modifications.
 */
function pairSimilarBlocks( blocks ) {
	const removed = [];
	const added = [];

	// Separate blocks by status, tracking original indices.
	blocks.forEach( ( block, index ) => {
		const status = block.__revisionDiffStatus?.status;
		if ( status === 'removed' ) {
			removed.push( { block, index } );
		} else if ( status === 'added' ) {
			added.push( { block, index } );
		}
	} );

	// If no removed or no added, nothing to pair.
	if ( removed.length === 0 || added.length === 0 ) {
		return blocks;
	}

	const pairedRemoved = new Set(); // Indices of removed blocks filtered out.
	const pairedAdded = new Set(); // Indices of added blocks filtered out.
	const modifications = new Map(); // Index → modified block.
	const SIMILARITY_THRESHOLD = 0.5;

	// Group candidates by block name for efficient lookup.
	const addedByName = new Map();
	for ( const add of added ) {
		const name = add.block.blockName;
		if ( ! addedByName.has( name ) ) {
			addedByName.set( name, [] );
		}
		addedByName.get( name ).push( add );
	}

	const removedByName = new Map();
	for ( const rem of removed ) {
		const name = rem.block.blockName;
		if ( ! removedByName.has( name ) ) {
			removedByName.set( name, [] );
		}
		removedByName.get( name ).push( rem );
	}

	// For each removed block, find best matching added block.
	// Track the highest added index paired so far — new pairings must
	// not go backwards, or the removed/added text order would break.
	let maxPairedAddedIndex = -1;

	for ( const rem of removed ) {
		const candidates = addedByName.get( rem.block.blockName ) || [];
		const sameNameRemoved =
			removedByName.get( rem.block.blockName ) || [];
		const unpaired = candidates.filter(
			( add ) =>
				! modifications.has( add.index ) &&
				add.index > maxPairedAddedIndex
		);

		if ( unpaired.length === 0 ) {
			continue;
		}

		// Computed once per removed block instead of once per candidate.
		const removedHTML = rem.block.innerHTML || '';
		const removedAttrs = JSON.stringify( rem.block.attrs );

		// A candidate with identical content and attrs is a position swap,
		// not a modification. It should show as separate removed + added,
		// not as a no-op "modified".
		const isIdentical = ( add ) =>
			removedHTML === ( add.block.innerHTML || '' ) &&
			removedAttrs === JSON.stringify( add.block.attrs );

		let bestMatch = null;

		if ( sameNameRemoved.length === 1 && unpaired.length === 1 ) {
			// Exactly one removed and one available added of this type:
			// pair them directly — no similarity check needed.
			const add = unpaired[ 0 ];
			if ( ! isIdentical( add ) ) {
				bestMatch = add;
			}
		} else {
			// Multiple candidates — use similarity to find best match.
			let bestScore = 0;
			for ( const add of unpaired ) {
				// Exact equality is used here rather than `score === 1`,
				// because reordered or repeated words also score 1 while
				// still being a real modification.
				if ( isIdentical( add ) ) {
					continue;
				}
				const score = textSimilarity(
					removedHTML,
					add.block.innerHTML || ''
				);
				if ( score > bestScore && score > SIMILARITY_THRESHOLD ) {
					bestScore = score;
					bestMatch = add;
				}
			}
		}

		if ( bestMatch ) {
			maxPairedAddedIndex = bestMatch.index;
			const modifiedBlock = {
				...bestMatch.block,
				__revisionDiffStatus: { status: 'modified' },
				__previousRawBlock: rem.block,
			};

			// Decide where to place the modified block by checking
			// what's between the removed and added positions.
			// If there are unpaired added blocks between them,
			// placing at the removed position would put the modified
			// block before content that comes before it in the
			// current revision — so use the added position.
			// Otherwise, use the removed position to keep the
			// previous revision's order intact.
			const lo = Math.min( rem.index, bestMatch.index );
			const hi = Math.max( rem.index, bestMatch.index );
			let hasAddedBetween = false;
			for ( let i = lo + 1; i < hi; i++ ) {
				if (
					blocks[ i ].__revisionDiffStatus?.status === 'added' &&
					! pairedAdded.has( i )
				) {
					hasAddedBetween = true;
					break;
				}
			}

			if ( hasAddedBetween ) {
				// Use the added position — don't jump before
				// current-revision content.
				modifications.set( bestMatch.index, modifiedBlock );
				pairedRemoved.add( rem.index );
			} else {
				// Use the removed position — keep the previous
				// revision's reading order.
				modifications.set( rem.index, modifiedBlock );
				pairedAdded.add( bestMatch.index );
			}
		}
	}

	// Rebuild result: replace modification targets, filter out
	// their paired counterparts.
	return blocks
		.map( ( block, index ) => {
			if ( pairedRemoved.has( index ) || pairedAdded.has( index ) ) {
				return null;
			}
			if ( modifications.has( index ) ) {
				return modifications.get( index );
			}
			return block;
		} )
		.filter( Boolean );
}

/**
 * Builds the string signature used to match raw blocks between revisions.
 *
 * The signature covers the block name, its attrs and its own HTML. Inner
 * blocks are deliberately not part of it, so a container whose children
 * changed still matches and its children are diffed recursively.
 *
 * @param {Object} rawBlock Raw block from the grammar parser.
 * @return {string} Signature string.
 */
function createBlockSignature( rawBlock ) {
	return JSON.stringify( {
		name: rawBlock.blockName,
		attrs: rawBlock.attrs,
		// Use innerContent filtered to non-null and non-whitespace-only strings.
		// This excludes whitespace between inner blocks which changes based on count.
		html: ( rawBlock.innerContent || [] ).filter(
			( c ) => c !== null && c.trim() !== ''
		),
	} );
}

/**
 * Diff raw block arrays using LCS, recursively handling innerBlocks.
 *
 * Blocks are matched by signature (see createBlockSignature). Unmatched
 * blocks are tagged "added" or "removed"; matched blocks keep no status of
 * their own but have their innerBlocks diffed recursively. Finally,
 * pairSimilarBlocks merges suitable removed/added pairs into "modified"
 * blocks.
 *
 * @param {Array} currentRaw  Current revision's raw blocks.
 * @param {Array} previousRaw Previous revision's raw blocks.
 * @return {Array} Merged raw blocks with diff status injected.
 */
function diffRawBlocks( currentRaw, previousRaw ) {
	const currentSigs = currentRaw.map( createBlockSignature );
	const previousSigs = previousRaw.map( createBlockSignature );

	const diff = diffArrays( previousSigs, currentSigs );

	const result = [];
	let currIdx = 0;
	let prevIdx = 0;

	for ( const part of diff ) {
		if ( part.added ) {
			for ( let i = 0; i < part.count; i++ ) {
				result.push( {
					...currentRaw[ currIdx++ ],
					__revisionDiffStatus: { status: 'added' },
				} );
			}
		} else if ( part.removed ) {
			for ( let i = 0; i < part.count; i++ ) {
				result.push( {
					...previousRaw[ prevIdx++ ],
					__revisionDiffStatus: { status: 'removed' },
				} );
			}
		} else {
			// Matched blocks - recursively diff their innerBlocks.
			for ( let i = 0; i < part.count; i++ ) {
				const currBlock = currentRaw[ currIdx++ ];
				const prevBlock = previousRaw[ prevIdx++ ];

				// Recursively diff inner blocks.
				const diffedInnerBlocks = diffRawBlocks(
					currBlock.innerBlocks || [],
					prevBlock.innerBlocks || []
				);

				result.push( {
					...currBlock,
					innerBlocks: diffedInnerBlocks,
				} );
			}
		}
	}

	// Post-process to pair similar removed/added blocks as modifications.
	return pairSimilarBlocks( result );
}

/**
 * Check if formatting has changed at specific character indices.
 *
 * Formatting is considered unchanged when both positions carry the same
 * number of formats and every current format has a previous format with the
 * same type and the same JSON-serialized attributes.
 *
 * @param {Array}  currentFormats  Current formats array.
 * @param {Array}  previousFormats Previous formats array.
 * @param {number} currentIndex    Character index in current.
 * @param {number} previousIndex   Character index in previous.
 * @return {boolean} True if formatting changed at these indices.
 */
function hasFormatChangedAtIndex(
	currentFormats,
	previousFormats,
	currentIndex,
	previousIndex
) {
	const currFmts = currentFormats[ currentIndex ] || [];
	const prevFmts = previousFormats[ previousIndex ] || [];

	if ( currFmts.length !== prevFmts.length ) {
		return true;
	}

	// Check if each format in current exists in previous.
	for ( const fmt of currFmts ) {
		// Serialize once per format rather than once per comparison.
		const fmtAttributes = JSON.stringify( fmt.attributes );
		const match = prevFmts.find(
			( pf ) =>
				pf.type === fmt.type &&
				JSON.stringify( pf.attributes ) === fmtAttributes
		);
		if ( ! match ) {
			return true;
		}
	}

	return false;
}

/**
 * Analyze what formatting changed between two character positions.
 * Returns both the change type (for styling) and a description (for tooltip).
 *
 * The type is 'added' when formats were only added, 'removed' when formats
 * were only removed, and 'changed' for mixed or attribute-only changes.
 *
 * @param {Array}  currentFormats  Current formats array.
 * @param {Array}  previousFormats Previous formats array.
 * @param {number} currIdx         Character index in current.
 * @param {number} prevIdx         Character index in previous.
 * @return {{ type: 'added'|'removed'|'changed', description: string }} Change info.
 */
function describeFormatChange(
	currentFormats,
	previousFormats,
	currIdx,
	prevIdx
) {
	const currFmts = currentFormats[ currIdx ] || [];
	const prevFmts = previousFormats[ prevIdx ] || [];

	let addedCount = 0;
	let removedCount = 0;
	let changedCount = 0;

	// Find added formats and attribute changes.
	for ( const fmt of currFmts ) {
		const match = prevFmts.find( ( pf ) => pf.type === fmt.type );
		if ( ! match ) {
			addedCount++;
		} else if (
			JSON.stringify( fmt.attributes ) !==
			JSON.stringify( match.attributes )
		) {
			changedCount++;
		}
	}

	// Find removed formats.
	for ( const fmt of prevFmts ) {
		const match = currFmts.find( ( cf ) => cf.type === fmt.type );
		if ( ! match ) {
			removedCount++;
		}
	}

	// Build one description fragment per kind of change that occurred.
	const parts = [];
	if ( addedCount > 0 ) {
		parts.push(
			sprintf(
				/* translators: %d: number of formats added */
				_n( '%d format added', '%d formats added', addedCount ),
				addedCount
			)
		);
	}
	if ( removedCount > 0 ) {
		parts.push(
			sprintf(
				/* translators: %d: number of formats removed */
				_n( '%d format removed', '%d formats removed', removedCount ),
				removedCount
			)
		);
	}
	if ( changedCount > 0 ) {
		parts.push(
			sprintf(
				/* translators: %d: number of formats changed */
				_n( '%d format changed', '%d formats changed', changedCount ),
				changedCount
			)
		);
	}

	// Determine primary change type for styling. Pure additions and pure
	// removals get their own type; everything else is 'changed'.
	let type = 'changed';
	if ( addedCount > 0 && removedCount === 0 && changedCount === 0 ) {
		type = 'added';
	} else if ( removedCount > 0 && addedCount === 0 && changedCount === 0 ) {
		type = 'removed';
	}

	return {
		type,
		description: parts.join( ', ' ) || __( 'Formatting changed' ),
	};
}

/**
 * Apply inline diff formatting comparing two RichTextData values.
 * - Text changes: apply revision/diff-removed and revision/diff-added formats
 * - Format-only changes (text unchanged): apply revision/diff-format-added,
 *   revision/diff-format-removed or revision/diff-format-changed format
 *
 * @param {RichTextData} currentRichText  Current revision's rich text.
 * @param {RichTextData} previousRichText Previous revision's rich text.
 * @return {RichTextData} New rich text with diff formatting applied.
 */
function applyRichTextDiff( currentRichText, previousRichText ) {
	// NOTE(review): the offsets used with slice() below are derived from the
	// plain-text lengths. This assumes toPlainText() has the same length as
	// the text the rich text values are indexed by — confirm for values that
	// contain replacement objects (e.g. inline images).
	const currentText = currentRichText.toPlainText();
	const previousText = previousRichText.toPlainText();

	// Diff the plain text (words for cleaner output).
	const textDiff = diffWords( previousText, currentText );

	// Per-character format arrays; the same for every part of the diff.
	const currentFormats = currentRichText.formats || [];
	const previousFormats = previousRichText.formats || [];

	let result = create( { text: '' } );
	let currentIdx = 0;
	let previousIdx = 0;

	for ( const part of textDiff ) {
		const len = part.value.length;

		if ( part.removed ) {
			// Text deleted - slice from PREVIOUS, apply <del>.
			const removedSlice = slice(
				previousRichText,
				previousIdx,
				previousIdx + len
			);
			const formatted = applyFormat(
				removedSlice,
				{
					type: 'revision/diff-removed',
					attributes: { title: __( 'Removed' ) },
				},
				0,
				len
			);
			result = concat( result, formatted );
			previousIdx += len;
		} else if ( part.added ) {
			// Text added - slice from CURRENT, apply <ins>.
			const addedSlice = slice(
				currentRichText,
				currentIdx,
				currentIdx + len
			);
			const formatted = applyFormat(
				addedSlice,
				{
					type: 'revision/diff-added',
					attributes: { title: __( 'Added' ) },
				},
				0,
				len
			);
			result = concat( result, formatted );
			currentIdx += len;
		} else {
			// Text unchanged - check formatting at each character position.
			// Only apply <mark> to specific ranges where formatting differs.

			// Helper to check format change at offset within this unchanged part.
			const checkFormatChanged = ( offset ) =>
				hasFormatChangedAtIndex(
					currentFormats,
					previousFormats,
					currentIdx + offset,
					previousIdx + offset
				);

			// Find ranges of characters grouped by whether format changed.
			let rangeStart = 0;
			let rangeFormatChanged = checkFormatChanged( 0 );

			for ( let i = 1; i <= len; i++ ) {
				const formatChanged = i < len && checkFormatChanged( i );

				// When format-changed status changes or we reach the end, emit range.
				if ( i === len || formatChanged !== rangeFormatChanged ) {
					const rangeSlice = slice(
						currentRichText,
						currentIdx + rangeStart,
						currentIdx + i
					);

					if ( rangeFormatChanged ) {
						// Get type and description of what changed. The first
						// character of the range is used as representative.
						const { type, description } = describeFormatChange(
							currentFormats,
							previousFormats,
							currentIdx + rangeStart,
							previousIdx + rangeStart
						);

						// Map change type to format type for styling.
						const formatType = {
							added: 'revision/diff-format-added',
							removed: 'revision/diff-format-removed',
							changed: 'revision/diff-format-changed',
						}[ type ];

						const marked = applyFormat(
							rangeSlice,
							{
								type: formatType,
								attributes: { title: description },
							},
							0,
							i - rangeStart
						);
						result = concat( result, marked );
					} else {
						result = concat( result, rangeSlice );
					}

					rangeStart = i;
					rangeFormatChanged = formatChanged;
				}
			}

			currentIdx += len;
			previousIdx += len;
		}
	}

	return new RichTextData( result );
}

/**
 * Apply diffs to a modified block's attributes.
 * - Rich-text attributes: applies inline diff formatting (ins/del marks).
 *   The current block's attribute is replaced in place with the diffed value.
 * - Other attributes: computes word-level diffs for the sidebar panel.
 *
 * Does nothing when the block type is not registered.
 *
 * @param {Object} currentBlock  Current parsed block.
 * @param {Object} previousBlock Previous parsed block.
 * @param {Object} diffStatus    The __revisionDiffStatus object to attach changedAttributes to.
 */
function applyDiffToBlock( currentBlock, previousBlock, diffStatus ) {
	const blockType = getBlockType( currentBlock.name );
	if ( ! blockType ) {
		return;
	}

	const changedAttributes = {};

	// Tolerate a block type that declares no attributes map.
	for ( const [ attrName, attrDef ] of Object.entries(
		blockType.attributes ?? {}
	) ) {
		if ( attrDef.source === 'rich-text' ) {
			const currentRichText = currentBlock.attributes[ attrName ];
			const previousRichText = previousBlock.attributes[ attrName ];

			if (
				currentRichText instanceof RichTextData &&
				previousRichText instanceof RichTextData
			) {
				currentBlock.attributes[ attrName ] = applyRichTextDiff(
					currentRichText,
					previousRichText
				);
			}
		} else {
			const currStr = stringifyValue(
				currentBlock.attributes[ attrName ]
			);
			const prevStr = stringifyValue(
				previousBlock.attributes[ attrName ]
			);

			if ( currStr !== prevStr ) {
				changedAttributes[ attrName ] = diffWords( prevStr, currStr );
			}
		}
	}

	if ( Object.keys( changedAttributes ).length > 0 ) {
		diffStatus.changedAttributes = changedAttributes;
	}
}

/**
 * Recursively apply diff status and rich text diff to blocks in the tree.
 * Copies __revisionDiffStatus from raw blocks to parsed blocks and applies
 * rich text diffs to modified blocks.
 *
 * Inner blocks are matched between the parsed and raw trees by array index.
 *
 * @param {Object} parsedBlock Parsed block (with inner blocks).
 * @param {Object} rawBlock    Raw block (with __revisionDiffStatus and __previousRawBlock).
 */
function applyDiffRecursively( parsedBlock, rawBlock ) {
	// Copy diff status from raw block to parsed block.
	if ( rawBlock.__revisionDiffStatus ) {
		// Apply diffs if this block is modified and has a previous raw block.
		if (
			rawBlock.__revisionDiffStatus.status === 'modified' &&
			rawBlock.__previousRawBlock
		) {
			const previousParsed = parseRawBlock(
				rawBlock.__previousRawBlock
			);
			if ( previousParsed ) {
				applyDiffToBlock(
					parsedBlock,
					previousParsed,
					rawBlock.__revisionDiffStatus
				);
			}
		}

		parsedBlock.__revisionDiffStatus = rawBlock.__revisionDiffStatus;
		// Also store in attributes so it survives block-editor store normalization.
		parsedBlock.attributes.__revisionDiffStatus =
			rawBlock.__revisionDiffStatus;
	}

	// Recursively process inner blocks.
	if ( parsedBlock.innerBlocks && rawBlock.innerBlocks ) {
		for ( let i = 0; i < parsedBlock.innerBlocks.length; i++ ) {
			const parsedInner = parsedBlock.innerBlocks[ i ];
			const rawInner = rawBlock.innerBlocks[ i ];
			if ( parsedInner && rawInner ) {
				applyDiffRecursively( parsedInner, rawInner );
			}
		}
	}
}

/**
 * Diff two revision contents at the grammar level.
 *
 * Both contents are grammar-parsed into raw blocks, the raw block trees are
 * diffed, and each merged raw block is then parsed into a block carrying its
 * diff status. Raw blocks that parseRawBlock rejects are dropped.
 *
 * @param {string} currentContent  Current revision's raw content.
 * @param {string} previousContent Previous revision's raw content.
 * @return {Array} Array of parsed blocks with diff status attributes.
 */
export function diffRevisionContent( currentContent, previousContent ) {
	// Grammar parse both contents.
	const currentRaw = grammarParse( currentContent || '' );
	const previousRaw = grammarParse( previousContent || '' );

	// Diff the raw block arrays.
	const mergedRaw = diffRawBlocks( currentRaw, previousRaw );

	// Parse each raw block and apply diff status.
	return mergedRaw
		.map( ( rawBlock ) => {
			const parsed = parseRawBlock( rawBlock );
			if ( parsed ) {
				applyDiffRecursively( parsed, rawBlock );
			}
			return parsed;
		} )
		.filter( Boolean );
}