UNPKG

node-htmldiff

Version:

Diff and markup HTML with <ins> and <del> tags

github.com/idesis-gmbh/htmldiff.js

idesis-gmbh/htmldiff.js

992 lines (913 loc) • 41.4 kB

JavaScript

/** * htmldiff.js is a library that compares HTML content. It creates a diff between two * HTML documents by combining the two documents and wrapping the differences with * <ins> and <del> tags. Here is a high-level overview of how the diff works. * * 1. Tokenize the before and after HTML with htmlToTokens. * 2. Generate a list of operations that convert the before list of tokens to the after * list of tokens with calculateOperations, which does the following: * a. Find all the matching blocks of tokens between the before and after lists of * tokens with findMatchingBlocks. This is done by finding the single longest * matching block with findMatch, then iteratively finding the next longest * matching blocks that precede and follow the longest matching block. * b. Determine insertions, deletions, and replacements from the matching blocks. * This is done in calculateOperations. * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where * appropriate with renderOperations. * * Example usage: * * var htmldiff = require('htmldiff.js'); * * htmldiff('<p>this is some text</p>', '<p>this is some more text</p>') * == '<p>this is some <ins>more </ins>text</p>' * * htmldiff('<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class') * == '<p>this is some <ins class="diff-class">more </ins>text</p>' */ (function(){ 'use strict'; function isEndOfTag(char){ return char === '>'; } function isStartOfTag(char){ return char === '<'; } function isWhitespace(char){ return /^\s+$/.test(char); } /** * Determines if the given token is a tag. * * @param {string} token The token in question. * * @return {boolean|string} False if the token is not a tag, or the tag name otherwise. */ function isTag(token){ var match = token.match(/^\s*<([^!>][^>]*)>\s*$/); return !!match && match[1].trim().split(' ')[0]; } function isntTag(token){ return !isTag(token); } function isStartofHTMLComment(word){ return /^<!--/.test(word); } function isEndOfHTMLComment(word){ return /--\>$/.test(word); } /** * Regular expression to check atomic tags. * @see function diff. */ var atomicTagsRegExp; // Added head and style (for style tags inside the body) var defaultAtomicTagsRegExp = new RegExp('^<(iframe|object|math|svg|script|video|head|style|a)'); /** * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose * child nodes should not be compared - the entire tag should be treated as one token. This * is useful for tags where it does not make sense to insert <ins> and <del> tags. * * @param {string} word The characters of the current token read so far. * * @return {string|null} The name of the atomic tag if the word will be an atomic tag, * null otherwise */ function isStartOfAtomicTag(word){ var result = atomicTagsRegExp.exec(word); return result && result[1]; } /** * Checks if the current word is the end of an atomic tag (i.e. it has all the characters, * except for the end bracket of the closing tag, such as '<iframe></iframe'). * * @param {string} word The characters of the current token read so far. * @param {string} tag The ending tag to look for. * * @return {boolean} True if the word is now a complete token (including the end tag), * false otherwise. */ function isEndOfAtomicTag(word, tag){ return word.substring(word.length - tag.length - 2) === ('</' + tag); } /** * Checks if a tag is a void tag. * * @param {string} token The token to check. * * @return {boolean} True if the token is a void tag, false otherwise. */ function isVoidTag(token){ return /^\s*<[^>]+\/>\s*$/.test(token); } /** * Checks if a token can be wrapped inside a tag. * * @param {string} token The token to check. * * @return {boolean} True if the token can be wrapped inside a tag, false otherwise. */ function isWrappable(token){ var is_img = /^<img[\s>]/.test(token); return is_img|| isntTag(token) || isStartOfAtomicTag(token) || isVoidTag(token); } /** * Creates a token that holds a string and key representation. The key is used for diffing * comparisons and the string is used to recompose the document after the diff is complete. * * @param {string} currentWord The section of the document to create a token for. * * @return {Object} A token object with a string and key property. */ function createToken(currentWord){ return { string: currentWord, key: getKeyForToken(currentWord) }; } /** * A Match stores the information of a matching block. A matching block is a list of * consecutive tokens that appear in both the before and after lists of tokens. * * @param {number} startInBefore The index of the first token in the list of before tokens. * @param {number} startInAfter The index of the first token in the list of after tokens. * @param {number} length The number of consecutive matching tokens in this block. * @param {Segment} segment The segment where the match was found. */ function Match(startInBefore, startInAfter, length, segment){ this.segment = segment; this.length = length; this.startInBefore = startInBefore + segment.beforeIndex; this.startInAfter = startInAfter + segment.afterIndex; this.endInBefore = this.startInBefore + this.length - 1; this.endInAfter = this.startInAfter + this.length - 1; this.segmentStartInBefore = startInBefore; this.segmentStartInAfter = startInAfter; this.segmentEndInBefore = (this.segmentStartInBefore + this.length) - 1; this.segmentEndInAfter = (this.segmentStartInAfter + this.length) - 1; } /** * Tokenizes a string of HTML. * * @param {string} html The string to tokenize. * * @return {Array.<string>} The list of tokens. */ function htmlToTokens(html){ var mode = 'char'; var currentWord = ''; var currentAtomicTag = ''; var words = []; for (var i = 0; i < html.length; i++){ var char = html[i]; switch (mode){ case 'tag': var atomicTag = isStartOfAtomicTag(currentWord); if (atomicTag){ mode = 'atomic_tag'; currentAtomicTag = atomicTag; currentWord += char; } else if (isStartofHTMLComment(currentWord)){ mode = 'html_comment'; currentWord += char; } else if (isEndOfTag(char)){ currentWord += '>'; words.push(createToken(currentWord)); currentWord = ''; if (isWhitespace(char)){ mode = 'whitespace'; } else { mode = 'char'; } } else { currentWord += char; } break; case 'atomic_tag': if (isEndOfTag(char) && isEndOfAtomicTag(currentWord, currentAtomicTag)){ currentWord += '>'; words.push(createToken(currentWord)); currentWord = ''; currentAtomicTag = ''; mode = 'char'; } else { currentWord += char; } break; case 'html_comment': currentWord += char; if (isEndOfHTMLComment(currentWord)){ currentWord = ''; mode = 'char'; } break; case 'char': if (isStartOfTag(char)){ if (currentWord){ words.push(createToken(currentWord)); } currentWord = '<'; mode = 'tag'; } else if (/\s/.test(char)){ if (currentWord){ words.push(createToken(currentWord)); } currentWord = char; mode = 'whitespace'; } else if (/[\w\d\#@]/.test(char)){ currentWord += char; } else if (/&/.test(char)){ if (currentWord){ words.push(createToken(currentWord)); } currentWord = char; } else { currentWord += char; words.push(createToken(currentWord)); currentWord = ''; } break; case 'whitespace': if (isStartOfTag(char)){ if (currentWord){ words.push(createToken(currentWord)); } currentWord = '<'; mode = 'tag'; } else if (isWhitespace(char)){ currentWord += char; } else { if (currentWord){ words.push(createToken(currentWord)); } currentWord = char; mode = 'char'; } break; default: throw new Error('Unknown mode ' + mode); } } if (currentWord){ words.push(createToken(currentWord)); } return words; } /** * Creates a key that should be used to match tokens. This is useful, for example, if we want * to consider two open tag tokens as equal, even if they don't have the same attributes. We * use a key instead of overwriting the token because we may want to render the original string * without losing the attributes. * * @param {string} token The token to create the key for. * * @return {string} The identifying key that should be used to match before and after tokens. */ function getKeyForToken(token){ // If the token is an image element, grab it's src attribute to include in the key. var img = /^<img.*src=['"]([^"']*)['"].*>$/.exec(token); if (img) { return '<img src="' + img[1] + '">'; } // If the token is an a element, grab it's data attribute to include in the key. var a = /^<a.*href=['"]([^"']*)['"]/.exec(token); if (a) { return '<a href="' + a[1] + '"></a>'; } // If the token is an object element, grab it's data attribute to include in the key. var object = /^<object.*data=['"]([^"']*)['"]/.exec(token); if (object) { return '<object src="' + object[1] + '"></object>'; } // If it's a video, math or svg element, the entire token should be compared except the // data-uuid. if(/^<(svg|math|video)[\s>]/.test(token)) { var uuid = token.indexOf('data-uuid="'); if (uuid !== -1) { var start = token.slice(0, uuid); var end = token.slice(uuid + 44); return start + end; } else { return token; } } // If the token is an iframe element, grab it's src attribute to include in it's key. var iframe = /^<iframe.*src=['"]([^"']*)['"].*>/.exec(token); if (iframe) { return '<iframe src="' + iframe[1] + '"></iframe>'; } // If the token is any other element, just grab the tag name. var tagName = /<([^\s>]+)[\s>]/.exec(token); if (tagName){ return '<' + (tagName[1].toLowerCase()) + '>'; } // Otherwise, the token is text, collapse the whitespace. if (token) { return token.replace(/(\s+| | )/g, ' '); } return token; } /** * Creates a map from token key to an array of indices of locations of the matching token in * the list of all tokens. * * @param {Array.<string>} tokens The list of tokens to be mapped. * * @return {Object} A mapping that can be used to search for tokens. */ function createMap(tokens){ return tokens.reduce(function(map, token, index){ if (map[token.key]){ map[token.key].push(index); } else { map[token.key] = [index]; } return map; }, Object.create(null)); } /** * Compares two match objects to determine if the second match object comes before or after the * first match object. Returns -1 if the m2 should come before m1. Returns 1 if m1 should come * before m2. If the two matches criss-cross each other, a null is returned. * * @param {Match} m1 The first match object to compare. * @param {Match} m2 The second match object to compare. * * @return {number} Returns -1 if the m2 should come before m1. Returns 1 if m1 should come * before m2. If the two matches criss-cross each other, 0 is returned. */ function compareMatches(m1, m2){ if (m2.endInBefore < m1.startInBefore && m2.endInAfter < m1.startInAfter){ return -1; } else if (m2.startInBefore > m1.endInBefore && m2.startInAfter > m1.endInAfter){ return 1; } else { return 0; } } /** * A constructor for a binary search tree used to keep match objects in the proper order as * they're found. * * @constructor */ function MatchBinarySearchTree(){ this._root = null; } MatchBinarySearchTree.prototype = { /** * Adds matches to the binary search tree. * * @param {Match} value The match to add to the binary search tree. */ add: function (value){ // Create the node to hold the match value. var node = { value: value, left: null, right: null }; var current = this._root; if(current){ while (true){ // Determine if the match value should go to the left or right of the current // node. var position = compareMatches(current.value, value); if (position === -1){ // The position of the match is to the left of this node. if (current.left){ current = current.left; } else { current.left = node; break; } } else if (position === 1){ // The position of the match is to the right of this node. if (current.right){ current = current.right; } else { current.right = node; break; } } else { // If 0 was returned from compareMatches, that means the node cannot // be inserted because it overlaps an existing node. break; } } } else { // If no nodes exist in the tree, make this the root node. this._root = node; } }, /** * Converts the binary search tree into an array using an in-order traversal. * * @return {Array.<Match>} An array containing the matches in the binary search tree. */ toArray: function(){ function inOrder(node, nodes){ if (node){ inOrder(node.left, nodes); nodes.push(node.value); inOrder(node.right, nodes); } return nodes; } return inOrder(this._root, []); } }; /** * Finds and returns the best match between the before and after arrays contained in the segment * provided. * * @param {Segment} segment The segment in which to look for a match. * * @return {Match} The best match. */ function findBestMatch(segment){ var beforeTokens = segment.beforeTokens; var afterMap = segment.afterMap; var lastSpace = null; var bestMatch = null; // Iterate through the entirety of the beforeTokens to find the best match. for (var beforeIndex = 0; beforeIndex < beforeTokens.length; beforeIndex++){ var lookBehind = false; // If the current best match is longer than the remaining tokens, we can bail because we // won't find a better match. var remainingTokens = beforeTokens.length - beforeIndex; if (bestMatch && remainingTokens < bestMatch.length){ break; } // If the current token is whitespace, make a note of it and move on. Trying to start a // set of matches with whitespace is not efficient because it's too prevelant in most // documents. Instead, if the next token yields a match, we'll see if the whitespace can // be included in that match. var beforeToken = beforeTokens[beforeIndex]; if (beforeToken.key === ' '){ lastSpace = beforeIndex; continue; } // Check to see if we just skipped a space, if so, we'll ask getFullMatch to look behind // by one token to see if it can include the whitespace. if (lastSpace === beforeIndex - 1){ lookBehind = true; } // If the current token is not found in the afterTokens, it won't match and we can move // on. var afterTokenLocations = afterMap[beforeToken.key]; if(!afterTokenLocations){ continue; } // For each instance of the current token in afterTokens, let's see how big of a match // we can build. afterTokenLocations.forEach(function(afterIndex){ // getFullMatch will see how far the current token match will go in both // beforeTokens and afterTokens. var bestMatchLength = bestMatch ? bestMatch.length : 0; var match = getFullMatch( segment, beforeIndex, afterIndex, bestMatchLength, lookBehind); // If we got a new best match, we'll save it aside. if (match && match.length > bestMatchLength){ bestMatch = match; } }); } return bestMatch; } /** * Takes the start of a match, and expands it in the beforeTokens and afterTokens of the * current segment as far as it can go. * * @param {Segment} segment The segment object to search within when expanding the match. * @param {number} beforeStart The offset within beforeTokens to start looking. * @param {number} afterStart The offset within afterTokens to start looking. * @param {number} minLength The minimum length match that must be found. * @param {boolean} lookBehind If true, attempt to match a whitespace token just before the * beforeStart and afterStart tokens. * * @return {Match} The full match. */ function getFullMatch(segment, beforeStart, afterStart, minLength, lookBehind){ var beforeTokens = segment.beforeTokens; var afterTokens = segment.afterTokens; // If we already have a match that goes to the end of the document, no need to keep looking. var minBeforeIndex = beforeStart + minLength; var minAfterIndex = afterStart + minLength; if(minBeforeIndex >= beforeTokens.length || minAfterIndex >= afterTokens.length){ return; } // If a minLength was provided, we can do a quick check to see if the tokens after that // length match. If not, we won't be beating the previous best match, and we can bail out // early. if (minLength){ var nextBeforeWord = beforeTokens[minBeforeIndex].key; var nextAfterWord = afterTokens[minAfterIndex].key; if (nextBeforeWord !== nextAfterWord){ return; } } // Extend the current match as far foward as it can go, without overflowing beforeTokens or // afterTokens. var searching = true; var currentLength = 1; var beforeIndex = beforeStart + currentLength; var afterIndex = afterStart + currentLength; while (searching && beforeIndex < beforeTokens.length && afterIndex < afterTokens.length){ var beforeWord = beforeTokens[beforeIndex].key; var afterWord = afterTokens[afterIndex].key; if (beforeWord === afterWord){ currentLength++; beforeIndex = beforeStart + currentLength; afterIndex = afterStart + currentLength; } else { searching = false; } } // If we've been asked to look behind, it's because both beforeTokens and afterTokens may // have a whitespace token just behind the current match that was previously ignored. If so, // we'll expand the current match to include it. if (lookBehind && beforeStart > 0 && afterStart > 0){ var prevBeforeKey = beforeTokens[beforeStart - 1].key; var prevAfterKey = afterTokens[afterStart - 1].key; if (prevBeforeKey === ' ' && prevAfterKey === ' '){ beforeStart--; afterStart--; currentLength++; } } return new Match(beforeStart, afterStart, currentLength, segment); } /** * Creates segment objects from the original document that can be used to restrict the area that * findBestMatch and it's helper functions search to increase performance. * * @param {Array.<Token>} beforeTokens Tokens from the before document. * @param {Array.<Token>} afterTokens Tokens from the after document. * @param {number} beforeIndex The index within the before document where this segment begins. * @param {number} afterIndex The index within the after document where this segment behinds. * * @return {Segment} The segment object. */ function createSegment(beforeTokens, afterTokens, beforeIndex, afterIndex){ return { beforeTokens: beforeTokens, afterTokens: afterTokens, beforeMap: createMap(beforeTokens), afterMap: createMap(afterTokens), beforeIndex: beforeIndex, afterIndex: afterIndex }; } /** * Finds all the matching blocks within the given segment in the before and after lists of * tokens. * * @param {Segment} The segment that should be searched for matching blocks. * * @return {Array.<Match>} The list of matching blocks in this range. */ function findMatchingBlocks(segment){ // Create a binary search tree to hold the matches we find in order. var matches = new MatchBinarySearchTree(); var match; var segments = [segment]; // Each time the best match is found in a segment, zero, one or two new segments may be // created from the parts of the original segment not included in the match. We will // continue to iterate until all segments have been processed. while(segments.length){ segment = segments.pop(); match = findBestMatch(segment); if (match && match.length){ // If there's an unmatched area at the start of the segment, create a new segment // from that area and throw it into the segments array to get processed. if (match.segmentStartInBefore > 0 && match.segmentStartInAfter > 0){ var leftBeforeTokens = segment.beforeTokens.slice( 0, match.segmentStartInBefore); var leftAfterTokens = segment.afterTokens.slice(0, match.segmentStartInAfter); segments.push(createSegment(leftBeforeTokens, leftAfterTokens, segment.beforeIndex, segment.afterIndex)); } // If there's an unmatched area at the end of the segment, create a new segment from that // area and throw it into the segments array to get processed. var rightBeforeTokens = segment.beforeTokens.slice(match.segmentEndInBefore + 1); var rightAfterTokens = segment.afterTokens.slice(match.segmentEndInAfter + 1); var rightBeforeIndex = segment.beforeIndex + match.segmentEndInBefore + 1; var rightAfterIndex = segment.afterIndex + match.segmentEndInAfter + 1; if (rightBeforeTokens.length && rightAfterTokens.length){ segments.push(createSegment(rightBeforeTokens, rightAfterTokens, rightBeforeIndex, rightAfterIndex)); } matches.add(match); } } return matches.toArray(); } /** * Gets a list of operations required to transform the before list of tokens into the * after list of tokens. An operation describes whether a particular list of consecutive * tokens are equal, replaced, inserted, or deleted. * * @param {Array.<string>} beforeTokens The before list of tokens. * @param {Array.<string>} afterTokens The after list of tokens. * * @return {Array.<Object>} The list of operations to transform the before list of * tokens into the after list of tokens, where each operation has the following * keys: * - {string} action One of {'replace', 'insert', 'delete', 'equal'}. * - {number} startInBefore The beginning of the range in the list of before tokens. * - {number} endInBefore The end of the range in the list of before tokens. * - {number} startInAfter The beginning of the range in the list of after tokens. * - {number} endInAfter The end of the range in the list of after tokens. */ function calculateOperations(beforeTokens, afterTokens){ if (!beforeTokens) throw new Error('Missing beforeTokens'); if (!afterTokens) throw new Error('Missing afterTokens'); var positionInBefore = 0; var positionInAfter = 0; var operations = []; var segment = createSegment(beforeTokens, afterTokens, 0, 0); var matches = findMatchingBlocks(segment); matches.push(new Match(beforeTokens.length, afterTokens.length, 0, segment)); for (var index = 0; index < matches.length; index++){ var match = matches[index]; var actionUpToMatchPositions = 'none'; if (positionInBefore === match.startInBefore){ if (positionInAfter !== match.startInAfter){ actionUpToMatchPositions = 'insert'; } } else { actionUpToMatchPositions = 'delete'; if (positionInAfter !== match.startInAfter){ actionUpToMatchPositions = 'replace'; } } if (actionUpToMatchPositions !== 'none'){ operations.push({ action: actionUpToMatchPositions, startInBefore: positionInBefore, endInBefore: (actionUpToMatchPositions !== 'insert' ? match.startInBefore - 1 : null), startInAfter: positionInAfter, endInAfter: (actionUpToMatchPositions !== 'delete' ? match.startInAfter - 1 : null) }); } if (match.length !== 0){ operations.push({ action: 'equal', startInBefore: match.startInBefore, endInBefore: match.endInBefore, startInAfter: match.startInAfter, endInAfter: match.endInAfter }); } positionInBefore = match.endInBefore + 1; positionInAfter = match.endInAfter + 1; } var postProcessed = []; var lastOp = {action: 'none'}; function isSingleWhitespace(op){ if (op.action !== 'equal'){ return false; } if (op.endInBefore - op.startInBefore !== 0){ return false; } return /^\s$/.test(beforeTokens.slice(op.startInBefore, op.endInBefore + 1)); } for (var i = 0; i < operations.length; i++){ var op = operations[i]; if ((isSingleWhitespace(op) && lastOp.action === 'replace') || (op.action === 'replace' && lastOp.action === 'replace')){ lastOp.endInBefore = op.endInBefore; lastOp.endInAfter = op.endInAfter; } else { postProcessed.push(op); lastOp = op; } } return postProcessed; } /** * A TokenWrapper provides a utility for grouping segments of tokens based on whether they're * wrappable or not. A tag is considered wrappable if it is closed within the given set of * tokens. For example, given the following tokens: * * ['</b>', 'this', ' ', 'is', ' ', 'a', ' ', '<b>', 'test', '</b>', '!'] * * The first '</b>' is not considered wrappable since the tag is not fully contained within the * array of tokens. The '<b>', 'test', and '</b>' would be a part of the same wrappable segment * since the entire bold tag is within the set of tokens. * * TokenWrapper has a method 'combine' which allows walking over the segments to wrap them in * tags. */ function TokenWrapper(tokens){ this.tokens = tokens; this.notes = tokens.reduce(function(data, token, index){ data.notes.push({ isWrappable: isWrappable(token), insertedTag: false }); var tag = !isVoidTag(token) && isTag(token); var lastEntry = data.tagStack[data.tagStack.length - 1]; if (tag){ if (lastEntry && '/' + lastEntry.tag === tag){ data.notes[lastEntry.position].insertedTag = true; data.tagStack.pop(); } else { data.tagStack.push({ tag: tag, position: index }); } } return data; }, {notes: [], tagStack: []}).notes; } /** * Wraps the contained tokens in tags based on output given by a map function. Each segment of * tokens will be visited. A segment is a continuous run of either all wrappable * tokens or unwrappable tokens. The given map function will be called with each segment of * tokens and the resulting strings will be combined to form the wrapped HTML. * * @param {function(boolean, Array.<string>)} mapFn A function called with an array of tokens * and whether those tokens are wrappable or not. The result should be a string. */ TokenWrapper.prototype.combine = function(mapFn, tagFn){ var notes = this.notes; var tokens = this.tokens.slice(); var segments = tokens.reduce(function(data, token, index){ if (notes[index].insertedTag){ tokens[index] = tagFn(tokens[index]); } if (data.status === null){ data.status = notes[index].isWrappable; } var status = notes[index].isWrappable; if (status !== data.status){ data.list.push({ isWrappable: data.status, tokens: tokens.slice(data.lastIndex, index) }); data.lastIndex = index; data.status = status; } if (index === tokens.length - 1){ data.list.push({ isWrappable: data.status, tokens: tokens.slice(data.lastIndex, index + 1) }); } return data; }, {list: [], status: null, lastIndex: 0}).list; return segments.map(mapFn).join(''); }; /** * Wraps and concatenates a list of tokens with a tag. Does not wrap tag tokens, * unless they are wrappable (i.e. void and atomic tags). * * @param {sting} tag The tag name of the wrapper tags. * @param {Array.<string>} content The list of tokens to wrap. * @param {string} dataPrefix (Optional) The prefix to use in data attributes. * @param {string} className (Optional) The class name to include in the wrapper tag. */ function wrap(tag, content, opIndex, dataPrefix, className){ var wrapper = new TokenWrapper(content); dataPrefix = dataPrefix ? dataPrefix + '-' : ''; var attrs = ' data-' + dataPrefix + 'operation-index="' + opIndex + '"'; if (className){ attrs += ' class="' + className + '"'; } return wrapper.combine(function(segment){ if (segment.isWrappable){ var val = segment.tokens.join(''); if (val.trim()){ return '<' + tag + attrs + '>' + val + '</' + tag + '>'; } } else { return segment.tokens.join(''); } return ''; }, function(openingTag){ var dataAttrs = ' data-diff-node="' + tag + '"'; dataAttrs += ' data-' + dataPrefix + 'operation-index="' + opIndex + '"'; return openingTag.replace(/>\s*$/, dataAttrs + '$&'); }); } /** * OPS.equal/insert/delete/replace are functions that render an operation into * HTML content. * * @param {Object} op The operation that applies to a prticular list of tokens. Has the * following keys: * - {string} action One of ['replace', 'insert', 'delete', 'equal']. * - {number} startInBefore The beginning of the range in the list of before tokens. * - {number} endInBefore The end of the range in the list of before tokens. * - {number} startInAfter The beginning of the range in the list of after tokens. * - {number} endInAfter The end of the range in the list of after tokens. * @param {Array.<string>} beforeTokens The before list of tokens. * @param {Array.<string>} afterTokens The after list of tokens. * @param {number} opIndex The index into the list of operations that identifies the change to * be rendered. This is used to mark wrapped HTML as part of the same operation. * @param {string} dataPrefix (Optional) The prefix to use in data attributes. * @param {string} className (Optional) The class name to include in the wrapper tag. * * @return {string} The rendering of that operation. */ var OPS = { 'equal': function(op, beforeTokens, afterTokens, opIndex, dataPrefix, className){ var tokens = afterTokens.slice(op.startInAfter, op.endInAfter + 1); return tokens.reduce(function(prev, curr){ return prev + curr.string; }, ''); }, 'insert': function(op, beforeTokens, afterTokens, opIndex, dataPrefix, className){ var tokens = afterTokens.slice(op.startInAfter, op.endInAfter + 1); var val = tokens.map(function(token){ return token.string; }); return wrap('ins', val, opIndex, dataPrefix, className); }, 'delete': function(op, beforeTokens, afterTokens, opIndex, dataPrefix, className){ var tokens = beforeTokens.slice(op.startInBefore, op.endInBefore + 1); var val = tokens.map(function(token){ return token.string; }); return wrap('del', val, opIndex, dataPrefix, className); }, 'replace': function(){ return OPS['delete'].apply(null, arguments) + OPS['insert'].apply(null, arguments); } }; /** * Renders a list of operations into HTML content. The result is the combined version * of the before and after tokens with the differences wrapped in tags. * * @param {Array.<string>} beforeTokens The before list of tokens. * @param {Array.<string>} afterTokens The after list of tokens. * @param {Array.<Object>} operations The list of operations to transform the before * list of tokens into the after list of tokens, where each operation has the * following keys: * - {string} action One of {'replace', 'insert', 'delete', 'equal'}. * - {number} startInBefore The beginning of the range in the list of before tokens. * - {number} endInBefore The end of the range in the list of before tokens. * - {number} startInAfter The beginning of the range in the list of after tokens. * - {number} endInAfter The end of the range in the list of after tokens. * @param {string} dataPrefix (Optional) The prefix to use in data attributes. * @param {string} className (Optional) The class name to include in the wrapper tag. * * @return {string} The rendering of the list of operations. */ function renderOperations(beforeTokens, afterTokens, operations, dataPrefix, className){ return operations.reduce(function(rendering, op, index){ return rendering + OPS[op.action]( op, beforeTokens, afterTokens, index, dataPrefix, className); }, ''); } /** * Compares two pieces of HTML content and returns the combined content with differences * wrapped in <ins> and <del> tags. * * @param {string} before The HTML content before the changes. * @param {string} after The HTML content after the changes. * @param {string} className (Optional) The class attribute to include in <ins> and <del> tags. * @param {string} dataPrefix (Optional) The data prefix to use for data attributes. The * operation index data attribute will be named `data-${dataPrefix-}operation-index`. * @param {string} atomicTags (Optional) Comma separated list of atomic tag names. The * list has to be in the form `tag1,tag2,...` e. g. `head,script,style`. If not used, * the default list `iframe,object,math,svg,script,video,head,style` will be used. * * @return {string} The combined HTML content with differences wrapped in <ins> and <del> tags. */ function diff(before, after, className, dataPrefix, atomicTags){ if (before === after) return before; // Enable user provided atomic tag list. atomicTags ? (atomicTagsRegExp = new RegExp('^<(' + atomicTags.replace(/\s*/g, '').replace(/,/g, '|') + ')')) : (atomicTagsRegExp = defaultAtomicTagsRegExp); before = htmlToTokens(before); after = htmlToTokens(after); var ops = calculateOperations(before, after); return renderOperations(before, after, ops, dataPrefix, className); } diff.htmlToTokens = htmlToTokens; diff.findMatchingBlocks = findMatchingBlocks; findMatchingBlocks.findBestMatch = findBestMatch; findMatchingBlocks.createMap = createMap; findMatchingBlocks.createToken = createToken; findMatchingBlocks.createSegment = createSegment; findMatchingBlocks.getKeyForToken = getKeyForToken; diff.calculateOperations = calculateOperations; diff.renderOperations = renderOperations; if (typeof define === 'function'){ define([], function(){ return diff; }); } else if (typeof module !== 'undefined' && module !== null){ module.exports = diff; } else { this.htmldiff = diff; } }).call(this);