UNPKG

@syncfusion/ej2-richtexteditor

Version:
1,005 lines (1,004 loc) 84.9 kB
/**
 * Initializes a new instance of the MsWordPaste class: fills the lookup
 * tables kept on the instance, stores the owning editor manager and then
 * calls `addEventListener()` to subscribe to the observer events.
 *
 * @param {EditorManager} parent - The parent editor manager instance
 * @returns {void} - No return value
 */
function MsWordPaste(parent) {
    var bulletCycle = ['disc', 'square', 'circle'];
    var romanNumerals = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
        'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI', 'XVII', 'XVIII', 'XIX', 'XX'];

    /** list-style-type keywords for ordered lists */
    this.olData = [
        'decimal',
        'decimal-leading-zero',
        'lower-alpha',
        'lower-roman',
        'upper-alpha',
        'upper-roman',
        'lower-greek'
    ];
    /** list-style-type keywords for unordered lists (the three bullets, listed twice) */
    this.ulData = bulletCycle.concat(bulletCycle);
    /** Upper-case HTML node names; how the list is consumed is not visible in this block */
    this.ignorableNodes = ['A', 'APPLET', 'B', 'BLOCKQUOTE', 'BR', 'BUTTON', 'CENTER', 'CODE', 'COL', 'COLGROUP',
        'DD', 'DEL', 'DFN', 'DIR', 'DIV', 'DL', 'DT', 'EM', 'FIELDSET', 'FONT', 'FORM', 'FRAME', 'FRAMESET',
        'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HR', 'I', 'IMG', 'IFRAME', 'INPUT', 'INS', 'LABEL', 'LI', 'OL',
        'OPTION', 'P', 'PARAM', 'PRE', 'Q', 'S', 'SELECT', 'SPAN', 'STRIKE', 'STRONG', 'SUB', 'SUP', 'TABLE',
        'TBODY', 'TD', 'TEXTAREA', 'TFOOT', 'TH', 'THEAD', 'TITLE', 'TR', 'TT', 'U', 'UL'];
    /** List of HTML block node names */
    this.blockNode = ['div', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'address', 'blockquote', 'button',
        'center', 'dd', 'dir', 'dl', 'dt', 'fieldset', 'frameset', 'hr', 'iframe', 'isindex', 'li', 'map',
        'menu', 'noframes', 'noscript', 'object', 'ol', 'pre', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
        'tr', 'ul', 'header', 'article', 'nav', 'footer', 'section', 'aside', 'main', 'figure', 'figcaption'];
    /** CSS border property names searched for in table markup */
    this.borderStyle = ['border-top', 'border-right', 'border-bottom', 'border-left'];
    this.upperRomanNumber = romanNumerals;
    // Same twenty numerals, lower-cased into a separate array.
    this.lowerRomanNumber = romanNumerals.map(function (numeral) {
        return numeral.toLowerCase();
    });
    this.lowerGreekNumber = ['α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ',
        'ν', 'ξ', 'ο', 'π', 'ρ', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω'];
    /** Tag names whose start/end tags are stripped from the pasted markup */
    this.removableElements = ['o:p', 'style', 'w:sdt'];
    this.listContents = [];
    this.cropImageDimensions = [];
    this.parent = parent;
    this.addEventListener();
}
/**
 * Subscribes `wordCleanup` and `destroy` to the parent observer, bound to this instance.
 *
 * @returns {void}
 */
MsWordPaste.prototype.addEventListener = function () {
    var observer = this.parent.observer;
    observer.on(EVENTS.MS_WORD_CLEANUP_PLUGIN, this.wordCleanup, this);
    observer.on(EVENTS.INTERNAL_DESTROY, this.destroy, this);
};

/**
 * Removes the two observer subscriptions registered by `addEventListener`.
 *
 * @returns {void}
 */
MsWordPaste.prototype.removeEventListener = function () {
    var observer = this.parent.observer;
    observer.off(EVENTS.MS_WORD_CLEANUP_PLUGIN, this.wordCleanup);
    observer.off(EVENTS.INTERNAL_DESTROY, this.destroy);
};

/**
 * Cleans up MS Word content from clipboard data.
 *
 * The clipboard HTML is loaded into a detached `<p id="MSWord-Content">`.
 * When the raw HTML matches one of the Office marker patterns the full
 * clean-up pipeline runs and the callback receives the resulting markup,
 * `this.cropImageDimensions` and the detected source. Otherwise only the
 * OneNote / empty-meta handling runs and the callback receives `null`
 * in place of the crop dimensions.
 *
 * @param {Object} notifyArgs - Carries `args.clipboardData`, `callBack` and
 * the optional `allowedStylePropertiesArray`
 * @returns {void}
 */
MsWordPaste.prototype.wordCleanup = function (notifyArgs) {
    var allowedStyles = isNOU(notifyArgs.allowedStylePropertiesArray)
        ? []
        : notifyArgs.allowedStylePropertiesArray;
    var clipboard = notifyArgs.args.clipboardData;
    var html = clipboard.getData('text/HTML');
    var rtf = clipboard.getData('text/rtf');

    var container = createElement('p');
    container.setAttribute('id', 'MSWord-Content');
    container.innerHTML = html;
    this.addDoubleBr(container);

    var tableWidthPattern = /style='mso-width-source:/i;
    // Checked in this order; the first hit is enough.
    var officeMarkers = [
        /class='?Mso|style='[^ ]*\bmso-/i,
        /class="?Mso|style="[^ ]*\bmso-/i,
        /(class="?Mso|class='?Mso|class="?Xl|class='?Xl|class=Xl|style="[^"]*\bmso-|style='[^']*\bmso-|w:WordDocument)/gi,
        tableWidthPattern
    ];
    var source = this.findSource(container);
    var looksLikeOffice = officeMarkers.some(function (marker) {
        return marker.test(html);
    });

    if (!looksLikeOffice) {
        if (source === PASTE_SOURCE[2]) {
            this.handleOneNoteContent(container);
        }
        this.removeEmptyMetaTags(container);
        notifyArgs.callBack(container.innerHTML, null, source);
        return;
    }

    // Drops the first <img> tag from the raw string; the string is only
    // re-tested against the table-width pattern further down.
    html = html.replace(/<img[^>]+>/i, '');
    this.addListClass(container);
    var listNodes = this.listCleanUp(container, []);
    var firstListNode = listNodes[0];
    if (!isNOU(firstListNode)
        && firstListNode.parentElement.tagName !== 'UL'
        && firstListNode.parentElement.tagName !== 'OL') {
        this.listConverter(listNodes);
    }
    this.imageConversion(container, rtf);
    this.cleanList(container, 'UL');
    this.cleanList(container, 'OL');
    this.styleCorrection(container, allowedStyles);
    this.removingComments(container);
    this.removeUnwantedElements(container);
    this.removeEmptyElements(container);
    this.removeEmptyAnchorTag(container);
    this.breakLineAddition(container);
    this.processMargin(container);
    this.removeClassName(container);
    if (tableWidthPattern.test(html)) {
        this.addTableBorderClass(container);
    }
    notifyArgs.callBack(container.innerHTML, this.cropImageDimensions, source);
};
/**
 * Adds a `<p><br></p>` after the StartFragment comment when Chrome pasted an
 * `.Apple-interchange-newline` marker, then removes that marker.
 *
 * Nothing happens unless the marker exists, the user agent contains
 * "Chrome", the marker's parent is a `P`, and that parent is not the
 * container itself. The new paragraph copies the inline style text of the
 * marker's parent.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.addDoubleBr = function (clipboardDataElement) {
    var marker = clipboardDataElement.querySelector('.Apple-interchange-newline');
    if (isNOU(marker) || Browser.userAgent.indexOf('Chrome') === -1) {
        return;
    }
    var markerParent = marker.parentElement;
    if (markerParent.nodeName !== 'P' || clipboardDataElement === markerParent) {
        return;
    }
    var children = clipboardDataElement.childNodes;
    for (var index = 0; index < children.length; index++) {
        var node = children[index];
        if (node.nodeType !== Node.COMMENT_NODE || node.nodeValue.indexOf('StartFragment') === -1) {
            continue;
        }
        var spacer = createElement('p');
        spacer.innerHTML = '<br>';
        spacer.style.cssText = (spacer.getAttribute('style') || '') + markerParent.style.cssText;
        clipboardDataElement.insertBefore(spacer, node.nextSibling);
        detach(marker);
        // Only the first StartFragment comment is handled.
        break;
    }
};

/**
 * Cleans list elements by unwrapping every `div` found under the given list
 * tag: the div's children are moved in front of it, and the emptied div is
 * re-inserted after the list element returned by `findClosestListElem`.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @param {string} listTagName - List tag used as selector prefix ('UL' or 'OL')
 * @returns {void}
 */
MsWordPaste.prototype.cleanList = function (clipboardDataElement, listTagName) {
    var nestedDivs = clipboardDataElement.querySelectorAll(listTagName + ' div');
    var remaining = nestedDivs.length;
    // Walk from the last match to the first.
    while (remaining-- > 0) {
        var div = nestedDivs[remaining];
        var holder = div.parentNode;
        for (var child = div.firstChild; child; child = div.firstChild) {
            holder.insertBefore(child, div);
        }
        var outerList = this.findClosestListElem(div);
        if (outerList) {
            this.insertAfter(div, outerList);
        }
    }
};
/**
 * Inserts a node after a reference node. Does nothing when the reference
 * node has no parent.
 *
 * @param {Node} newNode - Node to insert
 * @param {Node} referenceNode - Node after which `newNode` is placed
 * @returns {void}
 */
MsWordPaste.prototype.insertAfter = function (newNode, referenceNode) {
    var host = referenceNode.parentNode;
    if (!host) {
        return;
    }
    host.insertBefore(newNode, referenceNode.nextSibling);
};

/**
 * Finds the outermost list element (UL or OL) enclosing the given element by
 * repeatedly hopping to the closest `ul` (preferred) or `ol` ancestor.
 *
 * @param {Element} currentElement - Element to start from
 * @returns {Element|undefined} The last list element reached, or `undefined`
 * when the element is not inside any list
 */
MsWordPaste.prototype.findClosestListElem = function (currentElement) {
    var outermost;
    var cursor = currentElement;
    while (!isNOU(cursor)) {
        var next = null;
        // `closest` also matches the element itself, hence the tagName guards.
        if (cursor.tagName !== 'UL' && !isNOU(cursor.closest('ul'))) {
            next = cursor.closest('ul');
        }
        else if (cursor.tagName !== 'OL' && !isNOU(cursor.closest('ol'))) {
            next = cursor.closest('ol');
        }
        if (!isNOU(next)) {
            outermost = next;
        }
        cursor = next;
    }
    return outermost;
};
/**
 * Adds the 'msolistparagraph' class to elements whose inline style carries an
 * `mso-list:l…` declaration, unless the element already has that class
 * (case-insensitive), its tag name starts with 'H', or it is an LI/OL/UL.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.addListClass = function (clipboardDataElement) {
    var listTags = ['LI', 'OL', 'UL'];
    var candidates = clipboardDataElement.querySelectorAll('*');
    for (var index = 0; index < candidates.length; index++) {
        var node = candidates[index];
        var inlineStyle = node.getAttribute('style');
        if (isNOU(inlineStyle)) {
            continue;
        }
        // All spaces are stripped, but only the first newline character.
        var compactStyle = inlineStyle.replace(/ /g, '').replace('\n', '');
        var alreadyTagged = node.className.toLowerCase().indexOf('msolistparagraph') !== -1;
        var startsWithH = node.tagName.charAt(0) === 'H';
        var isListTag = listTags.indexOf(node.tagName) !== -1;
        if (compactStyle.indexOf('mso-list:l') < 0 || alreadyTagged || startsWithH || isListTag) {
            continue;
        }
        node.classList.add('msolistparagraph');
    }
};

/**
 * Adds the 'e-rte-table-border' class to every table whose inner markup
 * mentions one of the property names in `this.borderStyle`.
 *
 * @param {HTMLElement} containerElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.addTableBorderClass = function (containerElement) {
    var borderProps = this.borderStyle;
    var tables = containerElement.querySelectorAll('table');
    for (var index = 0; index < tables.length; index++) {
        var table = tables[index];
        var mentionsBorder = borderProps.some(function (prop) {
            return table.innerHTML.indexOf(prop) >= 0;
        });
        if (mentionsBorder) {
            table.classList.add('e-rte-table-border');
        }
    }
};

/**
 * Converts images from MS Word to appropriate formats.
 *
 * Steps, in order: convert VML shapes, mark unsupported images, then (only
 * when at least one `img` exists) collect sources and names, turn the RTF
 * picture data into base64 entries, rewrite the image sources and finally
 * strip `src` from unsupported images.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @param {string} rtfData - RTF clipboard payload
 * @returns {void}
 */
MsWordPaste.prototype.imageConversion = function (clipboardDataElement, rtfData) {
    this.checkVShape(clipboardDataElement);
    this.markUnsupportedImages(clipboardDataElement.querySelectorAll('img'));

    // Queried again after the marking pass.
    var images = clipboardDataElement.querySelectorAll('img');
    if (images.length === 0) {
        return;
    }
    var sources = [];
    var names = [];
    var encoded = [];
    this.extractImageInfo(images, sources, names);
    this.processHexValues(this.hexConversion(rtfData), encoded);
    this.updateImageSources(clipboardDataElement, sources, encoded, names);
    this.cleanUnsupportedImages(clipboardDataElement);
};
/**
 * Marks unsupported images and removes the `v:shapes` attribute.
 *
 * Images without a `v:shapes` attribute are left untouched. For the others
 * the attribute is always removed, and 'e-rte-image-unsupported' is added
 * when `isUnsupportedImageShape` rejects the attribute value.
 *
 * @param {NodeList|Array} imageElements - Image elements to inspect
 * @returns {void}
 */
MsWordPaste.prototype.markUnsupportedImages = function (imageElements) {
    for (var i = 0; i < imageElements.length; i++) {
        var image = imageElements[i];
        var shapes = image.getAttribute('v:shapes');
        if (isNOU(shapes)) {
            continue;
        }
        if (this.isUnsupportedImageShape(shapes)) {
            image.classList.add('e-rte-image-unsupported');
        }
        image.removeAttribute('v:shapes');
    }
};

/**
 * Determines if an image shape is unsupported.
 *
 * A shape is supported when its `v:shapes` value contains any of the known
 * markers. The 'image' marker is matched case-insensitively; every other
 * marker is matched case-sensitively.
 *
 * @param {string} shapesValue - Value of the `v:shapes` attribute
 * @returns {boolean} `true` when no supported marker is present
 */
MsWordPaste.prototype.isUnsupportedImageShape = function (shapesValue) {
    var supportedShapes = [
        'Picture', 'Chart', '圖片', 'Grafik', 'image', 'Graphic', '_x0000_s', '_x0000_i', 'img1', 'Immagine'
    ];
    var lowerCased = shapesValue.toLowerCase();
    var isSupported = supportedShapes.some(function (shape) {
        return shape === 'image'
            ? lowerCased.indexOf(shape) >= 0
            : shapesValue.indexOf(shape) >= 0;
    });
    return !isSupported;
};

/**
 * Extracts image information from image elements.
 *
 * For every image not marked 'e-rte-image-unsupported', its `src` is appended
 * to `imageSources` and a name is appended to `imageNames`. The name is the
 * last path segment up to its first dot, followed by the image's index in
 * `imageElements`.
 *
 * An image without a `src` attribute is recorded with an empty source
 * instead of throwing on `null.split`, so the two output arrays stay aligned
 * with the list of supported images.
 *
 * @param {NodeList|Array} imageElements - Image elements to read
 * @param {string[]} imageSources - Output array receiving the sources
 * @param {string[]} imageNames - Output array receiving the generated names
 * @returns {void}
 */
MsWordPaste.prototype.extractImageInfo = function (imageElements, imageSources, imageNames) {
    for (var i = 0; i < imageElements.length; i++) {
        var image = imageElements[i];
        if (image.classList.contains('e-rte-image-unsupported')) {
            continue;
        }
        // getAttribute returns null when the attribute is missing.
        var src = image.getAttribute('src') || '';
        imageSources.push(src);
        var segments = src.split('/');
        var fileName = segments[segments.length - 1];
        imageNames.push(fileName.split('.')[0] + i);
    }
};
/**
 * Processes hex values and converts them to base64.
 *
 * Appends one `{ base64Data, isCroppedImage }` entry per hex value to
 * `base64Sources`; `base64Data` is `null` when the entry has no hex payload.
 * Crop metrics of cropped images are appended to `this.cropImageDimensions`.
 *
 * @param {Object[]} hexValues - Entries produced by `hexConversion`
 * @param {Object[]} base64Sources - Output array receiving the converted entries
 * @returns {void}
 */
MsWordPaste.prototype.processHexValues = function (hexValues, base64Sources) {
    for (var index = 0; index < hexValues.length; index++) {
        var entry = hexValues[index];
        var dataUri = null;
        if (!isNOU(entry.hex)) {
            dataUri = this.convertToBase64(entry);
        }
        base64Sources.push({ base64Data: dataUri, isCroppedImage: entry.isCroppedImage });
        if (!entry.isCroppedImage) {
            continue;
        }
        this.cropImageDimensions.push({
            goalWidth: entry.goalWidth,
            goalHeight: entry.goalHeight,
            cropLength: entry.cropLength,
            cropTop: entry.cropTop,
            cropR: entry.cropR,
            cropB: entry.cropB
        });
    }
};

/**
 * Updates image sources with base64 data or marks images as unsupported.
 *
 * Works on the images not yet marked unsupported, pairing the n-th image
 * with the n-th entry of each input array. A source that looks like an
 * http(s)/www/blob link is kept. Otherwise the base64 data URI is used when
 * available, and when it is not the `src` is removed and the image is marked
 * 'e-rte-image-unsupported'. Cropped non-link images get 'e-img-cropped'.
 * Every processed image receives the id 'msWordImg-' + its name.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @param {string[]} imageSources - Original sources
 * @param {Object[]} base64Sources - Converted entries from `processHexValues`
 * @param {string[]} imageNames - Generated image names
 * @returns {void}
 */
MsWordPaste.prototype.updateImageSources = function (clipboardDataElement, imageSources, base64Sources, imageNames) {
    // eslint-disable-next-line
    var urlPattern = new RegExp(/([^\S]|^)(((https?\:\/\/)|(www\.)|(blob\:))(\S+))/gi);
    var images = clipboardDataElement.querySelectorAll('img:not(.e-rte-image-unsupported)');
    for (var index = 0; index < images.length; index++) {
        var image = images[index];
        var originalSrc = imageSources[index];
        if (originalSrc.match(urlPattern)) {
            image.setAttribute('src', originalSrc);
        }
        else {
            var converted = base64Sources[index];
            var hasEntry = !isNOU(converted);
            if (hasEntry && !isNOU(converted.base64Data)) {
                image.setAttribute('src', converted.base64Data);
            }
            else {
                image.removeAttribute('src');
                image.classList.add('e-rte-image-unsupported');
            }
            if (hasEntry && converted.isCroppedImage) {
                image.classList.add('e-img-cropped');
            }
        }
        image.setAttribute('id', 'msWordImg-' + imageNames[index]);
    }
};

/**
 * Removes the `src` attribute from every element marked 'e-rte-image-unsupported'.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.cleanUnsupportedImages = function (clipboardDataElement) {
    var rejected = clipboardDataElement.querySelectorAll('.e-rte-image-unsupported');
    Array.prototype.forEach.call(rejected, function (image) {
        image.removeAttribute('src');
    });
};
/**
 * Processes VML elements: `V:SHAPETYPE` nodes are detached and `V:SHAPE`
 * nodes are handed to `processVShapeElement`.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.checkVShape = function (clipboardDataElement) {
    var nodes = clipboardDataElement.querySelectorAll('*');
    for (var index = 0; index < nodes.length; index++) {
        var node = nodes[index];
        var kind = node.nodeName;
        if (kind === 'V:SHAPETYPE') {
            detach(node);
        }
        else if (kind === 'V:SHAPE') {
            this.processVShapeElement(node);
        }
    }
};

/**
 * Replaces a `V:SHAPE` element with a standard `<img>` when its first
 * element child is a `V:IMAGEDATA`; the image takes over that child's `src`.
 * Shapes without such a child are left untouched.
 *
 * @param {Element} shapeElement - The V:SHAPE element
 * @returns {void}
 */
MsWordPaste.prototype.processVShapeElement = function (shapeElement) {
    var imageData = shapeElement.firstElementChild;
    if (!imageData || imageData.nodeName !== 'V:IMAGEDATA') {
        return;
    }
    var replacement = createElement('img');
    replacement.setAttribute('src', imageData.getAttribute('src'));
    // The image goes in front of the shape, then the shape is removed.
    shapeElement.parentElement.insertBefore(replacement, shapeElement);
    detach(shapeElement);
};

/**
 * Converts a hex entry to a base64 data URI.
 *
 * @param {Object} hexValue - Entry with `hex` (hex string) and `type` (MIME type)
 * @returns {string|null} `data:<type>;base64,<payload>`, or `null` when the
 * entry has no type
 */
MsWordPaste.prototype.convertToBase64 = function (hexValue) {
    var bytes = this.conHexStringToBytes(hexValue.hex);
    var payload = this.conBytesToBase64(bytes);
    if (!hexValue.type) {
        return null;
    }
    return 'data:' + hexValue.type + ';base64,' + payload;
};
/**
 * Converts a byte array to a base64 string.
 *
 * Bytes are consumed in groups of three, and each group yields four
 * characters. A final group of one or two bytes is zero-padded for the bit
 * arithmetic and the unused trailing positions are emitted as '='.
 *
 * @param {number[]} byteArray - Byte values to encode
 * @returns {string} Base64 text ('' for an empty array)
 */
MsWordPaste.prototype.conBytesToBase64 = function (byteArray) {
    var base64Chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
    var base64String = '';
    for (var i = 0; i < byteArray.length; i += 3) {
        var available = Math.min(3, byteArray.length - i);
        var first = byteArray[i];
        var second = available > 1 ? byteArray[i + 1] : 0;
        var third = available > 2 ? byteArray[i + 2] : 0;
        // 24 bits split into four 6-bit indices.
        var sextets = [
            (first & 0xFC) >> 2,
            ((first & 0x03) << 4) | (second >> 4),
            ((second & 0x0F) << 2) | ((third & 0xC0) >> 6),
            third & 0x3F
        ];
        for (var j = 0; j < 4; j++) {
            // n input bytes fill n + 1 output characters; the rest is padding.
            base64String += j <= available ? base64Chars.charAt(sextets[j]) : '=';
        }
    }
    return base64String;
};

/**
 * Converts a hexadecimal string to an array of bytes, two characters per
 * byte. A trailing single character of an odd-length string is parsed on
 * its own.
 *
 * Uses `substring` instead of the deprecated `String.prototype.substr`.
 *
 * @param {string} hexString - Hex digits
 * @returns {number[]} Parsed byte values
 */
MsWordPaste.prototype.conHexStringToBytes = function (hexString) {
    var byteArray = [];
    for (var position = 0; position < hexString.length; position += 2) {
        byteArray.push(parseInt(hexString.substring(position, position + 2), 16));
    }
    return byteArray;
};

/**
 * Converts RTF data to hex values for image processing.
 *
 * Every `{\pict … \bliptag… <hex>}` group found in the RTF is passed to
 * `extractImageData`, except bullet pictures (groups containing 'fIsBullet'
 * without 'wzName'). Entries for which `extractImageData` returns a falsy
 * value are dropped.
 *
 * A null/undefined `rtfData` (no RTF flavour available) yields an empty
 * result instead of throwing.
 *
 * @param {string} rtfData - RTF clipboard payload
 * @returns {Object[]} Image entries in document order
 */
MsWordPaste.prototype.hexConversion = function (rtfData) {
    var result = [];
    if (isNOU(rtfData)) {
        return result;
    }
    var pictureHeaderPattern = new RegExp('\\{\\\\pict[\\s\\S]+?\\\\bliptag-?\\d+(\\\\blipupi-?\\d+)?(\\{\\\\\\*\\\\blipuid\\s?[\\da-fA-F]+)?[\\s\\}]*?');
    var picturePattern = new RegExp('(?:(' + pictureHeaderPattern.source + '))([\\da-fA-F\\s]+)\\}', 'g');
    var matchedImages = rtfData.match(picturePattern);
    if (isNOU(matchedImages)) {
        return result;
    }
    for (var i = 0; i < matchedImages.length; i++) {
        var currentImage = matchedImages[i];
        // Skip bullet images
        if (currentImage.indexOf('fIsBullet') !== -1 && currentImage.indexOf('wzName') === -1) {
            continue;
        }
        if (!pictureHeaderPattern.test(currentImage)) {
            continue;
        }
        var imageData = this.extractImageData(currentImage, pictureHeaderPattern);
        if (imageData) {
            result.push(imageData);
        }
    }
    return result;
};
/**
 * Extracts image data from RTF picture content.
 *
 * The picture type is taken from the first blip marker found, checked in the
 * order png, jpeg, emf. EMF pictures are reported with `type` and `hex` set
 * to `null`; content with none of these markers yields `null`. For png/jpeg
 * the hex payload is what remains after removing the picture header and
 * every non-hex character. Crop metrics are read only when
 * `isImageCropped` reports a crop and are 0 otherwise.
 *
 * @param {string} imageContent - One `{\pict …}` group
 * @param {RegExp} pictureHeaderPattern - Pattern matching the picture header
 * @returns {Object|null} `{ hex, type, isCroppedImage, goalWidth, goalHeight,
 * cropLength, cropTop, cropR, cropB }` or `null` for unknown picture types
 */
MsWordPaste.prototype.extractImageData = function (imageContent, pictureHeaderPattern) {
    var blipTypes = [
        { marker: '\\pngblip', mime: 'image/png' },
        { marker: '\\jpegblip', mime: 'image/jpeg' },
        { marker: '\\emfblip', mime: null }
    ];
    var matchedBlip = null;
    for (var index = 0; index < blipTypes.length && matchedBlip === null; index++) {
        if (imageContent.indexOf(blipTypes[index].marker) !== -1) {
            matchedBlip = blipTypes[index];
        }
    }
    if (matchedBlip === null) {
        return null;
    }
    var mimeType = matchedBlip.mime;

    var cropped = this.isImageCropped(imageContent);
    var metrics = { goalWidth: 0, goalHeight: 0, cropLength: 0, cropTop: 0, cropR: 0, cropB: 0 };
    if (cropped) {
        metrics.goalWidth = this.extractCropValue('wgoal', imageContent);
        metrics.goalHeight = this.extractCropValue('hgoal', imageContent);
        metrics.cropLength = this.extractCropValue('cropl', imageContent);
        metrics.cropTop = this.extractCropValue('cropt', imageContent);
        metrics.cropR = this.extractCropValue('cropr', imageContent);
        metrics.cropB = this.extractCropValue('cropb', imageContent);
    }

    var hexPayload = null;
    if (mimeType) {
        hexPayload = imageContent.replace(pictureHeaderPattern, '').replace(/[^\da-fA-F]/g, '');
    }
    return {
        hex: hexPayload,
        type: mimeType,
        isCroppedImage: cropped,
        goalWidth: metrics.goalWidth,
        goalHeight: metrics.goalHeight,
        cropLength: metrics.cropLength,
        cropTop: metrics.cropTop,
        cropR: metrics.cropR,
        cropB: metrics.cropB
    };
};
/**
 * Determines if an image is cropped based on its crop values: either both
 * the left and top crop are positive, or the right crop is positive, or the
 * bottom crop is positive.
 *
 * @param {string} rtfData - RTF picture content
 * @returns {boolean} `true` when the picture is considered cropped
 */
MsWordPaste.prototype.isImageCropped = function (rtfData) {
    var self = this;
    var isPositive = function (cropProperty) {
        return self.extractCropValue(cropProperty, rtfData) > 0;
    };
    var leftAndTop = isPositive('cropl') && isPositive('cropt');
    var right = isPositive('cropr');
    var bottom = isPositive('cropb');
    return leftAndTop || right || bottom;
};

/**
 * Extracts the numeric value of a `\pic<cropProperty>` control word.
 *
 * Line breaks are first replaced by a backslash, because the pattern expects
 * the (optionally negative) number to be followed by a backslash.
 *
 * @param {string} cropProperty - Control word suffix, e.g. 'cropl' or 'wgoal'
 * @param {string} rtfData - RTF picture content
 * @returns {number} The parsed integer, or 0 when the control word is not found
 */
MsWordPaste.prototype.extractCropValue = function (cropProperty, rtfData) {
    var flattened = rtfData.replace(/\r\n\\/g, '\\').replace(/\n/g, '\\');
    var found = new RegExp('\\\\pic' + cropProperty + '(\\-?\\d+)\\\\').exec(flattened);
    if (!found || found.length < 2) {
        return 0;
    }
    return parseInt(found[1], 10);
};

/**
 * Removes the `class` attribute from every element that has one, except
 * elements carrying 'e-img-cropped' or 'e-rte-image-unsupported'.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.removeClassName = function (clipboardDataElement) {
    var classed = clipboardDataElement.querySelectorAll('*[class]:not(.e-img-cropped):not(.e-rte-image-unsupported)');
    Array.prototype.forEach.call(classed, function (node) {
        node.removeAttribute('class');
    });
};

/**
 * Adds line breaks in place of empty elements that contain only `&nbsp;`.
 *
 * For each element accepted by `isReplacableWithBreak`, the element returned
 * by `findDetachElem` is replaced with a `<br>` — but only when
 * `hasNonBreakingSpace` reports none for it and it has a parent element.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.breakLineAddition = function (clipboardDataElement) {
    var nodes = clipboardDataElement.querySelectorAll('*');
    for (var index = 0; index < nodes.length; index++) {
        var node = nodes[index];
        if (!this.isReplacableWithBreak(node)) {
            continue;
        }
        var target = this.findDetachElem(node);
        var lineBreak = createElement('br');
        var keepsNbsp = this.hasNonBreakingSpace(target);
        if (keepsNbsp || isNOU(target.parentElement)) {
            continue;
        }
        target.parentElement.insertBefore(lineBreak, target);
        detach(target);
    }
};
/**
 * Determines if an element should be replaced with a line break.
 *
 * The element must have no element children, consist of exactly `&nbsp;`,
 * and sit outside any `li` and any `td`. A `SPAN` additionally qualifies
 * only when it has neither a previous nor a next element sibling.
 *
 * @param {Element} element - Element to test
 * @returns {boolean} `true` when the element qualifies
 */
MsWordPaste.prototype.isReplacableWithBreak = function (element) {
    if (element.children.length !== 0 || element.innerHTML !== '&nbsp;') {
        return false;
    }
    if (element.closest('li') || element.closest('td')) {
        return false;
    }
    if (element.nodeName !== 'SPAN') {
        return true;
    }
    return isNOU(element.previousElementSibling) && isNOU(element.nextElementSibling);
};

/**
 * Checks if an element's text contains a non-breaking space (U+00A0).
 *
 * @param {Element} element - Element to test
 * @returns {boolean} `true` when the text is non-empty and contains U+00A0
 */
MsWordPaste.prototype.hasNonBreakingSpace = function (element) {
    var text = element.textContent;
    if (text.length === 0) {
        return false;
    }
    return text.match(/\u00a0/g) !== null;
};

/**
 * Finds the topmost empty ancestor that should be removed.
 *
 * Climbs while the parent exists, has only whitespace text, is not a TD/TH
 * and contains no `img`; returns the last element reached that way (the
 * element itself when its parent does not qualify).
 *
 * @param {Element} element - Element to start from
 * @returns {Element} Element to detach
 */
MsWordPaste.prototype.findDetachElem = function (element) {
    var current = element;
    for (var parent = current.parentElement; !isNOU(parent); parent = current.parentElement) {
        var isBlank = parent.textContent.trim() === '';
        var isCell = parent.tagName === 'TD' || parent.tagName === 'TH';
        var holdsImage = !isNOU(parent.querySelector('img'));
        if (!isBlank || isCell || holdsImage) {
            break;
        }
        current = parent;
    }
    return current;
};

/**
 * Removes unwanted elements from the HTML content: first the style
 * elements, then the tags handled by `removeElementsByTagName`.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.removeUnwantedElements = function (clipboardDataElement) {
    this.removeStyleElements(clipboardDataElement);
    this.removeElementsByTagName(clipboardDataElement);
};
/**
 * Removes every `<style>` element from the container.
 *
 * All matches are detached, not just the first one: the later tag-stripping
 * pass only deletes the `<style>` / `</style>` tags themselves, so the CSS
 * text of any style element left behind would end up in the pasted content.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.removeStyleElements = function (clipboardDataElement) {
    var styleElements = clipboardDataElement.querySelectorAll('style');
    // Walk backwards so the loop is unaffected if the list were live.
    for (var i = styleElements.length - 1; i >= 0; i--) {
        detach(styleElements[i]);
    }
};

/**
 * Strips the start and end tags of every tag name in
 * `this.removableElements` from the container markup, keeping whatever was
 * between them, and writes the result back to `innerHTML`.
 *
 * @param {HTMLElement} clipboardDataElement - Container holding the pasted markup
 * @returns {void}
 */
MsWordPaste.prototype.removeElementsByTagName = function (clipboardDataElement) {
    var htmlContent = clipboardDataElement.innerHTML;
    for (var i = 0; i < this.removableElements.length; i++) {
        var tagName = this.removableElements[i];
        var startTagPattern = new RegExp('<' + tagName + '\\s*[^>]*>', 'g');
        var endTagPattern = new RegExp('</' + tagName + '>', 'g');
        htmlContent = htmlContent.replace(startTagPattern, '').replace(endTagPattern, '');
    }
    clipboardDataElement.innerHTML = htmlContent;
};

/**
 * Finds the topmost empty ancestor that should be removed together with an
 * empty element.
 *
 * Climbs while the parent has no non-breaking space, only whitespace text,
 * is not the `MSWord-Content` container, contains no `img`, and the current
 * element has no ancestor with class 'MsoListParagraph'.
 *
 * @param {Element} element - Empty element to start from
 * @returns {Element|null} The element to detach, or `null` when the climb
 * reaches an element that has no parent
 */
MsWordPaste.prototype.findDetachEmptyElem = function (element) {
    var current = element;
    for (;;) {
        var parentElement = current.parentElement;
        if (isNOU(parentElement)) {
            return null;
        }
        var hasNbsp = this.hasNonBreakingSpace(parentElement);
        var isEmptyParent = !hasNbsp && parentElement.textContent.trim() === '';
        var isNotMsWordContent = parentElement.getAttribute('id') !== 'MSWord-Content';
        var isNotMsoListParagraph = !this.hasParentWithClass(current, 'MsoListParagraph');
        var hasNoImages = isNOU(parentElement.querySelector('img'));
        if (!(isEmptyParent && isNotMsWordContent && isNotMsoListParagraph && hasNoImages)) {
            return current;
        }
        current = parentElement;
    }
};
class */ MsWordPaste.prototype.hasParentWithClass = function (element, className) { var currentParentElem = element.parentElement; while (!isNOU(currentParentElem)) { if (currentParentElem.classList.contains(className)) { return true; } currentParentElem = currentParentElem.parentElement; } return false; }; /* Removes empty elements from the HTML content */ MsWordPaste.prototype.removeEmptyElements = function (containerElement) { var emptyElements = containerElement.querySelectorAll(':empty'); for (var i = 0; i < emptyElements.length; i++) { var currentElement = emptyElements[i]; // Handle empty cells with MsoNormal class if (this.isEmptyCellWithMsoNormal(currentElement)) { currentElement.innerHTML = '-'; } // Check if div has border var isDivWithoutBorder = this.isDivWithoutBorder(currentElement); // Skip certain elements that should remain empty if (this.shouldRemoveEmptyElement(currentElement, isDivWithoutBorder)) { var detachableElement = this.findDetachEmptyElem(currentElement); if (!isNOU(detachableElement)) { detach(detachableElement); } } } }; /* Checks if an element is an empty cell with MsoNormal class */ MsWordPaste.prototype.isEmptyCellWithMsoNormal = function (element) { var parentCell = element.closest('td'); return !isNOU(parentCell) && !isNOU(parentCell.querySelector('.MsoNormal')); }; /* Checks if a div element has no border */ MsWordPaste.prototype.isDivWithoutBorder = function (element) { if (element.tagName !== 'DIV') { return true; } var borderBottom = element.style.borderBottom; return borderBottom === 'none' || borderBottom === ''; }; /* Determines if an empty element should be removed */ MsWordPaste.prototype.shouldRemoveEmptyElement = function (element, isDivWithoutBorder) { var preservedTags = ['IMG', 'BR', 'IFRAME', 'TD', 'TH', 'HR']; return preservedTags.indexOf(element.tagName) === -1 && isDivWithoutBorder; }; /* Removes empty meta tags from the HTML content */ MsWordPaste.prototype.removeEmptyMetaTags = function (clipboardDataElement) 
{ var emptyMetaTags = clipboardDataElement.querySelectorAll('meta:empty'); // Process in reverse order to avoid index issues when removing elements for (var i = emptyMetaTags.length - 1; i >= 0; i--) { var metaTag = emptyMetaTags[i]; if (metaTag.textContent === '') { detach(metaTag); } } }; /* Corrects styles in the HTML content based on Word paste style configuration */ MsWordPaste.prototype.styleCorrection = function (clipboardDataElement, allowedStyleProperties) { var styleElements = clipboardDataElement.querySelectorAll('style'); var styleRules = []; if (styleElements.length === 0) { return; } // Extract style rules from the first or second style element var styleRulePattern = /[\S ]+\s+{[\s\S]+?}/gi; if (!isNOU(styleElements[0].innerHTML.match(styleRulePattern))) { styleRules = styleElements[0].innerHTML.match(styleRulePattern); } else if (styleElements.length > 1) { styleRules = styleElements[1].innerHTML.match(styleRulePattern); } // Convert style rules to a structured object var styleClassObject = !isNOU(styleRules) ? 
this.findStyleObject(styleRules) : null; if (isNOU(styleClassObject)) { return; } // Process style rules var selectors = Object.keys(styleClassObject); var styleValues = selectors.map(function (selector) { return styleClassObject["" + selector]; }); // Remove unwanted styles and filter existing styles styleValues = this.removeUnwantedStyle(styleValues, allowedStyleProperties); this.filterStyles(clipboardDataElement, allowedStyleProperties); // Apply styles to matching elements this.applyStylesToElements(clipboardDataElement, selectors, styleValues); // Process list-specific styles this.processListStyles(clipboardDataElement, selectors, styleValues); }; /* Filters inline styles to keep only allowed style properties */ MsWordPaste.prototype.filterStyles = function (clipboardDataElement, allowedStyleProperties) { var elementsWithStyle = clipboardDataElement.querySelectorAll('*[style]'); for (var i = 0; i < elementsWithStyle.length; i++) { var currentElement = elementsWithStyle[i]; var styleDeclarations = currentElement.getAttribute('style').split(';'); var filteredStyle = ''; // Process each style declaration for (var j = 0; j < styleDeclarations.length; j++) { var declaration = styleDeclarations[j]; var propertyName = declaration.split(':')[0].trim(); // Keep only allowed style properties if (allowedStyleProperties.indexOf(propertyName) >= 0) { filteredStyle += declaration + ';'; } } // Apply filtered styles back to the element currentElement.style.cssText = filteredStyle; } }; /* Applies styles to elements matching the selectors */ MsWordPaste.prototype.applyStylesToElements = function (clipboardDataElement, selectors, styleValues) { var matchedElements; var isClassSelector = false; var specialSelectorPattern = /^(p|div|li)\.(1|10|11)$/; for (var i = 0; i < selectors.length; i++) { var currentSelector = selectors[i]; var selectorParts = currentSelector.split('.'); var baseSelector = selectorParts[0]; // Determine how to select elements based on the selector format 
if (baseSelector === '') { // Class selector (className) var className = selectorParts[1]; matchedElements = clipboardDataElement.getElementsByClassName(className); isClassSelector = true; } else if ((selectorParts.length === 1 && baseSelector.indexOf('@') >= 0) || (specialSelectorPattern.test(currentSelector))) { // Skip special selectors continue; } else if (selectorParts.length === 1 && baseSelector.indexOf('@') < 0) { // Tag selector (tagName) matchedElements = clipboardDataElement.getElementsByTagName(baseSelector); } else { // Complex selector (tag.class, etc.) matchedElements = clipboardDataElement.querySelectorAll(currentSelector); } // Apply styles to each matching element this.applyStyleToElementCollection(matchedElements, currentSelector, styleValues[i], isClassSelector); isClassSelector = false; } }; /* Applies styles to a collection of elements */ MsWordPaste.prototype.applyStyleToElementCollection = function (elements, selector, styleValue, isClassSelector) { for (var j = 0; j < elements.length; j++) { var currentElement = elements[j]; // Skip paragraph elements inside list items if (currentElement.closest('li') && selector === 'p') { continue; } var existingStyle = currentElement.getAttribute('style'); var hasExistingStyle = !isNOU(existingStyle) && existingStyle.trim() !== ''; if (hasExistingStyle) { // Process existing style var styleDeclarations = styleValue.split(';'); this.removeBorderNoneStyles(styleDeclarations); if (!isClassSelector) { this.removeOverlappingStyles(styleDeclarations, existingStyle); } var combinedStyle = styleDeclarations.join(';') + ';' + existingStyle; currentElement.style.cssText = combinedStyle; } else { // Apply clean style styleValue = styleValue .replace(/text-indent:-.*?;?/g, '') // Remove 'text-indent' .replace(/border:\s*none;?/g, '') // Remove 'border:none' .trim(); currentElement.style.cssText = styleValue; } } }; /* Removes 'border: none' styles from the style array */ MsWordPaste.prototype.removeBorderNoneStyles 
= function (styleDeclarations) { for (var i = 0; i < styleDeclarations.length; i++) { var declarationParts = styleDeclarations[i].split(':'); if (declarationParts[0] === 'border' && declarationParts[1] === 'none') { styleDeclarations.splice(i, 1); i--; } } }; /* Removes styles that would overlap with existing inline styles */ MsWordPaste.prototype.removeOverlappingStyles = function (styleDeclarations, existingStyle) { for (var i = 0; i < styleDeclarations.length; i++) { var propertyName = styleDeclarations[i].split(':')[0]; if (existingStyle.indexOf(propertyName + ':') >= 0) { styleDeclarations.splice(i, 1); i--; } } }; /* Processes list-specific styles */ MsWordPaste.prototype.processListStyles = function (containerElement, selectors, styleValues) { var listClasses = [ 'MsoListParagraphCxSpFirst', 'MsoListParagraphCxSpMiddle', 'MsoListParagraphCxSpLast' ]; for (var i = 0; i < listClasses.length; i++) { var listClassName = listClasses[i]; var listSelector = 'li.' + listClassName; var selectorIndex = selectors.indexOf(listSelector); if (selectorIndex > -1) { var listElements = containerElement.querySelectorAll('ol.' + listClassName + ', ul.' 
+ listClassName); this.adjustListMargins(listElements, styleValues[selectorIndex]); } } }; /* Adjusts margins for list elements */ MsWordPaste.prototype.adjustListMargins = function (listElements, styleValue) { for (var j = 0; j < listElements.length; j++) { var listElement = listElements[j]; var existingStyle = listElement.getAttribute('style'); var hasValidStyle = !isNOU(existingStyle) && existingStyle.trim() !== '' && listElement.style.marginLeft !== ''; if (hasValidStyle) { var styleDeclarations = styleValue.split(';'); for (var k = 0; k < styleDeclarations.length; k++) { var declaration = styleDeclarations[k]; var propertyName = declaration.split(':')[0]; if ('margin-left'.indexOf(propertyName) >= 0) { this.adjustMarginLeftValue(listElement, declaration); } } } } }; /* Adjusts the margin-left value for a list element */ MsWordPaste.prototype.adjustMarginLeftValue = function (element, marginDeclaration) { var declarationParts = marginDeclaration.split(':'); var marginValue = declarationParts[1]; var elementMargin = element.style.marginLeft; var hasInchUnits = !isNOU(marginValue) && marginValue.indexOf('in') >= 0 && elementMargin.indexOf('in') >= 0; if (hasInchUnits) { var classStyleValue = parseFloat(marginValue.split('in')[0]); var inlineStyleValue = parseFloat(elementMargin.split('in')[0]); element.style.marginLeft = (inlineStyleValue - classStyleValue) + 'in'; } }; /* Filters style values to keep only allowed style properties */ MsWordPaste.prototype.removeUnwantedStyle = function (styleValues, allowedStyleProperties) { var filteredValues = []; for (var i = 0; i < styleValues.length; i++) { var styleDeclarations = styleValues[i].split(';'); var filteredDeclarations = ''; for (var j = 0; j < styleDeclarations.length; j++) { var declaration = styleDeclarations[j]; var propertyName = declaration.split(':')[0]; // Keep only allowed style properties if (allowedStyleProperties.indexOf(propertyName) >= 0) { filteredDeclarations += declaration + ';'; } } 
filteredValues[i] = filteredDeclarations; } return filteredValues; }; /* Converts CSS rule strings into a structured object mapping selectors to style declarations */ MsWordPaste.prototype.findStyleObject = function (styleRules) { var styleClassObject = {}; for (var i = 0; i < styleRules.length; i++) { var currentRule = styleRules[i]; // Extract selector and style parts from the rule var selectorText = currentRule.replace(/([\S ]+\s+){[\s\S]+?}/gi, '$1'); var styleText = currentRule.replace(/[\S ]+\s+{([\s\S]+?)}/gi, '$1'); // Clean up whitespace and line breaks selectorText = this.cleanupStyleText(selectorText); styleText = this.cleanupStyleText(styleText); // Map each selector to the style declarations var selectors = selectorText.split(', '); for (var j = 0; j < selectors.length; j++) { styleClassObject[selectors[j]] = styleText; } } return styleClassObject; }; /* Cleans up style text by removing whitespace and line breaks */ MsWordPaste.prototype.cleanupStyleText = function (text) { var cleanedText = text; // Remove leading and trailing whitespace cleanedText = cleanedText.replace(/^[\s]|[\s]$/gm, ''); // Remove line breaks cleanedText = cleanedText.replace(/\n|\r|\n\r/g, ''); return cleanedText; }; /* Removes HTML comments from an element */ MsWordPaste.prototype.removingComments = function (clipboardDataElement) { var htmlContent = clipboardDataElement.innerHTML; var commentPattern = /<!--[\s\S]*?-->/g; var cleanedContent = htmlContent.replace(commentPattern, ''); clipboardDataElement.innerHTML = cleanedContent; }; /* Cleans up HTML content and identifies list nodes for conversion */ MsWordPaste.prototype.listCleanUp = function (containerElement, listNodes) { var nodesToRemove = []; var previousWasMsoList = false; var allElements = containerElement.querySelectorAll('*'); for (var i = 0; i < allElements.length; i++) { var currentElement = allElements[i]; // Check if element should be ignored if (this.shouldIgnoreElement(currentElement)) { 
nodesToRemove.push(currentElement); continue; } // Check if element is an MS Word list paragraph if (this.isMsoListParagraph(currentElement)) { // Add a null separator for new list if needed if (this.isFirstListItem(currentElement) && listNodes.length > 0 && listNodes[listNodes.length - 1] !== null) { listNodes.push(null); } // Add the list node listNodes.push(currentElement); } // Add a null separator when transitioning from list to non-list block if (this.shouldAddListSeparator(previousWasMsoList, currentElement)) { listNodes.push(null); } // Update previous state flag for next iteration if (this.isBlockElement(currentElement)) { previousWasMsoList = this.isMsoListParagraph(currentElement); } } // Add a final null separator if needed if (listNodes.length > 0 && listNodes[listNodes.length - 1] !== null) { listNodes.push(null); } return listNodes; }; /* Determines if an element should be ignored during cleanup */ MsWordPaste.prototype.shouldIgnoreElement = function (element) { var isNotInIgnorableList = this.ignorableNodes.indexOf(element.nodeName) === -1; var isEmptyTextNode = element.nodeType === 3 && element.textContent.trim() === ''; return isNotInIgnorableList || isEmptyTextNode; }; /* Determines if an element is an MS Word list paragraph */ MsWordPaste.prototype.isMsoListParagraph = function (element) { var elementClass = element.className; var hasClassName = elementClass && elementClass.toLowerCase().indexOf('msolistparagraph') !== -1; var elementStyles = element.getAttribute('style'); var hasMsoListStyle = !isNOU(elementStyles) && elementStyles.indexOf('mso-list:') >= 0; return hasClassName && hasMsoListStyle; }; /* Determines if an element is the first item in a list */ MsWordPaste.prototype.isFirstListItem = function (element) { return element.className.indexOf('MsoListParagraphCxSpFirst') >= 0; }; /* Determines if a list separator should be added */ MsWordPaste.prototype.shouldAddListSeparator = function (previousWasMsoList, currentElement) { return 
previousWasMsoList && this.isBlockElement(currentElement) && !this.isMsoListParagraph(currentElement); }; /* Determines if an element is a block element */ MsWordPaste.prototype.isBlockElement = function (element) { return this.blockNode.indexOf(element.nodeName.toLowerCase()) !== -1; }; /** * Converts MS Word list nodes to standard HTML lists * * @param {Element[]} listNodes - Array of list nodes to convert * @returns {void} - No return value * @private */ MsWordPaste.prototype.listConverter = function (listNodes) { var convertedLists = []; var listCollection = []; var currentListStyle = ''; // Process list nodes and build collection this.processListNodes(listNodes, convertedLists, listCollection, currentListStyl