UNPKG

@ckeditor/ckeditor5-paste-from-office

Version:

Paste from Office feature for CKEditor 5.

254 lines (253 loc) • 10 kB
/** * @license Copyright (c) 2003-2024, CKSource Holding sp. z o.o. All rights reserved. * For licensing, see LICENSE.md or https://ckeditor.com/legal/ckeditor-oss-license */ /** * @module paste-from-office/filters/image */ /* globals btoa */ import { Matcher, UpcastWriter } from 'ckeditor5/src/engine.js'; /** * Replaces source attribute of all `<img>` elements representing regular * images (not the Word shapes) with inlined base64 image representation extracted from RTF or Blob data. * * @param documentFragment Document fragment on which transform images. * @param rtfData The RTF data from which images representation will be used. */ export function replaceImagesSourceWithBase64(documentFragment, rtfData) { if (!documentFragment.childCount) { return; } const upcastWriter = new UpcastWriter(documentFragment.document); const shapesIds = findAllShapesIds(documentFragment, upcastWriter); removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, upcastWriter); insertMissingImgs(shapesIds, documentFragment, upcastWriter); removeAllShapeElements(documentFragment, upcastWriter); const images = findAllImageElementsWithLocalSource(documentFragment, upcastWriter); if (images.length) { replaceImagesFileSourceWithInlineRepresentation(images, extractImageDataFromRtf(rtfData), upcastWriter); } } /** * Converts given HEX string to base64 representation. * * @internal * @param hexString The HEX string to be converted. * @returns Base64 representation of a given HEX string. */ export function _convertHexToBase64(hexString) { return btoa(hexString.match(/\w{2}/g).map(char => { return String.fromCharCode(parseInt(char, 16)); }).join('')); } /** * Finds all shapes (`<v:*>...</v:*>`) ids. Shapes can represent images (canvas) * or Word shapes (which does not have RTF or Blob representation). * * @param documentFragment Document fragment from which to extract shape ids. * @returns Array of shape ids. */ function findAllShapesIds(documentFragment, writer) { const range = writer.createRangeIn(documentFragment); const shapeElementsMatcher = new Matcher({ name: /v:(.+)/ }); const shapesIds = []; for (const value of range) { if (value.type != 'elementStart') { continue; } const el = value.item; const previousSibling = el.previousSibling; const prevSiblingName = previousSibling && previousSibling.is('element') ? previousSibling.name : null; // List of ids which should not be considered as shapes. // https://github.com/ckeditor/ckeditor5/pull/15847#issuecomment-1941543983 const exceptionIds = ['Chart']; const isElementAShape = shapeElementsMatcher.match(el); const hasElementGfxdataAttribute = el.getAttribute('o:gfxdata'); const isPreviousSiblingAShapeType = prevSiblingName === 'v:shapetype'; const isElementIdInExceptionsArray = hasElementGfxdataAttribute && exceptionIds.some(item => el.getAttribute('id').includes(item)); // If shape element has 'o:gfxdata' attribute and is not directly before // `<v:shapetype>` element it means that it represents a Word shape. if (isElementAShape && hasElementGfxdataAttribute && !isPreviousSiblingAShapeType && !isElementIdInExceptionsArray) { shapesIds.push(value.item.getAttribute('id')); } } return shapesIds; } /** * Removes all `<img>` elements which represents Word shapes and not regular images. * * @param shapesIds Shape ids which will be checked against `<img>` elements. * @param documentFragment Document fragment from which to remove `<img>` elements. */ function removeAllImgElementsRepresentingShapes(shapesIds, documentFragment, writer) { const range = writer.createRangeIn(documentFragment); const imageElementsMatcher = new Matcher({ name: 'img' }); const imgs = []; for (const value of range) { if (value.item.is('element') && imageElementsMatcher.match(value.item)) { const el = value.item; const shapes = el.getAttribute('v:shapes') ? el.getAttribute('v:shapes').split(' ') : []; if (shapes.length && shapes.every(shape => shapesIds.indexOf(shape) > -1)) { imgs.push(el); // Shapes may also have empty source while content is paste in some browsers (Safari). } else if (!el.getAttribute('src')) { imgs.push(el); } } } for (const img of imgs) { writer.remove(img); } } /** * Removes all shape elements (`<v:*>...</v:*>`) so they do not pollute the output structure. * * @param documentFragment Document fragment from which to remove shape elements. */ function removeAllShapeElements(documentFragment, writer) { const range = writer.createRangeIn(documentFragment); const shapeElementsMatcher = new Matcher({ name: /v:(.+)/ }); const shapes = []; for (const value of range) { if (value.type == 'elementStart' && shapeElementsMatcher.match(value.item)) { shapes.push(value.item); } } for (const shape of shapes) { writer.remove(shape); } } /** * Inserts `img` tags if there is none after a shape. */ function insertMissingImgs(shapeIds, documentFragment, writer) { const range = writer.createRangeIn(documentFragment); const shapes = []; for (const value of range) { if (value.type == 'elementStart' && value.item.is('element', 'v:shape')) { const id = value.item.getAttribute('id'); if (shapeIds.includes(id)) { continue; } if (!containsMatchingImg(value.item.parent.getChildren(), id)) { shapes.push(value.item); } } } for (const shape of shapes) { const attrs = { src: findSrc(shape) }; if (shape.hasAttribute('alt')) { attrs.alt = shape.getAttribute('alt'); } const img = writer.createElement('img', attrs); writer.insertChild(shape.index + 1, img, shape.parent); } function containsMatchingImg(nodes, id) { for (const node of nodes) { /* istanbul ignore else -- @preserve */ if (node.is('element')) { if (node.name == 'img' && node.getAttribute('v:shapes') == id) { return true; } if (containsMatchingImg(node.getChildren(), id)) { return true; } } } return false; } function findSrc(shape) { for (const child of shape.getChildren()) { /* istanbul ignore else -- @preserve */ if (child.is('element') && child.getAttribute('src')) { return child.getAttribute('src'); } } } } /** * Finds all `<img>` elements in a given document fragment which have source pointing to local `file://` resource. * * @param documentFragment Document fragment in which to look for `<img>` elements. * @returns result All found images grouped by source type. */ function findAllImageElementsWithLocalSource(documentFragment, writer) { const range = writer.createRangeIn(documentFragment); const imageElementsMatcher = new Matcher({ name: 'img' }); const imgs = []; for (const value of range) { if (value.item.is('element') && imageElementsMatcher.match(value.item)) { if (value.item.getAttribute('src').startsWith('file://')) { imgs.push(value.item); } } } return imgs; } /** * Extracts all images HEX representations from a given RTF data. * * @param rtfData The RTF data from which to extract images HEX representation. * @returns Array of found HEX representations. Each array item is an object containing: * * * hex Image representation in HEX format. * * type Type of image, `image/png` or `image/jpeg`. */ function extractImageDataFromRtf(rtfData) { if (!rtfData) { return []; } const regexPictureHeader = /{\\pict[\s\S]+?\\bliptag-?\d+(\\blipupi-?\d+)?({\\\*\\blipuid\s?[\da-fA-F]+)?[\s}]*?/; const regexPicture = new RegExp('(?:(' + regexPictureHeader.source + '))([\\da-fA-F\\s]+)\\}', 'g'); const images = rtfData.match(regexPicture); const result = []; if (images) { for (const image of images) { let imageType = false; if (image.includes('\\pngblip')) { imageType = 'image/png'; } else if (image.includes('\\jpegblip')) { imageType = 'image/jpeg'; } if (imageType) { result.push({ hex: image.replace(regexPictureHeader, '').replace(/[^\da-fA-F]/g, ''), type: imageType }); } } } return result; } /** * Replaces `src` attribute value of all given images with the corresponding base64 image representation. * * @param imageElements Array of image elements which will have its source replaced. * @param imagesHexSources Array of images hex sources (usually the result of `extractImageDataFromRtf()` function). * The array should be the same length as `imageElements` parameter. */ function replaceImagesFileSourceWithInlineRepresentation(imageElements, imagesHexSources, writer) { // Assume there is an equal amount of image elements and images HEX sources so they can be matched accordingly based on existing order. if (imageElements.length === imagesHexSources.length) { for (let i = 0; i < imageElements.length; i++) { const newSrc = `data:${imagesHexSources[i].type};base64,${_convertHexToBase64(imagesHexSources[i].hex)}`; writer.setAttribute('src', newSrc, imageElements[i]); } } }