UNPKG

@wordpress/blocks

Version:
183 lines (169 loc) 7.68 kB
/**
 * WordPress dependencies
 */
import { getPhrasingContentSchema, removeInvalidHTML } from '@wordpress/dom';

/**
 * Internal dependencies
 */
import { htmlToBlocks } from './html-to-blocks';
import { hasBlockSupport } from '../registration';
import { getBlockInnerHTML } from '../serializer';
import parse from '../parser';
import normaliseBlocks from './normalise-blocks';
import specialCommentConverter from './special-comment-converter';
import commentRemover from './comment-remover';
import isInlineContent from './is-inline-content';
import phrasingContentReducer from './phrasing-content-reducer';
import headRemover from './head-remover';
import msListConverter from './ms-list-converter';
import msListIgnore from './ms-list-ignore';
import listReducer from './list-reducer';
import imageCorrector from './image-corrector';
import blockquoteNormaliser from './blockquote-normaliser';
import divNormaliser from './div-normaliser';
import figureContentReducer from './figure-content-reducer';
import shortcodeConverter from './shortcode-converter';
import markdownConverter from './markdown-converter';
import iframeRemover from './iframe-remover';
import googleDocsUIDRemover from './google-docs-uid-remover';
import htmlFormattingRemover from './html-formatting-remover';
import brRemover from './br-remover';
import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils';
import emptyParagraphRemover from './empty-paragraph-remover';
import slackParagraphCorrector from './slack-paragraph-corrector';

/**
 * Forwards its arguments to `window.console.log` when that is available.
 *
 * Optional chaining does not protect against an undeclared identifier
 * (`window?.x` still throws a ReferenceError when `window` is not defined),
 * so the presence of `window` is tested with `typeof` first.
 *
 * @param {...*} args Values to log.
 *
 * @return {*} Whatever `console.log` returns, or `undefined` when it is not
 *             available.
 */
const log = (...args) =>
	typeof window !== 'undefined'
		? window.console?.log?.(...args)
		: undefined;

/**
 * Filters HTML to only contain phrasing content.
 *
 * @param {string} HTML The HTML to filter.
 *
 * @return {string} HTML only containing phrasing content.
 */
function filterInlineHTML(HTML) {
	HTML = deepFilterHTML(HTML, [
		headRemover,
		googleDocsUIDRemover,
		msListIgnore,
		phrasingContentReducer,
		commentRemover,
	]);
	HTML = removeInvalidHTML(HTML, getPhrasingContentSchema('paste'), {
		inline: true,
	});
	HTML = deepFilterHTML(HTML, [htmlFormattingRemover, brRemover]);

	// Allows us to ask for this information when we get a report.
	log('Processed inline HTML:\n\n', HTML);

	return HTML;
}

/**
 * Converts an HTML string to known blocks. Strips everything else.
 *
 * @param {Object} options
 * @param {string} [options.HTML]      The HTML to convert.
 * @param {string} [options.plainText] Plain text version.
 * @param {string} [options.mode]      Handle content as blocks or inline content.
 *                                     * 'AUTO': Decide based on the content passed.
 *                                     * 'INLINE': Always handle as inline content, and return string.
 *                                     * 'BLOCKS': Always handle as blocks, and return array of blocks.
 * @param {string} [options.tagName]   The tag into which content will be inserted.
 *
 * @return {Array|string} A list of blocks or a string, depending on `options.mode`
 *                        and, in 'AUTO' mode, on the pasted content.
 */
export function pasteHandler({
	HTML = '',
	plainText = '',
	mode = 'AUTO',
	tagName,
}) {
	// First of all, strip any meta tags. Tag names are case-insensitive in
	// HTML, so match `<META ...>` as well, like the wrapper regexes below.
	HTML = HTML.replace(/<meta[^>]+>/gi, '');

	// Strip Windows markers.
	HTML = HTML.replace(
		/^\s*<html[^>]*>\s*<body[^>]*>(?:\s*<!--\s*StartFragment\s*-->)?/i,
		''
	);
	HTML = HTML.replace(
		/(?:<!--\s*EndFragment\s*-->\s*)?<\/body>\s*<\/html>\s*$/i,
		''
	);

	// If we detect block delimiters in HTML, parse entirely as blocks.
	if (mode !== 'INLINE') {
		// Check plain text if there is no HTML.
		const content = HTML ? HTML : plainText;

		if (content.includes('<!-- wp:')) {
			const parseResult = parse(content);
			const isSingleFreeFormBlock =
				parseResult.length === 1 &&
				parseResult[0].name === 'core/freeform';

			// A lone freeform block is not returned as-is; it falls through
			// to the regular conversion below.
			if (!isSingleFreeFormBlock) {
				return parseResult;
			}
		}
	}

	// Normalize unicode to use composed characters.
	// Not normalizing the content will only affect older browsers and won't
	// entirely break the app.
	// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
	// See: https://core.trac.wordpress.org/ticket/30130
	// See: https://github.com/WordPress/gutenberg/pull/6983#pullrequestreview-125151075
	if (String.prototype.normalize) {
		HTML = HTML.normalize();
	}

	// Must be run before checking if it's inline content.
	HTML = deepFilterHTML(HTML, [slackParagraphCorrector]);

	// Consider plain text if:
	// * There is a plain text version.
	// * There is no HTML version, or it has no formatting.
	const isPlainText = plainText && (!HTML || isPlain(HTML));

	// Parse Markdown (and encoded HTML) if it's considered plain text.
	if (isPlainText) {
		HTML = plainText;

		// The markdown converter (Showdown) trims whitespace, so
		// whitespace-only text is passed through unconverted.
		if (!/^\s+$/.test(plainText)) {
			HTML = markdownConverter(HTML);
		}
	}

	// An array of HTML strings and block objects. The blocks replace matched
	// shortcodes.
	const pieces = shortcodeConverter(HTML);

	// The call to shortcodeConverter will always return more than one element
	// if shortcodes are matched. The reason is when shortcodes are matched
	// empty HTML strings are included.
	const hasShortcodes = pieces.length > 1;

	if (isPlainText && !hasShortcodes) {
		// Switch to inline mode if:
		// * The current mode is AUTO.
		// * The original plain text had no line breaks.
		// * The original plain text was not an HTML paragraph.
		// * The converted text is just a paragraph.
		if (
			mode === 'AUTO' &&
			!plainText.includes('\n') &&
			!plainText.startsWith('<p>') &&
			HTML.startsWith('<p>')
		) {
			mode = 'INLINE';
		}
	}

	if (mode === 'INLINE') {
		return filterInlineHTML(HTML);
	}

	if (mode === 'AUTO' && !hasShortcodes && isInlineContent(HTML, tagName)) {
		return filterInlineHTML(HTML);
	}

	const phrasingContentSchema = getPhrasingContentSchema('paste');
	const blockContentSchema = getBlockContentSchema('paste');

	const blocks = pieces
		.map((piece) => {
			// Already a block from shortcode.
			if (typeof piece !== 'string') {
				return piece;
			}

			const filters = [
				googleDocsUIDRemover,
				msListConverter,
				headRemover,
				listReducer,
				imageCorrector,
				phrasingContentReducer,
				specialCommentConverter,
				commentRemover,
				iframeRemover,
				figureContentReducer,
				blockquoteNormaliser(),
				divNormaliser,
			];

			const schema = {
				...blockContentSchema,
				// Keep top-level phrasing content, normalised by `normaliseBlocks`.
				...phrasingContentSchema,
			};

			piece = deepFilterHTML(piece, filters, blockContentSchema);
			piece = removeInvalidHTML(piece, schema);
			piece = normaliseBlocks(piece);
			piece = deepFilterHTML(
				piece,
				[htmlFormattingRemover, brRemover, emptyParagraphRemover],
				blockContentSchema
			);

			// Allows us to ask for this information when we get a report.
			log('Processed HTML piece:\n\n', piece);

			return htmlToBlocks(piece, pasteHandler);
		})
		.flat()
		.filter(Boolean);

	// If we're allowed to return inline content, and there is only one
	// inlineable block, and the original plain text content does not have any
	// line breaks, then treat it as inline paste.
	if (
		mode === 'AUTO' &&
		blocks.length === 1 &&
		hasBlockSupport(blocks[0].name, '__unstablePasteTextInline', false)
	) {
		const trimRegex = /^[\n]+|[\n]+$/g;
		// Don't catch line breaks at the start or end.
		const trimmedPlainText = plainText.replace(trimRegex, '');

		if (trimmedPlainText !== '' && !trimmedPlainText.includes('\n')) {
			return removeInvalidHTML(
				getBlockInnerHTML(blocks[0]),
				phrasingContentSchema
			).replace(trimRegex, '');
		}
	}

	return blocks;
}
//# sourceMappingURL=paste-handler.js.map