UNPKG

@wordpress/blocks

Version:
251 lines (221 loc) 7.79 kB
/** * WordPress dependencies */ import { getPhrasingContentSchema, removeInvalidHTML } from '@wordpress/dom'; /** * Internal dependencies */ import { htmlToBlocks } from './html-to-blocks'; import { hasBlockSupport } from '../registration'; import { getBlockInnerHTML } from '../serializer'; import parse from '../parser'; import normaliseBlocks from './normalise-blocks'; import specialCommentConverter from './special-comment-converter'; import commentRemover from './comment-remover'; import isInlineContent from './is-inline-content'; import phrasingContentReducer from './phrasing-content-reducer'; import headRemover from './head-remover'; import msListConverter from './ms-list-converter'; import msListIgnore from './ms-list-ignore'; import listReducer from './list-reducer'; import imageCorrector from './image-corrector'; import blockquoteNormaliser from './blockquote-normaliser'; import divNormaliser from './div-normaliser'; import figureContentReducer from './figure-content-reducer'; import shortcodeConverter from './shortcode-converter'; import markdownConverter from './markdown-converter'; import iframeRemover from './iframe-remover'; import googleDocsUIDRemover from './google-docs-uid-remover'; import htmlFormattingRemover from './html-formatting-remover'; import brRemover from './br-remover'; import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils'; import emptyParagraphRemover from './empty-paragraph-remover'; import slackParagraphCorrector from './slack-paragraph-corrector'; const log = ( ...args ) => window?.console?.log?.( ...args ); /** * Filters HTML to only contain phrasing content. * * @param {string} HTML The HTML to filter. * * @return {string} HTML only containing phrasing content. */ function filterInlineHTML( HTML ) { HTML = deepFilterHTML( HTML, [ headRemover, googleDocsUIDRemover, msListIgnore, phrasingContentReducer, commentRemover, ] ); HTML = removeInvalidHTML( HTML, getPhrasingContentSchema( 'paste' ), { inline: true, } ); HTML = deepFilterHTML( HTML, [ htmlFormattingRemover, brRemover ] ); // Allows us to ask for this information when we get a report. log( 'Processed inline HTML:\n\n', HTML ); return HTML; } /** * Converts an HTML string to known blocks. Strips everything else. * * @param {Object} options * @param {string} [options.HTML] The HTML to convert. * @param {string} [options.plainText] Plain text version. * @param {string} [options.mode] Handle content as blocks or inline content. * * 'AUTO': Decide based on the content passed. * * 'INLINE': Always handle as inline content, and return string. * * 'BLOCKS': Always handle as blocks, and return array of blocks. * @param {Array} [options.tagName] The tag into which content will be inserted. * * @return {Array|string} A list of blocks or a string, depending on `handlerMode`. */ export function pasteHandler( { HTML = '', plainText = '', mode = 'AUTO', tagName, } ) { // First of all, strip any meta tags. HTML = HTML.replace( /<meta[^>]+>/g, '' ); // Strip Windows markers. HTML = HTML.replace( /^\s*<html[^>]*>\s*<body[^>]*>(?:\s*<!--\s*StartFragment\s*-->)?/i, '' ); HTML = HTML.replace( /(?:<!--\s*EndFragment\s*-->\s*)?<\/body>\s*<\/html>\s*$/i, '' ); // If we detect block delimiters in HTML, parse entirely as blocks. if ( mode !== 'INLINE' ) { // Check plain text if there is no HTML. const content = HTML ? HTML : plainText; if ( content.indexOf( '<!-- wp:' ) !== -1 ) { const parseResult = parse( content ); const isSingleFreeFormBlock = parseResult.length === 1 && parseResult[ 0 ].name === 'core/freeform'; if ( ! isSingleFreeFormBlock ) { return parseResult; } } } // Normalize unicode to use composed characters. // Not normalizing the content will only affect older browsers and won't // entirely break the app. // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize // See: https://core.trac.wordpress.org/ticket/30130 // See: https://github.com/WordPress/gutenberg/pull/6983#pullrequestreview-125151075 if ( String.prototype.normalize ) { HTML = HTML.normalize(); } // Must be run before checking if it's inline content. HTML = deepFilterHTML( HTML, [ slackParagraphCorrector ] ); // Consider plain text if: // * There is a plain text version. // * There is no HTML version, or it has no formatting. const isPlainText = plainText && ( ! HTML || isPlain( HTML ) ); // Parse Markdown (and encoded HTML) if it's considered plain text. if ( isPlainText ) { HTML = plainText; // The markdown converter (Showdown) trims whitespace. if ( ! /^\s+$/.test( plainText ) ) { HTML = markdownConverter( HTML ); } } // An array of HTML strings and block objects. The blocks replace matched // shortcodes. const pieces = shortcodeConverter( HTML ); // The call to shortcodeConverter will always return more than one element // if shortcodes are matched. The reason is when shortcodes are matched // empty HTML strings are included. const hasShortcodes = pieces.length > 1; if ( isPlainText && ! hasShortcodes ) { // Switch to inline mode if: // * The current mode is AUTO. // * The original plain text had no line breaks. // * The original plain text was not an HTML paragraph. // * The converted text is just a paragraph. if ( mode === 'AUTO' && plainText.indexOf( '\n' ) === -1 && plainText.indexOf( '<p>' ) !== 0 && HTML.indexOf( '<p>' ) === 0 ) { mode = 'INLINE'; } } if ( mode === 'INLINE' ) { return filterInlineHTML( HTML ); } if ( mode === 'AUTO' && ! hasShortcodes && isInlineContent( HTML, tagName ) ) { return filterInlineHTML( HTML ); } const phrasingContentSchema = getPhrasingContentSchema( 'paste' ); const blockContentSchema = getBlockContentSchema( 'paste' ); const blocks = pieces .map( ( piece ) => { // Already a block from shortcode. if ( typeof piece !== 'string' ) { return piece; } const filters = [ googleDocsUIDRemover, msListConverter, headRemover, listReducer, imageCorrector, phrasingContentReducer, specialCommentConverter, commentRemover, iframeRemover, figureContentReducer, blockquoteNormaliser(), divNormaliser, ]; const schema = { ...blockContentSchema, // Keep top-level phrasing content, normalised by `normaliseBlocks`. ...phrasingContentSchema, }; piece = deepFilterHTML( piece, filters, blockContentSchema ); piece = removeInvalidHTML( piece, schema ); piece = normaliseBlocks( piece ); piece = deepFilterHTML( piece, [ htmlFormattingRemover, brRemover, emptyParagraphRemover ], blockContentSchema ); // Allows us to ask for this information when we get a report. log( 'Processed HTML piece:\n\n', piece ); return htmlToBlocks( piece, pasteHandler ); } ) .flat() .filter( Boolean ); // If we're allowed to return inline content, and there is only one // inlineable block, and the original plain text content does not have any // line breaks, then treat it as inline paste. if ( mode === 'AUTO' && blocks.length === 1 && hasBlockSupport( blocks[ 0 ].name, '__unstablePasteTextInline', false ) ) { const trimRegex = /^[\n]+|[\n]+$/g; // Don't catch line breaks at the start or end. const trimmedPlainText = plainText.replace( trimRegex, '' ); if ( trimmedPlainText !== '' && trimmedPlainText.indexOf( '\n' ) === -1 ) { return removeInvalidHTML( getBlockInnerHTML( blocks[ 0 ] ), phrasingContentSchema ).replace( trimRegex, '' ); } } return blocks; }