@wordpress/blocks
Version:
Block API for WordPress.
183 lines (169 loc) • 7.68 kB
JavaScript
/**
* WordPress dependencies
*/
import { getPhrasingContentSchema, removeInvalidHTML } from '@wordpress/dom';
/**
* Internal dependencies
*/
import { htmlToBlocks } from './html-to-blocks';
import { hasBlockSupport } from '../registration';
import { getBlockInnerHTML } from '../serializer';
import parse from '../parser';
import normaliseBlocks from './normalise-blocks';
import specialCommentConverter from './special-comment-converter';
import commentRemover from './comment-remover';
import isInlineContent from './is-inline-content';
import phrasingContentReducer from './phrasing-content-reducer';
import headRemover from './head-remover';
import msListConverter from './ms-list-converter';
import msListIgnore from './ms-list-ignore';
import listReducer from './list-reducer';
import imageCorrector from './image-corrector';
import blockquoteNormaliser from './blockquote-normaliser';
import divNormaliser from './div-normaliser';
import figureContentReducer from './figure-content-reducer';
import shortcodeConverter from './shortcode-converter';
import markdownConverter from './markdown-converter';
import iframeRemover from './iframe-remover';
import googleDocsUIDRemover from './google-docs-uid-remover';
import htmlFormattingRemover from './html-formatting-remover';
import brRemover from './br-remover';
import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils';
import emptyParagraphRemover from './empty-paragraph-remover';
import slackParagraphCorrector from './slack-paragraph-corrector';
/**
 * Forwards its arguments to `window.console.log` when that is available.
 *
 * Optional chaining does not protect against an *undeclared* identifier, so
 * `window?.console` throws a ReferenceError where no `window` global exists.
 * The `typeof` guard makes the helper a silent no-op in that situation.
 *
 * @param {...*} args Values to log.
 *
 * @return {*} Whatever `console.log` returns, or `undefined` when there is no
 *             `window`, `console` or `log` to call.
 */
const log = (...args) => {
  if (typeof window === 'undefined') {
    return undefined;
  }
  return window?.console?.log?.(...args);
};
/**
 * Reduces an HTML string to phrasing content only.
 *
 * The input goes through three passes: a first set of node filters, removal
 * of anything not allowed by the 'paste' phrasing-content schema (in inline
 * mode), and a final pass of formatting / `<br>` filters.
 *
 * @param {string} HTML The HTML to filter.
 *
 * @return {string} HTML only containing phrasing content.
 */
function filterInlineHTML(HTML) {
  const reducingFilters = [
    headRemover,
    googleDocsUIDRemover,
    msListIgnore,
    phrasingContentReducer,
    commentRemover
  ];
  const cleanupFilters = [htmlFormattingRemover, brRemover];

  const reduced = deepFilterHTML(HTML, reducingFilters);
  const phrasingOnly = removeInvalidHTML(
    reduced,
    getPhrasingContentSchema('paste'),
    { inline: true }
  );
  const inlineHTML = deepFilterHTML(phrasingOnly, cleanupFilters);

  // Logged so the processed markup can be requested in bug reports.
  log('Processed inline HTML:\n\n', inlineHTML);
  return inlineHTML;
}
/**
 * Converts an HTML string to known blocks. Strips everything else.
 *
 * @param {Object} options
 * @param {string} [options.HTML]      The HTML to convert.
 * @param {string} [options.plainText] Plain text version.
 * @param {string} [options.mode]      Handle content as blocks or inline content.
 *                                     * 'AUTO': Decide based on the content passed.
 *                                     * 'INLINE': Always handle as inline content, and return string.
 *                                     * 'BLOCKS': Always handle as blocks, and return array of blocks.
 * @param {string} [options.tagName]   The tag into which content will be
 *                                     inserted; forwarded to `isInlineContent`.
 *
 * @return {Array|string} A string when the content is handled inline, a list
 *                        of blocks otherwise. Which one depends on `mode` and,
 *                        in 'AUTO' mode, on the content itself.
 */
export function pasteHandler({
  HTML = '',
  plainText = '',
  mode = 'AUTO',
  tagName
}) {
  // First of all, strip any meta tags. HTML tag names are case-insensitive,
  // so `<META ...>` must be matched as well, consistent with the
  // case-insensitive `<html>` / `<body>` stripping below.
  HTML = HTML.replace(/<meta[^>]+>/gi, '');
  // Strip Windows markers.
  HTML = HTML.replace(/^\s*<html[^>]*>\s*<body[^>]*>(?:\s*<!--\s*StartFragment\s*-->)?/i, '');
  HTML = HTML.replace(/(?:<!--\s*EndFragment\s*-->\s*)?<\/body>\s*<\/html>\s*$/i, '');
  // If we detect block delimiters in HTML, parse entirely as blocks.
  if (mode !== 'INLINE') {
    // Check plain text if there is no HTML.
    const content = HTML || plainText;
    if (content.includes('<!-- wp:')) {
      const parseResult = parse(content);
      const isSingleFreeFormBlock = parseResult.length === 1 && parseResult[0].name === 'core/freeform';
      // A lone freeform block is not returned here; the content then goes
      // through the regular conversion below instead.
      if (!isSingleFreeFormBlock) {
        return parseResult;
      }
    }
  }
  // Normalize unicode to use composed characters.
  // Not normalizing the content will only affect older browsers and won't
  // entirely break the app.
  // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // See: https://core.trac.wordpress.org/ticket/30130
  // See: https://github.com/WordPress/gutenberg/pull/6983#pullrequestreview-125151075
  if (String.prototype.normalize) {
    HTML = HTML.normalize();
  }
  // Must be run before checking if it's inline content.
  HTML = deepFilterHTML(HTML, [slackParagraphCorrector]);
  // Consider plain text if:
  // * There is a plain text version.
  // * There is no HTML version, or it has no formatting.
  const isPlainText = plainText && (!HTML || isPlain(HTML));
  // Parse Markdown (and encoded HTML) if it's considered plain text.
  if (isPlainText) {
    HTML = plainText;
    // The markdown converter (Showdown) trims whitespace, so
    // whitespace-only text is kept as-is rather than converted.
    if (!/^\s+$/.test(plainText)) {
      HTML = markdownConverter(HTML);
    }
  }
  // An array of HTML strings and block objects. The blocks replace matched
  // shortcodes.
  const pieces = shortcodeConverter(HTML);
  // The call to shortcodeConverter will always return more than one element
  // if shortcodes are matched. The reason is when shortcodes are matched
  // empty HTML strings are included.
  const hasShortcodes = pieces.length > 1;
  if (isPlainText && !hasShortcodes) {
    // Switch to inline mode if:
    // * The current mode is AUTO.
    // * The original plain text had no line breaks.
    // * The original plain text was not an HTML paragraph.
    // * The converted text is just a paragraph.
    if (mode === 'AUTO' && !plainText.includes('\n') && !plainText.startsWith('<p>') && HTML.startsWith('<p>')) {
      mode = 'INLINE';
    }
  }
  if (mode === 'INLINE') {
    return filterInlineHTML(HTML);
  }
  if (mode === 'AUTO' && !hasShortcodes && isInlineContent(HTML, tagName)) {
    return filterInlineHTML(HTML);
  }
  const phrasingContentSchema = getPhrasingContentSchema('paste');
  const blockContentSchema = getBlockContentSchema('paste');
  const blocks = pieces.map(piece => {
    // Already a block from shortcode.
    if (typeof piece !== 'string') {
      return piece;
    }
    const filters = [googleDocsUIDRemover, msListConverter, headRemover, listReducer, imageCorrector, phrasingContentReducer, specialCommentConverter, commentRemover, iframeRemover, figureContentReducer, blockquoteNormaliser(), divNormaliser];
    const schema = {
      ...blockContentSchema,
      // Keep top-level phrasing content, normalised by `normaliseBlocks`.
      ...phrasingContentSchema
    };
    piece = deepFilterHTML(piece, filters, blockContentSchema);
    piece = removeInvalidHTML(piece, schema);
    piece = normaliseBlocks(piece);
    piece = deepFilterHTML(piece, [htmlFormattingRemover, brRemover, emptyParagraphRemover], blockContentSchema);
    // Allows us to ask for this information when we get a report.
    log('Processed HTML piece:\n\n', piece);
    // `pasteHandler` itself is handed to `htmlToBlocks` as its second argument.
    return htmlToBlocks(piece, pasteHandler);
  }).flat().filter(Boolean);
  // If we're allowed to return inline content, and there is only one
  // inlineable block, and the original plain text content does not have any
  // line breaks, then treat it as inline paste.
  if (mode === 'AUTO' && blocks.length === 1 && hasBlockSupport(blocks[0].name, '__unstablePasteTextInline', false)) {
    const trimRegex = /^[\n]+|[\n]+$/g;
    // Don't catch line breaks at the start or end.
    const trimmedPlainText = plainText.replace(trimRegex, '');
    if (trimmedPlainText !== '' && !trimmedPlainText.includes('\n')) {
      return removeInvalidHTML(getBlockInnerHTML(blocks[0]), phrasingContentSchema).replace(trimRegex, '');
    }
  }
  return blocks;
}
//# sourceMappingURL=paste-handler.js.map