/*
 * @wordpress/blocks
 * Version:
 * Block API for WordPress.
 * 188 lines (158 loc) • 7.46 kB
 * JavaScript
 */
/**
* External dependencies
*/
import { flatMap, compact } from 'lodash';
/**
* WordPress dependencies
*/
import { getPhrasingContentSchema, removeInvalidHTML } from '@wordpress/dom';
/**
* Internal dependencies
*/
import { htmlToBlocks } from './html-to-blocks';
import { hasBlockSupport } from '../registration';
import { getBlockInnerHTML } from '../serializer';
import { parseWithGrammar } from '../parser';
import normaliseBlocks from './normalise-blocks';
import specialCommentConverter from './special-comment-converter';
import commentRemover from './comment-remover';
import isInlineContent from './is-inline-content';
import phrasingContentReducer from './phrasing-content-reducer';
import headRemover from './head-remover';
import msListConverter from './ms-list-converter';
import listReducer from './list-reducer';
import imageCorrector from './image-corrector';
import blockquoteNormaliser from './blockquote-normaliser';
import figureContentReducer from './figure-content-reducer';
import shortcodeConverter from './shortcode-converter';
import markdownConverter from './markdown-converter';
import iframeRemover from './iframe-remover';
import googleDocsUIDRemover from './google-docs-uid-remover';
import htmlFormattingRemover from './html-formatting-remover';
import brRemover from './br-remover';
import { deepFilterHTML, isPlain, getBlockContentSchema } from './utils';
import emptyParagraphRemover from './empty-paragraph-remover';
/**
 * Browser dependencies
 *
 * Binds the module-level `console` name to the `console` object exposed on
 * `window`; the logging calls in this module go through this binding.
 */
const console = window.console;
/**
 * Reduces an HTML string to phrasing (inline) content.
 *
 * The string is first run through the `googleDocsUIDRemover`,
 * `phrasingContentReducer` and `commentRemover` filters, then validated against
 * the 'paste' phrasing content schema in inline mode. Unless white space must
 * be preserved, the `htmlFormattingRemover` and `brRemover` filters are applied
 * as a last step.
 *
 * @param {string}  HTML               The HTML to filter.
 * @param {boolean} preserveWhiteSpace When truthy, the formatting/`<br>` removal pass is skipped.
 *
 * @return {string} HTML only containing phrasing content.
 */
function filterInlineHTML(HTML, preserveWhiteSpace) {
  const phrasingFilters = [googleDocsUIDRemover, phrasingContentReducer, commentRemover];
  const reduced = deepFilterHTML(HTML, phrasingFilters);
  const validated = removeInvalidHTML(reduced, getPhrasingContentSchema('paste'), {
    inline: true
  });
  const result = preserveWhiteSpace
    ? validated
    : deepFilterHTML(validated, [htmlFormattingRemover, brRemover]);

  // Allows us to ask for this information when we get a report.
  console.log('Processed inline HTML:\n\n', result);
  return result;
}
/**
 * Converts pasted content to known blocks, or to an inline HTML string.
 * Strips everything else.
 *
 * Processing order:
 * 1. Strip `<meta>` tags and the `<html><body>` / `StartFragment` /
 *    `EndFragment` wrappers from the HTML.
 * 2. Unless the mode is 'INLINE', content containing `<!-- wp:` block
 *    delimiters is handed straight to `parseWithGrammar`.
 * 3. Normalize unicode, then fall back to the (Markdown-converted) plain text
 *    when there is no HTML or the HTML is plain.
 * 4. Return filtered inline HTML, or convert the content piece by piece into
 *    blocks.
 *
 * @param {Object}  options
 * @param {string}  [options.HTML]               The HTML to convert.
 * @param {string}  [options.plainText]          Plain text version.
 * @param {string}  [options.mode]               Handle content as blocks or inline content.
 *                                               * 'AUTO': Decide based on the content passed.
 *                                               * 'INLINE': Always handle as inline content, and return string.
 *                                               * 'BLOCKS': Always handle as blocks, and return array of blocks.
 * @param {Array}   [options.tagName]            The tag into which content will be inserted. Passed
 *                                               through to `isInlineContent`. NOTE(review): declared as
 *                                               Array although the name suggests a single tag name;
 *                                               confirm the expected type.
 * @param {boolean} [options.preserveWhiteSpace] Whether or not to preserve consecutive white space
 *                                               when the result is filtered as inline content.
 *
 * @return {Array|string} A list of blocks or a string, depending on `mode`
 *                        (and, in 'AUTO' mode, on the content).
 */
export function pasteHandler({
  HTML = '',
  plainText = '',
  mode = 'AUTO',
  tagName,
  preserveWhiteSpace
}) {
  // First of all, strip any meta tags. HTML tag names are case-insensitive,
  // so `<META …>` has to be matched as well as `<meta …>`.
  HTML = HTML.replace(/<meta[^>]+>/gi, '');

  // Strip Windows markers.
  HTML = HTML.replace(/^\s*<html[^>]*>\s*<body[^>]*>(?:\s*<!--\s*StartFragment\s*-->)?/i, '');
  HTML = HTML.replace(/(?:<!--\s*EndFragment\s*-->\s*)?<\/body>\s*<\/html>\s*$/i, '');

  // If we detect block delimiters in HTML, parse entirely as blocks.
  if (mode !== 'INLINE') {
    // Check plain text if there is no HTML.
    const content = HTML ? HTML : plainText;
    if (content.indexOf('<!-- wp:') !== -1) {
      return parseWithGrammar(content);
    }
  }

  // Normalize unicode to use composed characters.
  // This is unsupported in IE 11 but it's a nice-to-have feature, not mandatory.
  // Not normalizing the content will only affect older browsers and won't
  // entirely break the app.
  // See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize
  // See: https://core.trac.wordpress.org/ticket/30130
  // See: https://github.com/WordPress/gutenberg/pull/6983#pullrequestreview-125151075
  if (String.prototype.normalize) {
    HTML = HTML.normalize();
  }

  // Parse Markdown (and encoded HTML) if:
  // * There is a plain text version.
  // * There is no HTML version, or it has no formatting.
  if (plainText && (!HTML || isPlain(HTML))) {
    HTML = plainText;

    // The markdown converter (Showdown) trims whitespace, so whitespace-only
    // text is left unconverted.
    if (!/^\s+$/.test(plainText)) {
      HTML = markdownConverter(HTML);
    }

    // Switch to inline mode if:
    // * The current mode is AUTO.
    // * The original plain text had no line breaks.
    // * The original plain text was not an HTML paragraph.
    // * The converted text is just a paragraph.
    if (mode === 'AUTO' && plainText.indexOf('\n') === -1 && plainText.indexOf('<p>') !== 0 && HTML.indexOf('<p>') === 0) {
      mode = 'INLINE';
    }
  }

  if (mode === 'INLINE') {
    return filterInlineHTML(HTML, preserveWhiteSpace);
  }

  // An array of HTML strings and block objects. The blocks replace matched
  // shortcodes.
  const pieces = shortcodeConverter(HTML);

  // The call to shortcodeConverter will always return more than one element
  // if shortcodes are matched. The reason is when shortcodes are matched
  // empty HTML strings are included.
  const hasShortcodes = pieces.length > 1;

  if (mode === 'AUTO' && !hasShortcodes && isInlineContent(HTML, tagName)) {
    return filterInlineHTML(HTML, preserveWhiteSpace);
  }

  const phrasingContentSchema = getPhrasingContentSchema('paste');
  const blockContentSchema = getBlockContentSchema('paste');
  const blocks = compact(flatMap(pieces, piece => {
    // Already a block from shortcode.
    if (typeof piece !== 'string') {
      return piece;
    }

    const filters = [googleDocsUIDRemover, msListConverter, headRemover, listReducer, imageCorrector, phrasingContentReducer, specialCommentConverter, commentRemover, iframeRemover, figureContentReducer, blockquoteNormaliser];
    const schema = {
      ...blockContentSchema,
      // Keep top-level phrasing content, normalised by `normaliseBlocks`.
      ...phrasingContentSchema
    };
    piece = deepFilterHTML(piece, filters, blockContentSchema);
    piece = removeInvalidHTML(piece, schema);
    piece = normaliseBlocks(piece);
    piece = deepFilterHTML(piece, [htmlFormattingRemover, brRemover, emptyParagraphRemover], blockContentSchema);

    // Allows us to ask for this information when we get a report.
    console.log('Processed HTML piece:\n\n', piece);
    return htmlToBlocks(piece);
  }));

  // If we're allowed to return inline content, and there is only one
  // inlineable block, and the original plain text content does not have any
  // line breaks, then treat it as inline paste.
  if (mode === 'AUTO' && blocks.length === 1 && hasBlockSupport(blocks[0].name, '__unstablePasteTextInline', false)) {
    // Don't catch line breaks at the start or end.
    const trimmedPlainText = plainText.replace(/^[\n]+|[\n]+$/g, '');
    if (trimmedPlainText !== '' && trimmedPlainText.indexOf('\n') === -1) {
      return removeInvalidHTML(getBlockInnerHTML(blocks[0]), phrasingContentSchema);
    }
  }

  return blocks;
}
//# sourceMappingURL=paste-handler.js.map