UNPKG

@wordpress/blocks

Version:
8 lines (7 loc) 12 kB
{ "version": 3, "sources": ["../../../src/api/raw-handling/paste-handler.js"], "sourcesContent": ["/**\n * WordPress dependencies\n */\nimport { getPhrasingContentSchema, removeInvalidHTML } from '@wordpress/dom';\n\n/**\n * Internal dependencies\n */\nimport { htmlToBlocks } from './html-to-blocks';\nimport { hasBlockSupport } from '../registration';\nimport { getBlockInnerHTML } from '../serializer';\nimport parse from '../parser';\nimport normaliseBlocks from './normalise-blocks';\nimport specialCommentConverter from './special-comment-converter';\nimport commentRemover from './comment-remover';\nimport isInlineContent from './is-inline-content';\nimport phrasingContentReducer from './phrasing-content-reducer';\nimport headRemover from './head-remover';\nimport msListConverter from './ms-list-converter';\nimport msListIgnore from './ms-list-ignore';\nimport listReducer from './list-reducer';\nimport imageCorrector from './image-corrector';\nimport blockquoteNormaliser from './blockquote-normaliser';\nimport divNormaliser from './div-normaliser';\nimport figureContentReducer from './figure-content-reducer';\nimport shortcodeConverter from './shortcode-converter';\nimport markdownConverter from './markdown-converter';\nimport iframeRemover from './iframe-remover';\nimport googleDocsUIDRemover from './google-docs-uid-remover';\nimport htmlFormattingRemover from './html-formatting-remover';\nimport brRemover from './br-remover';\nimport { deepFilterHTML, isPlain, getBlockContentSchema } from './utils';\nimport emptyParagraphRemover from './empty-paragraph-remover';\nimport slackParagraphCorrector from './slack-paragraph-corrector';\nimport isLatexMathMode from './latex-to-math';\nimport { createBlock } from '../factory';\nimport headingTransformer from './heading-transformer';\n\nconst log = ( ...args ) => window?.console?.log?.( ...args );\n\n/**\n * Filters HTML to only contain phrasing content.\n *\n * @param {string} HTML The HTML to filter.\n *\n * @return {string} HTML only containing phrasing content.\n */\nfunction filterInlineHTML( HTML ) {\n\tHTML = deepFilterHTML( HTML, [\n\t\theadRemover,\n\t\tgoogleDocsUIDRemover,\n\t\tmsListIgnore,\n\t\tphrasingContentReducer,\n\t\tcommentRemover,\n\t] );\n\tHTML = removeInvalidHTML( HTML, getPhrasingContentSchema( 'paste' ), {\n\t\tinline: true,\n\t} );\n\n\tHTML = deepFilterHTML( HTML, [ htmlFormattingRemover, brRemover ] );\n\n\t// Allows us to ask for this information when we get a report.\n\tlog( 'Processed inline HTML:\\n\\n', HTML );\n\n\treturn HTML;\n}\n\n/**\n * Converts an HTML string to known blocks. Strips everything else.\n *\n * @param {Object} options\n * @param {string} [options.HTML] The HTML to convert.\n * @param {string} [options.plainText] Plain text version.\n * @param {string} [options.mode] Handle content as blocks or inline content.\n * * 'AUTO': Decide based on the content passed.\n * * 'INLINE': Always handle as inline content, and return string.\n * * 'BLOCKS': Always handle as blocks, and return array of blocks.\n * @param {Array} [options.tagName] The tag into which content will be inserted.\n *\n * @return {Array|string} A list of blocks or a string, depending on `handlerMode`.\n */\nexport function pasteHandler( {\n\tHTML = '',\n\tplainText = '',\n\tmode = 'AUTO',\n\ttagName,\n} ) {\n\t// Allows us to ask for this information when we get a report.\n\tlog( 'Received HTML (pasteHandler):\\n\\n', HTML );\n\tlog( 'Received plain text (pasteHandler):\\n\\n', plainText );\n\n\t// First of all, strip any meta tags.\n\tHTML = HTML.replace( /<meta[^>]+>/g, '' );\n\t// Strip Windows markers.\n\tHTML = HTML.replace(\n\t\t/^\\s*<html[^>]*>\\s*<body[^>]*>(?:\\s*<!--\\s*StartFragment\\s*-->)?/i,\n\t\t''\n\t);\n\tHTML = HTML.replace(\n\t\t/(?:<!--\\s*EndFragment\\s*-->\\s*)?<\\/body>\\s*<\\/html>\\s*$/i,\n\t\t''\n\t);\n\n\t// If we detect block delimiters in HTML, parse entirely as blocks.\n\tif ( mode !== 'INLINE' ) {\n\t\t// Check plain text if there is no HTML.\n\t\tconst content = HTML ? HTML : plainText;\n\n\t\tif ( content.indexOf( '<!-- wp:' ) !== -1 ) {\n\t\t\tconst parseResult = parse( content );\n\t\t\tconst isSingleFreeFormBlock =\n\t\t\t\tparseResult.length === 1 &&\n\t\t\t\tparseResult[ 0 ].name === 'core/freeform';\n\t\t\tif ( ! isSingleFreeFormBlock ) {\n\t\t\t\treturn parseResult;\n\t\t\t}\n\t\t}\n\t}\n\n\t// Normalize unicode to use composed characters.\n\t// Not normalizing the content will only affect older browsers and won't\n\t// entirely break the app.\n\t// See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/normalize\n\t// See: https://core.trac.wordpress.org/ticket/30130\n\t// See: https://github.com/WordPress/gutenberg/pull/6983#pullrequestreview-125151075\n\tif ( String.prototype.normalize ) {\n\t\tHTML = HTML.normalize();\n\t}\n\n\t// Must be run before checking if it's inline content.\n\tHTML = deepFilterHTML( HTML, [ slackParagraphCorrector ] );\n\n\t// Consider plain text if:\n\t// * There is a plain text version.\n\t// * There is no HTML version, or it has no formatting.\n\tconst isPlainText = plainText && ( ! HTML || isPlain( HTML ) );\n\n\tif ( isPlainText && isLatexMathMode( plainText ) ) {\n\t\treturn [ createBlock( 'core/math', { latex: plainText } ) ];\n\t}\n\n\t// Parse Markdown (and encoded HTML) if it's considered plain text.\n\tif ( isPlainText ) {\n\t\tHTML = plainText;\n\n\t\t// The markdown converter (Showdown) trims whitespace.\n\t\tif ( ! /^\\s+$/.test( plainText ) ) {\n\t\t\tHTML = markdownConverter( HTML );\n\t\t}\n\t}\n\n\t// An array of HTML strings and block objects. The blocks replace matched\n\t// shortcodes.\n\tconst pieces = shortcodeConverter( HTML );\n\n\t// The call to shortcodeConverter will always return more than one element\n\t// if shortcodes are matched. The reason is when shortcodes are matched\n\t// empty HTML strings are included.\n\tconst hasShortcodes = pieces.length > 1;\n\n\tif ( isPlainText && ! hasShortcodes ) {\n\t\t// Switch to inline mode if:\n\t\t// * The current mode is AUTO.\n\t\t// * The original plain text had no line breaks.\n\t\t// * The original plain text was not an HTML paragraph.\n\t\t// * The converted text is just a paragraph.\n\t\tif (\n\t\t\tmode === 'AUTO' &&\n\t\t\tplainText.indexOf( '\\n' ) === -1 &&\n\t\t\tplainText.indexOf( '<p>' ) !== 0 &&\n\t\t\tHTML.indexOf( '<p>' ) === 0\n\t\t) {\n\t\t\tmode = 'INLINE';\n\t\t}\n\t}\n\n\tif ( mode === 'INLINE' ) {\n\t\treturn filterInlineHTML( HTML );\n\t}\n\n\tif (\n\t\tmode === 'AUTO' &&\n\t\t! hasShortcodes &&\n\t\tisInlineContent( HTML, tagName )\n\t) {\n\t\treturn filterInlineHTML( HTML );\n\t}\n\n\tconst phrasingContentSchema = getPhrasingContentSchema( 'paste' );\n\tconst blockContentSchema = getBlockContentSchema( 'paste' );\n\n\tconst blocks = pieces\n\t\t.map( ( piece ) => {\n\t\t\t// Already a block from shortcode.\n\t\t\tif ( typeof piece !== 'string' ) {\n\t\t\t\treturn piece;\n\t\t\t}\n\n\t\t\tconst filters = [\n\t\t\t\tgoogleDocsUIDRemover,\n\t\t\t\tmsListConverter,\n\t\t\t\theadRemover,\n\t\t\t\tlistReducer,\n\t\t\t\timageCorrector,\n\t\t\t\tphrasingContentReducer,\n\t\t\t\tspecialCommentConverter,\n\t\t\t\tcommentRemover,\n\t\t\t\tiframeRemover,\n\t\t\t\tfigureContentReducer,\n\t\t\t\tblockquoteNormaliser(),\n\t\t\t\tdivNormaliser,\n\t\t\t\theadingTransformer,\n\t\t\t];\n\n\t\t\tconst schema = {\n\t\t\t\t...blockContentSchema,\n\t\t\t\t// Keep top-level phrasing content, normalised by `normaliseBlocks`.\n\t\t\t\t...phrasingContentSchema,\n\t\t\t};\n\n\t\t\tpiece = deepFilterHTML( piece, filters, blockContentSchema );\n\t\t\tpiece = removeInvalidHTML( piece, schema );\n\t\t\tpiece = normaliseBlocks( piece );\n\t\t\tpiece = deepFilterHTML(\n\t\t\t\tpiece,\n\t\t\t\t[ htmlFormattingRemover, brRemover, emptyParagraphRemover ],\n\t\t\t\tblockContentSchema\n\t\t\t);\n\n\t\t\t// Allows us to ask for this information when we get a report.\n\t\t\tlog( 'Processed HTML piece:\\n\\n', piece );\n\n\t\t\treturn htmlToBlocks( piece, pasteHandler );\n\t\t} )\n\t\t.flat()\n\t\t.filter( Boolean );\n\n\t// If we're allowed to return inline content, and there is only one\n\t// inlineable block, and the original plain text content does not have any\n\t// line breaks, then treat it as inline paste.\n\tif (\n\t\tmode === 'AUTO' &&\n\t\tblocks.length === 1 &&\n\t\thasBlockSupport( blocks[ 0 ].name, '__unstablePasteTextInline', false )\n\t) {\n\t\tconst trimRegex = /^[\\n]+|[\\n]+$/g;\n\t\t// Don't catch line breaks at the start or end.\n\t\tconst trimmedPlainText = plainText.replace( trimRegex, '' );\n\n\t\tif (\n\t\t\ttrimmedPlainText !== '' &&\n\t\t\ttrimmedPlainText.indexOf( '\\n' ) === -1\n\t\t) {\n\t\t\treturn removeInvalidHTML(\n\t\t\t\tgetBlockInnerHTML( blocks[ 0 ] ),\n\t\t\t\tphrasingContentSchema\n\t\t\t).replace( trimRegex, '' );\n\t\t}\n\t}\n\n\treturn blocks;\n}\n"], "mappings": ";AAGA,SAAS,0BAA0B,yBAAyB;AAK5D,SAAS,oBAAoB;AAC7B,SAAS,uBAAuB;AAChC,SAAS,yBAAyB;AAClC,OAAO,WAAW;AAClB,OAAO,qBAAqB;AAC5B,OAAO,6BAA6B;AACpC,OAAO,oBAAoB;AAC3B,OAAO,qBAAqB;AAC5B,OAAO,4BAA4B;AACnC,OAAO,iBAAiB;AACxB,OAAO,qBAAqB;AAC5B,OAAO,kBAAkB;AACzB,OAAO,iBAAiB;AACxB,OAAO,oBAAoB;AAC3B,OAAO,0BAA0B;AACjC,OAAO,mBAAmB;AAC1B,OAAO,0BAA0B;AACjC,OAAO,wBAAwB;AAC/B,OAAO,uBAAuB;AAC9B,OAAO,mBAAmB;AAC1B,OAAO,0BAA0B;AACjC,OAAO,2BAA2B;AAClC,OAAO,eAAe;AACtB,SAAS,gBAAgB,SAAS,6BAA6B;AAC/D,OAAO,2BAA2B;AAClC,OAAO,6BAA6B;AACpC,OAAO,qBAAqB;AAC5B,SAAS,mBAAmB;AAC5B,OAAO,wBAAwB;AAE/B,IAAM,MAAM,IAAK,SAAU,QAAQ,SAAS,MAAO,GAAG,IAAK;AAS3D,SAAS,iBAAkB,MAAO;AACjC,SAAO,eAAgB,MAAM;AAAA,IAC5B;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACD,CAAE;AACF,SAAO,kBAAmB,MAAM,yBAA0B,OAAQ,GAAG;AAAA,IACpE,QAAQ;AAAA,EACT,CAAE;AAEF,SAAO,eAAgB,MAAM,CAAE,uBAAuB,SAAU,CAAE;AAGlE,MAAK,8BAA8B,IAAK;AAExC,SAAO;AACR;AAgBO,SAAS,aAAc;AAAA,EAC7B,OAAO;AAAA,EACP,YAAY;AAAA,EACZ,OAAO;AAAA,EACP;AACD,GAAI;AAEH,MAAK,qCAAqC,IAAK;AAC/C,MAAK,2CAA2C,SAAU;AAG1D,SAAO,KAAK,QAAS,gBAAgB,EAAG;AAExC,SAAO,KAAK;AAAA,IACX;AAAA,IACA;AAAA,EACD;AACA,SAAO,KAAK;AAAA,IACX;AAAA,IACA;AAAA,EACD;AAGA,MAAK,SAAS,UAAW;AAExB,UAAM,UAAU,OAAO,OAAO;AAE9B,QAAK,QAAQ,QAAS,UAAW,MAAM,IAAK;AAC3C,YAAM,cAAc,MAAO,OAAQ;AACnC,YAAM,wBACL,YAAY,WAAW,KACvB,YAAa,CAAE,EAAE,SAAS;AAC3B,UAAK,CAAE,uBAAwB;AAC9B,eAAO;AAAA,MACR;AAAA,IACD;AAAA,EACD;AAQA,MAAK,OAAO,UAAU,WAAY;AACjC,WAAO,KAAK,UAAU;AAAA,EACvB;AAGA,SAAO,eAAgB,MAAM,CAAE,uBAAwB,CAAE;AAKzD,QAAM,cAAc,cAAe,CAAE,QAAQ,QAAS,IAAK;AAE3D,MAAK,eAAe,gBAAiB,SAAU,GAAI;AAClD,WAAO,CAAE,YAAa,aAAa,EAAE,OAAO,UAAU,CAAE,CAAE;AAAA,EAC3D;AAGA,MAAK,aAAc;AAClB,WAAO;AAGP,QAAK,CAAE,QAAQ,KAAM,SAAU,GAAI;AAClC,aAAO,kBAAmB,IAAK;AAAA,IAChC;AAAA,EACD;AAIA,QAAM,SAAS,mBAAoB,IAAK;AAKxC,QAAM,gBAAgB,OAAO,SAAS;AAEtC,MAAK,eAAe,CAAE,eAAgB;AAMrC,QACC,SAAS,UACT,UAAU,QAAS,IAAK,MAAM,MAC9B,UAAU,QAAS,KAAM,MAAM,KAC/B,KAAK,QAAS,KAAM,MAAM,GACzB;AACD,aAAO;AAAA,IACR;AAAA,EACD;AAEA,MAAK,SAAS,UAAW;AACxB,WAAO,iBAAkB,IAAK;AAAA,EAC/B;AAEA,MACC,SAAS,UACT,CAAE,iBACF,gBAAiB,MAAM,OAAQ,GAC9B;AACD,WAAO,iBAAkB,IAAK;AAAA,EAC/B;AAEA,QAAM,wBAAwB,yBAA0B,OAAQ;AAChE,QAAM,qBAAqB,sBAAuB,OAAQ;AAE1D,QAAM,SAAS,OACb,IAAK,CAAE,UAAW;AAElB,QAAK,OAAO,UAAU,UAAW;AAChC,aAAO;AAAA,IACR;AAEA,UAAM,UAAU;AAAA,MACf;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,qBAAqB;AAAA,MACrB;AAAA,MACA;AAAA,IACD;AAEA,UAAM,SAAS;AAAA,MACd,GAAG;AAAA;AAAA,MAEH,GAAG;AAAA,IACJ;AAEA,YAAQ,eAAgB,OAAO,SAAS,kBAAmB;AAC3D,YAAQ,kBAAmB,OAAO,MAAO;AACzC,YAAQ,gBAAiB,KAAM;AAC/B,YAAQ;AAAA,MACP;AAAA,MACA,CAAE,uBAAuB,WAAW,qBAAsB;AAAA,MAC1D;AAAA,IACD;AAGA,QAAK,6BAA6B,KAAM;AAExC,WAAO,aAAc,OAAO,YAAa;AAAA,EAC1C,CAAE,EACD,KAAK,EACL,OAAQ,OAAQ;AAKlB,MACC,SAAS,UACT,OAAO,WAAW,KAClB,gBAAiB,OAAQ,CAAE,EAAE,MAAM,6BAA6B,KAAM,GACrE;AACD,UAAM,YAAY;AAElB,UAAM,mBAAmB,UAAU,QAAS,WAAW,EAAG;AAE1D,QACC,qBAAqB,MACrB,iBAAiB,QAAS,IAAK,MAAM,IACpC;AACD,aAAO;AAAA,QACN,kBAAmB,OAAQ,CAAE,CAAE;AAAA,QAC/B;AAAA,MACD,EAAE,QAAS,WAAW,EAAG;AAAA,IAC1B;AAAA,EACD;AAEA,SAAO;AACR;", "names": [] }