UNPKG

@wordpress/blocks

Version:
804 lines (732 loc) 22.9 kB
/** * External dependencies */ import { Tokenizer } from 'simple-html-tokenizer'; import fastDeepEqual from 'fast-deep-equal/es6'; /** * WordPress dependencies */ import deprecated from '@wordpress/deprecated'; import { decodeEntities } from '@wordpress/html-entities'; /** * Internal dependencies */ import { createLogger, createQueuedLogger } from './logger'; import { getSaveContent } from '../serializer'; import { getFreeformContentHandlerName, getUnregisteredTypeHandlerName, } from '../registration'; import { normalizeBlockType } from '../utils'; /** @typedef {import('../parser').WPBlock} WPBlock */ /** @typedef {import('../registration').WPBlockType} WPBlockType */ /** @typedef {import('./logger').LoggerItem} LoggerItem */ const identity = ( x ) => x; /** * Globally matches any consecutive whitespace * * @type {RegExp} */ const REGEXP_WHITESPACE = /[\t\n\r\v\f ]+/g; /** * Matches a string containing only whitespace * * @type {RegExp} */ const REGEXP_ONLY_WHITESPACE = /^[\t\n\r\v\f ]*$/; /** * Matches a CSS URL type value * * @type {RegExp} */ const REGEXP_STYLE_URL_TYPE = /^url\s*\(['"\s]*(.*?)['"\s]*\)$/; /** * Boolean attributes are attributes whose presence as being assigned is * meaningful, even if only empty. * * See: https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attributes * Extracted from: https://html.spec.whatwg.org/multipage/indices.html#attributes-3 * * Object.keys( Array.from( document.querySelectorAll( '#attributes-1 > tbody > tr' ) ) * .filter( ( tr ) => tr.lastChild.textContent.indexOf( 'Boolean attribute' ) !== -1 ) * .reduce( ( result, tr ) => Object.assign( result, { * [ tr.firstChild.textContent.trim() ]: true * } ), {} ) ).sort(); * * @type {Array} */ const BOOLEAN_ATTRIBUTES = [ 'allowfullscreen', 'allowpaymentrequest', 'allowusermedia', 'async', 'autofocus', 'autoplay', 'checked', 'controls', 'default', 'defer', 'disabled', 'download', 'formnovalidate', 'hidden', 'ismap', 'itemscope', 'loop', 'multiple', 'muted', 'nomodule', 'novalidate', 'open', 'playsinline', 'readonly', 'required', 'reversed', 'selected', 'typemustmatch', ]; /** * Enumerated attributes are attributes which must be of a specific value form. * Like boolean attributes, these are meaningful if specified, even if not of a * valid enumerated value. * * See: https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#enumerated-attribute * Extracted from: https://html.spec.whatwg.org/multipage/indices.html#attributes-3 * * Object.keys( Array.from( document.querySelectorAll( '#attributes-1 > tbody > tr' ) ) * .filter( ( tr ) => /^("(.+?)";?\s*)+/.test( tr.lastChild.textContent.trim() ) ) * .reduce( ( result, tr ) => Object.assign( result, { * [ tr.firstChild.textContent.trim() ]: true * } ), {} ) ).sort(); * * @type {Array} */ const ENUMERATED_ATTRIBUTES = [ 'autocapitalize', 'autocomplete', 'charset', 'contenteditable', 'crossorigin', 'decoding', 'dir', 'draggable', 'enctype', 'formenctype', 'formmethod', 'http-equiv', 'inputmode', 'kind', 'method', 'preload', 'scope', 'shape', 'spellcheck', 'translate', 'type', 'wrap', ]; /** * Meaningful attributes are those who cannot be safely ignored when omitted in * one HTML markup string and not another. * * @type {Array} */ const MEANINGFUL_ATTRIBUTES = [ ...BOOLEAN_ATTRIBUTES, ...ENUMERATED_ATTRIBUTES, ]; /** * Array of functions which receive a text string on which to apply normalizing * behavior for consideration in text token equivalence, carefully ordered from * least-to-most expensive operations. * * @type {Array} */ const TEXT_NORMALIZATIONS = [ identity, getTextWithCollapsedWhitespace ]; /** * Regular expression matching a named character reference. In lieu of bundling * a full set of references, the pattern covers the minimal necessary to test * positively against the full set. * * "The ampersand must be followed by one of the names given in the named * character references section, using the same case." * * Tested against "12.5 Named character references": * * ``` * const references = Array.from( document.querySelectorAll( * '#named-character-references-table tr[id^=entity-] td:first-child' * ) ).map( ( code ) => code.textContent ) * references.every( ( reference ) => /^[\da-z]+$/i.test( reference ) ) * ``` * * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references * @see https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references * * @type {RegExp} */ const REGEXP_NAMED_CHARACTER_REFERENCE = /^[\da-z]+$/i; /** * Regular expression matching a decimal character reference. * * "The ampersand must be followed by a U+0023 NUMBER SIGN character (#), * followed by one or more ASCII digits, representing a base-ten integer" * * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references * * @type {RegExp} */ const REGEXP_DECIMAL_CHARACTER_REFERENCE = /^#\d+$/; /** * Regular expression matching a hexadecimal character reference. * * "The ampersand must be followed by a U+0023 NUMBER SIGN character (#), which * must be followed by either a U+0078 LATIN SMALL LETTER X character (x) or a * U+0058 LATIN CAPITAL LETTER X character (X), which must then be followed by * one or more ASCII hex digits, representing a hexadecimal integer" * * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references * * @type {RegExp} */ const REGEXP_HEXADECIMAL_CHARACTER_REFERENCE = /^#x[\da-f]+$/i; /** * Returns true if the given string is a valid character reference segment, or * false otherwise. The text should be stripped of `&` and `;` demarcations. * * @param {string} text Text to test. * * @return {boolean} Whether text is valid character reference. */ export function isValidCharacterReference( text ) { return ( REGEXP_NAMED_CHARACTER_REFERENCE.test( text ) || REGEXP_DECIMAL_CHARACTER_REFERENCE.test( text ) || REGEXP_HEXADECIMAL_CHARACTER_REFERENCE.test( text ) ); } /** * Substitute EntityParser class for `simple-html-tokenizer` which uses the * implementation of `decodeEntities` from `html-entities`, in order to avoid * bundling a massive named character reference. * * @see https://github.com/tildeio/simple-html-tokenizer/tree/HEAD/src/entity-parser.ts */ export class DecodeEntityParser { /** * Returns a substitute string for an entity string sequence between `&` * and `;`, or undefined if no substitution should occur. * * @param {string} entity Entity fragment discovered in HTML. * * @return {string | undefined} Entity substitute value. */ parse( entity ) { if ( isValidCharacterReference( entity ) ) { return decodeEntities( '&' + entity + ';' ); } } } /** * Given a specified string, returns an array of strings split by consecutive * whitespace, ignoring leading or trailing whitespace. * * @param {string} text Original text. * * @return {string[]} Text pieces split on whitespace. */ export function getTextPiecesSplitOnWhitespace( text ) { return text.trim().split( REGEXP_WHITESPACE ); } /** * Given a specified string, returns a new trimmed string where all consecutive * whitespace is collapsed to a single space. * * @param {string} text Original text. * * @return {string} Trimmed text with consecutive whitespace collapsed. */ export function getTextWithCollapsedWhitespace( text ) { // This is an overly simplified whitespace comparison. The specification is // more prescriptive of whitespace behavior in inline and block contexts. // // See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33 return getTextPiecesSplitOnWhitespace( text ).join( ' ' ); } /** * Returns attribute pairs of the given StartTag token, including only pairs * where the value is non-empty or the attribute is a boolean attribute, an * enumerated attribute, or a custom data- attribute. * * @see MEANINGFUL_ATTRIBUTES * * @param {Object} token StartTag token. * * @return {Array[]} Attribute pairs. */ export function getMeaningfulAttributePairs( token ) { return token.attributes.filter( ( pair ) => { const [ key, value ] = pair; return ( value || key.indexOf( 'data-' ) === 0 || MEANINGFUL_ATTRIBUTES.includes( key ) ); } ); } /** * Returns true if two text tokens (with `chars` property) are equivalent, or * false otherwise. * * @param {Object} actual Actual token. * @param {Object} expected Expected token. * @param {Object} logger Validation logger object. * * @return {boolean} Whether two text tokens are equivalent. */ export function isEquivalentTextTokens( actual, expected, logger = createLogger() ) { // This function is intentionally written as syntactically "ugly" as a hot // path optimization. Text is progressively normalized in order from least- // to-most operationally expensive, until the earliest point at which text // can be confidently inferred as being equal. let actualChars = actual.chars; let expectedChars = expected.chars; for ( let i = 0; i < TEXT_NORMALIZATIONS.length; i++ ) { const normalize = TEXT_NORMALIZATIONS[ i ]; actualChars = normalize( actualChars ); expectedChars = normalize( expectedChars ); if ( actualChars === expectedChars ) { return true; } } logger.warning( 'Expected text `%s`, saw `%s`.', expected.chars, actual.chars ); return false; } /** * Given a CSS length value, returns a normalized CSS length value for strict equality * comparison. * * @param {string} value CSS length value. * * @return {string} Normalized CSS length value. */ export function getNormalizedLength( value ) { if ( 0 === parseFloat( value ) ) { return '0'; } // Normalize strings with floats to always include a leading zero. if ( value.indexOf( '.' ) === 0 ) { return '0' + value; } return value; } /** * Given a style value, returns a normalized style value for strict equality * comparison. * * @param {string} value Style value. * * @return {string} Normalized style value. */ export function getNormalizedStyleValue( value ) { const textPieces = getTextPiecesSplitOnWhitespace( value ); const normalizedPieces = textPieces.map( getNormalizedLength ); const result = normalizedPieces.join( ' ' ); return ( result // Normalize URL type to omit whitespace or quotes. .replace( REGEXP_STYLE_URL_TYPE, 'url($1)' ) ); } /** * Given a style attribute string, returns an object of style properties. * * @param {string} text Style attribute. * * @return {Object} Style properties. */ export function getStyleProperties( text ) { const pairs = text // Trim ending semicolon (avoid including in split) .replace( /;?\s*$/, '' ) // Split on property assignment. .split( ';' ) // For each property assignment... .map( ( style ) => { // ...split further into key-value pairs. const [ key, ...valueParts ] = style.split( ':' ); const value = valueParts.join( ':' ); return [ key.trim(), getNormalizedStyleValue( value.trim() ) ]; } ); return Object.fromEntries( pairs ); } /** * Attribute-specific equality handlers * * @type {Object} */ export const isEqualAttributesOfName = { class: ( actual, expected ) => { // Class matches if members are the same, even if out of order or // superfluous whitespace between. const [ actualPieces, expectedPieces ] = [ actual, expected ].map( getTextPiecesSplitOnWhitespace ); const actualDiff = actualPieces.filter( ( c ) => ! expectedPieces.includes( c ) ); const expectedDiff = expectedPieces.filter( ( c ) => ! actualPieces.includes( c ) ); return actualDiff.length === 0 && expectedDiff.length === 0; }, style: ( actual, expected ) => { return fastDeepEqual( ...[ actual, expected ].map( getStyleProperties ) ); }, // For each boolean attribute, mere presence of attribute in both is enough // to assume equivalence. ...Object.fromEntries( BOOLEAN_ATTRIBUTES.map( ( attribute ) => [ attribute, () => true ] ) ), }; /** * Given two sets of attribute tuples, returns true if the attribute sets are * equivalent. * * @param {Array[]} actual Actual attributes tuples. * @param {Array[]} expected Expected attributes tuples. * @param {Object} logger Validation logger object. * * @return {boolean} Whether attributes are equivalent. */ export function isEqualTagAttributePairs( actual, expected, logger = createLogger() ) { // Attributes is tokenized as tuples. Their lengths should match. This also // avoids us needing to check both attributes sets, since if A has any keys // which do not exist in B, we know the sets to be different. if ( actual.length !== expected.length ) { logger.warning( 'Expected attributes %o, instead saw %o.', expected, actual ); return false; } // Attributes are not guaranteed to occur in the same order. For validating // actual attributes, first convert the set of expected attribute values to // an object, for lookup by key. const expectedAttributes = {}; for ( let i = 0; i < expected.length; i++ ) { expectedAttributes[ expected[ i ][ 0 ].toLowerCase() ] = expected[ i ][ 1 ]; } for ( let i = 0; i < actual.length; i++ ) { const [ name, actualValue ] = actual[ i ]; const nameLower = name.toLowerCase(); // As noted above, if missing member in B, assume different. if ( ! expectedAttributes.hasOwnProperty( nameLower ) ) { logger.warning( 'Encountered unexpected attribute `%s`.', name ); return false; } const expectedValue = expectedAttributes[ nameLower ]; const isEqualAttributes = isEqualAttributesOfName[ nameLower ]; if ( isEqualAttributes ) { // Defer custom attribute equality handling. if ( ! isEqualAttributes( actualValue, expectedValue ) ) { logger.warning( 'Expected attribute `%s` of value `%s`, saw `%s`.', name, expectedValue, actualValue ); return false; } } else if ( actualValue !== expectedValue ) { // Otherwise strict inequality should bail. logger.warning( 'Expected attribute `%s` of value `%s`, saw `%s`.', name, expectedValue, actualValue ); return false; } } return true; } /** * Token-type-specific equality handlers * * @type {Object} */ export const isEqualTokensOfType = { StartTag: ( actual, expected, logger = createLogger() ) => { if ( actual.tagName !== expected.tagName && // Optimization: Use short-circuit evaluation to defer case- // insensitive check on the assumption that the majority case will // have exactly equal tag names. actual.tagName.toLowerCase() !== expected.tagName.toLowerCase() ) { logger.warning( 'Expected tag name `%s`, instead saw `%s`.', expected.tagName, actual.tagName ); return false; } return isEqualTagAttributePairs( ...[ actual, expected ].map( getMeaningfulAttributePairs ), logger ); }, Chars: isEquivalentTextTokens, Comment: isEquivalentTextTokens, }; /** * Given an array of tokens, returns the first token which is not purely * whitespace. * * Mutates the tokens array. * * @param {Object[]} tokens Set of tokens to search. * * @return {Object | undefined} Next non-whitespace token. */ export function getNextNonWhitespaceToken( tokens ) { let token; while ( ( token = tokens.shift() ) ) { if ( token.type !== 'Chars' ) { return token; } if ( ! REGEXP_ONLY_WHITESPACE.test( token.chars ) ) { return token; } } } /** * Tokenize an HTML string, gracefully handling any errors thrown during * underlying tokenization. * * @param {string} html HTML string to tokenize. * @param {Object} logger Validation logger object. * * @return {Object[]|null} Array of valid tokenized HTML elements, or null on error */ function getHTMLTokens( html, logger = createLogger() ) { try { return new Tokenizer( new DecodeEntityParser() ).tokenize( html ); } catch ( e ) { logger.warning( 'Malformed HTML detected: %s', html ); } return null; } /** * Returns true if the next HTML token closes the current token. * * @param {Object} currentToken Current token to compare with. * @param {Object|undefined} nextToken Next token to compare against. * * @return {boolean} true if `nextToken` closes `currentToken`, false otherwise */ export function isClosedByToken( currentToken, nextToken ) { // Ensure this is a self closed token. if ( ! currentToken.selfClosing ) { return false; } // Check token names and determine if nextToken is the closing tag for currentToken. if ( nextToken && nextToken.tagName === currentToken.tagName && nextToken.type === 'EndTag' ) { return true; } return false; } /** * Returns true if the given HTML strings are effectively equivalent, or * false otherwise. Invalid HTML is not considered equivalent, even if the * strings directly match. * * @param {string} actual Actual HTML string. * @param {string} expected Expected HTML string. * @param {Object} logger Validation logger object. * * @return {boolean} Whether HTML strings are equivalent. */ export function isEquivalentHTML( actual, expected, logger = createLogger() ) { // Short-circuit if markup is identical. if ( actual === expected ) { return true; } // Tokenize input content and reserialized save content. const [ actualTokens, expectedTokens ] = [ actual, expected ].map( ( html ) => getHTMLTokens( html, logger ) ); // If either is malformed then stop comparing - the strings are not equivalent. if ( ! actualTokens || ! expectedTokens ) { return false; } let actualToken, expectedToken; while ( ( actualToken = getNextNonWhitespaceToken( actualTokens ) ) ) { expectedToken = getNextNonWhitespaceToken( expectedTokens ); // Inequal if exhausted all expected tokens. if ( ! expectedToken ) { logger.warning( 'Expected end of content, instead saw %o.', actualToken ); return false; } // Inequal if next non-whitespace token of each set are not same type. if ( actualToken.type !== expectedToken.type ) { logger.warning( 'Expected token of type `%s` (%o), instead saw `%s` (%o).', expectedToken.type, expectedToken, actualToken.type, actualToken ); return false; } // Defer custom token type equality handling, otherwise continue and // assume as equal. const isEqualTokens = isEqualTokensOfType[ actualToken.type ]; if ( isEqualTokens && ! isEqualTokens( actualToken, expectedToken, logger ) ) { return false; } // Peek at the next tokens (actual and expected) to see if they close // a self-closing tag. if ( isClosedByToken( actualToken, expectedTokens[ 0 ] ) ) { // Consume the next expected token that closes the current actual // self-closing token. getNextNonWhitespaceToken( expectedTokens ); } else if ( isClosedByToken( expectedToken, actualTokens[ 0 ] ) ) { // Consume the next actual token that closes the current expected // self-closing token. getNextNonWhitespaceToken( actualTokens ); } } if ( ( expectedToken = getNextNonWhitespaceToken( expectedTokens ) ) ) { // If any non-whitespace tokens remain in expected token set, this // indicates inequality. logger.warning( 'Expected %o, instead saw end of content.', expectedToken ); return false; } return true; } /** * Returns an object with `isValid` property set to `true` if the parsed block * is valid given the input content. A block is considered valid if, when serialized * with assumed attributes, the content matches the original value. If block is * invalid, this function returns all validations issues as well. * * @param {string|Object} blockTypeOrName Block type. * @param {Object} attributes Parsed block attributes. * @param {string} originalBlockContent Original block content. * @param {Object} logger Validation logger object. * * @return {Object} Whether block is valid and contains validation messages. */ /** * Returns an object with `isValid` property set to `true` if the parsed block * is valid given the input content. A block is considered valid if, when serialized * with assumed attributes, the content matches the original value. If block is * invalid, this function returns all validations issues as well. * * @param {WPBlock} block block object. * @param {WPBlockType|string} [blockTypeOrName = block.name] Block type or name, inferred from block if not given. * * @return {[boolean,Array<LoggerItem>]} validation results. */ export function validateBlock( block, blockTypeOrName = block.name ) { const isFallbackBlock = block.name === getFreeformContentHandlerName() || block.name === getUnregisteredTypeHandlerName(); // Shortcut to avoid costly validation. if ( isFallbackBlock ) { return [ true, [] ]; } const logger = createQueuedLogger(); const blockType = normalizeBlockType( blockTypeOrName ); let generatedBlockContent; try { generatedBlockContent = getSaveContent( blockType, block.attributes ); } catch ( error ) { logger.error( 'Block validation failed because an error occurred while generating block content:\n\n%s', error.toString() ); return [ false, logger.getItems() ]; } const isValid = isEquivalentHTML( block.originalContent, generatedBlockContent, logger ); if ( ! isValid ) { logger.error( 'Block validation failed for `%s` (%o).\n\nContent generated by `save` function:\n\n%s\n\nContent retrieved from post body:\n\n%s', blockType.name, blockType, generatedBlockContent, block.originalContent ); } return [ isValid, logger.getItems() ]; } /** * Returns true if the parsed block is valid given the input content. A block * is considered valid if, when serialized with assumed attributes, the content * matches the original value. * * Logs to console in development environments when invalid. * * @deprecated Use validateBlock instead to avoid data loss. * * @param {string|Object} blockTypeOrName Block type. * @param {Object} attributes Parsed block attributes. * @param {string} originalBlockContent Original block content. * * @return {boolean} Whether block is valid. */ export function isValidBlockContent( blockTypeOrName, attributes, originalBlockContent ) { deprecated( 'isValidBlockContent introduces opportunity for data loss', { since: '12.6', plugin: 'Gutenberg', alternative: 'validateBlock', } ); const blockType = normalizeBlockType( blockTypeOrName ); const block = { name: blockType.name, attributes, innerBlocks: [], originalContent: originalBlockContent, }; const [ isValid ] = validateBlock( block, blockType ); return isValid; }