// UNPKG
//
// @wordpress/blocks
//
// Version:
// 622 lines (579 loc) 22.6 kB
/**
 * External dependencies
 */
import { Tokenizer } from 'simple-html-tokenizer';
import fastDeepEqual from 'fast-deep-equal/es6';

/**
 * WordPress dependencies
 */
import deprecated from '@wordpress/deprecated';
import { decodeEntities } from '@wordpress/html-entities';

/**
 * Internal dependencies
 */
import { createLogger, createQueuedLogger } from './logger';
import { getSaveContent } from '../serializer';
import { getFreeformContentHandlerName, getUnregisteredTypeHandlerName } from '../registration';
import { normalizeBlockType } from '../utils';

/** @typedef {import('../parser').WPBlock} WPBlock */
/** @typedef {import('../registration').WPBlockType} WPBlockType */
/** @typedef {import('./logger').LoggerItem} LoggerItem */

const identity = x => x;

/**
 * Globally matches any consecutive whitespace
 *
 * @type {RegExp}
 */
const REGEXP_WHITESPACE = /[\t\n\r\v\f ]+/g;

/**
 * Matches a string containing only whitespace
 *
 * @type {RegExp}
 */
const REGEXP_ONLY_WHITESPACE = /^[\t\n\r\v\f ]*$/;

/**
 * Matches a CSS URL type value
 *
 * @type {RegExp}
 */
const REGEXP_STYLE_URL_TYPE = /^url\s*\(['"\s]*(.*?)['"\s]*\)$/;

/**
 * Boolean attributes are attributes whose presence as being assigned is
 * meaningful, even if only empty.
 *
 * See: https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#boolean-attributes
 * Extracted from: https://html.spec.whatwg.org/multipage/indices.html#attributes-3
 *
 * Object.keys( Array.from( document.querySelectorAll( '#attributes-1 > tbody > tr' ) )
 *     .filter( ( tr ) => tr.lastChild.textContent.indexOf( 'Boolean attribute' ) !== -1 )
 *     .reduce( ( result, tr ) => Object.assign( result, {
 *         [ tr.firstChild.textContent.trim() ]: true
 *     } ), {} ) ).sort();
 *
 * @type {Array}
 */
const BOOLEAN_ATTRIBUTES = ['allowfullscreen', 'allowpaymentrequest', 'allowusermedia', 'async', 'autofocus', 'autoplay', 'checked', 'controls', 'default', 'defer', 'disabled', 'download', 'formnovalidate', 'hidden', 'ismap', 'itemscope', 'loop', 'multiple', 'muted', 'nomodule', 'novalidate', 'open', 'playsinline', 'readonly', 'required', 'reversed', 'selected', 'typemustmatch'];

/**
 * Enumerated attributes are attributes which must be of a specific value form.
 * Like boolean attributes, these are meaningful if specified, even if not of a
 * valid enumerated value.
 *
 * See: https://html.spec.whatwg.org/multipage/common-microsyntaxes.html#enumerated-attribute
 * Extracted from: https://html.spec.whatwg.org/multipage/indices.html#attributes-3
 *
 * Object.keys( Array.from( document.querySelectorAll( '#attributes-1 > tbody > tr' ) )
 *     .filter( ( tr ) => /^("(.+?)";?\s*)+/.test( tr.lastChild.textContent.trim() ) )
 *     .reduce( ( result, tr ) => Object.assign( result, {
 *         [ tr.firstChild.textContent.trim() ]: true
 *     } ), {} ) ).sort();
 *
 * @type {Array}
 */
const ENUMERATED_ATTRIBUTES = ['autocapitalize', 'autocomplete', 'charset', 'contenteditable', 'crossorigin', 'decoding', 'dir', 'draggable', 'enctype', 'formenctype', 'formmethod', 'http-equiv', 'inputmode', 'kind', 'method', 'preload', 'scope', 'shape', 'spellcheck', 'translate', 'type', 'wrap'];

/**
 * Meaningful attributes are those which cannot be safely ignored when omitted
 * in one HTML markup string and not another.
 *
 * @type {Array}
 */
const MEANINGFUL_ATTRIBUTES = [...BOOLEAN_ATTRIBUTES, ...ENUMERATED_ATTRIBUTES];

/**
 * Array of functions which receive a text string on which to apply normalizing
 * behavior for consideration in text token equivalence, carefully ordered from
 * least-to-most expensive operations.
 *
 * @type {Array}
 */
const TEXT_NORMALIZATIONS = [identity, getTextWithCollapsedWhitespace];

/**
 * Regular expression matching a named character reference. In lieu of bundling
 * a full set of references, the pattern covers the minimal necessary to test
 * positively against the full set.
 *
 * "The ampersand must be followed by one of the names given in the named
 * character references section, using the same case."
 *
 * Tested against "12.5 Named character references":
 *
 * ```
 * const references = Array.from( document.querySelectorAll(
 *     '#named-character-references-table tr[id^=entity-] td:first-child'
 * ) ).map( ( code ) => code.textContent )
 * references.every( ( reference ) => /^[\da-z]+$/i.test( reference ) )
 * ```
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references
 * @see https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references
 *
 * @type {RegExp}
 */
const REGEXP_NAMED_CHARACTER_REFERENCE = /^[\da-z]+$/i;

/**
 * Regular expression matching a decimal character reference.
 *
 * "The ampersand must be followed by a U+0023 NUMBER SIGN character (#),
 * followed by one or more ASCII digits, representing a base-ten integer"
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references
 *
 * @type {RegExp}
 */
const REGEXP_DECIMAL_CHARACTER_REFERENCE = /^#\d+$/;

/**
 * Regular expression matching a hexadecimal character reference.
 *
 * "The ampersand must be followed by a U+0023 NUMBER SIGN character (#), which
 * must be followed by either a U+0078 LATIN SMALL LETTER X character (x) or a
 * U+0058 LATIN CAPITAL LETTER X character (X), which must then be followed by
 * one or more ASCII hex digits, representing a hexadecimal integer"
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#character-references
 *
 * @type {RegExp}
 */
const REGEXP_HEXADECIMAL_CHARACTER_REFERENCE = /^#x[\da-f]+$/i;

/**
 * Returns true if the given string is a valid character reference segment, or
 * false otherwise. The text should be stripped of `&` and `;` demarcations.
 *
 * @param {string} text Text to test.
 *
 * @return {boolean} Whether text is valid character reference.
 */
export function isValidCharacterReference(text) {
  return (
    REGEXP_NAMED_CHARACTER_REFERENCE.test(text) ||
    REGEXP_DECIMAL_CHARACTER_REFERENCE.test(text) ||
    REGEXP_HEXADECIMAL_CHARACTER_REFERENCE.test(text)
  );
}

/**
 * Substitute EntityParser class for `simple-html-tokenizer` which uses the
 * implementation of `decodeEntities` from `html-entities`, in order to avoid
 * bundling a massive named character reference.
 *
 * @see https://github.com/tildeio/simple-html-tokenizer/tree/HEAD/src/entity-parser.ts
 */
export class DecodeEntityParser {
  /**
   * Returns a substitute string for an entity string sequence between `&`
   * and `;`, or undefined if no substitution should occur.
   *
   * @param {string} entity Entity fragment discovered in HTML.
   *
   * @return {string | undefined} Entity substitute value.
   */
  parse(entity) {
    if (isValidCharacterReference(entity)) {
      return decodeEntities('&' + entity + ';');
    }

    // Not a recognizable reference: signal "no substitution".
    return undefined;
  }
}

/**
 * Given a specified string, returns an array of strings split by consecutive
 * whitespace, ignoring leading or trailing whitespace.
 *
 * @param {string} text Original text.
 *
 * @return {string[]} Text pieces split on whitespace.
 */
export function getTextPiecesSplitOnWhitespace(text) {
  return text.trim().split(REGEXP_WHITESPACE);
}

/**
 * Given a specified string, returns a new trimmed string where all consecutive
 * whitespace is collapsed to a single space.
 *
 * @param {string} text Original text.
 *
 * @return {string} Trimmed text with consecutive whitespace collapsed.
 */
export function getTextWithCollapsedWhitespace(text) {
  // This is an overly simplified whitespace comparison. The specification is
  // more prescriptive of whitespace behavior in inline and block contexts.
  //
  // See: https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33
  return getTextPiecesSplitOnWhitespace(text).join(' ');
}

/**
 * Returns attribute pairs of the given StartTag token, including only pairs
 * where the value is non-empty or the attribute is a boolean attribute, an
 * enumerated attribute, or a custom data- attribute.
 *
 * @see MEANINGFUL_ATTRIBUTES
 *
 * @param {Object} token StartTag token.
 *
 * @return {Array[]} Attribute pairs.
 */
export function getMeaningfulAttributePairs(token) {
  return token.attributes.filter(pair => {
    const [key, value] = pair;
    return value || key.startsWith('data-') || MEANINGFUL_ATTRIBUTES.includes(key);
  });
}

/**
 * Returns true if two text tokens (with `chars` property) are equivalent, or
 * false otherwise.
 *
 * @param {Object} actual   Actual token.
 * @param {Object} expected Expected token.
 * @param {Object} logger   Validation logger object.
 *
 * @return {boolean} Whether two text tokens are equivalent.
 */
export function isEquivalentTextTokens(actual, expected, logger = createLogger()) {
  // This function is intentionally written as syntactically "ugly" as a hot
  // path optimization. Text is progressively normalized in order from least-
  // to-most operationally expensive, until the earliest point at which text
  // can be confidently inferred as being equal.
  let actualChars = actual.chars;
  let expectedChars = expected.chars;

  for (let i = 0; i < TEXT_NORMALIZATIONS.length; i++) {
    const normalize = TEXT_NORMALIZATIONS[i];
    actualChars = normalize(actualChars);
    expectedChars = normalize(expectedChars);

    if (actualChars === expectedChars) {
      return true;
    }
  }

  // Report the original (un-normalized) text so the message matches the markup.
  logger.warning('Expected text `%s`, saw `%s`.', expected.chars, actual.chars);
  return false;
}

/**
 * Given a CSS length value, returns a normalized CSS length value for strict equality
 * comparison.
 *
 * @param {string} value CSS length value.
 *
 * @return {string} Normalized CSS length value.
 */
export function getNormalizedLength(value) {
  // Any value whose leading number parses to zero is reduced to a bare `0`.
  if (0 === parseFloat(value)) {
    return '0';
  }

  // Normalize strings with floats to always include a leading zero.
  if (value.startsWith('.')) {
    return '0' + value;
  }

  return value;
}

/**
 * Given a style value, returns a normalized style value for strict equality
 * comparison.
 *
 * @param {string} value Style value.
 *
 * @return {string} Normalized style value.
 */
export function getNormalizedStyleValue(value) {
  const normalizedPieces = getTextPiecesSplitOnWhitespace(value).map(piece => getNormalizedLength(piece));
  const result = normalizedPieces.join(' ');

  // Normalize URL type to omit whitespace or quotes.
  return result.replace(REGEXP_STYLE_URL_TYPE, 'url($1)');
}

/**
 * Given a style attribute string, returns an object of style properties.
 *
 * @param {string} text Style attribute.
 *
 * @return {Object} Style properties.
 */
export function getStyleProperties(text) {
  const pairs = text
    // Trim ending semicolon (avoid including in split)
    .replace(/;?\s*$/, '')
    // Split on property assignment.
    .split(';')
    // For each property assignment...
    .map(style => {
      // ...split further into key-value pairs. Only the first colon separates
      // the key; any further colons are rejoined into the value.
      const [key, ...valueParts] = style.split(':');
      const value = valueParts.join(':');
      return [key.trim(), getNormalizedStyleValue(value.trim())];
    });

  return Object.fromEntries(pairs);
}

/**
 * Attribute-specific equality handlers
 *
 * @type {Object}
 */
export const isEqualAttributesOfName = {
  class: (actual, expected) => {
    // Class matches if members are the same, even if out of order or
    // superfluous whitespace between.
    const actualClasses = new Set(getTextPiecesSplitOnWhitespace(actual));
    const expectedClasses = new Set(getTextPiecesSplitOnWhitespace(expected));

    return (
      actualClasses.size === expectedClasses.size &&
      [...actualClasses].every(className => expectedClasses.has(className))
    );
  },
  style: (actual, expected) => {
    return fastDeepEqual(getStyleProperties(actual), getStyleProperties(expected));
  },
  // For each boolean attribute, mere presence of attribute in both is enough
  // to assume equivalence.
  ...Object.fromEntries(BOOLEAN_ATTRIBUTES.map(attribute => [attribute, () => true]))
};

/**
 * Given two sets of attribute tuples, returns true if the attribute sets are
 * equivalent.
 *
 * @param {Array[]} actual   Actual attributes tuples.
 * @param {Array[]} expected Expected attributes tuples.
 * @param {Object}  logger   Validation logger object.
 *
 * @return {boolean} Whether attributes are equivalent.
 */
export function isEqualTagAttributePairs(actual, expected, logger = createLogger()) {
  // Attributes is tokenized as tuples. Their lengths should match. This also
  // avoids us needing to check both attributes sets, since if A has any keys
  // which do not exist in B, we know the sets to be different.
  if (actual.length !== expected.length) {
    logger.warning('Expected attributes %o, instead saw %o.', expected, actual);
    return false;
  }

  // Attributes are not guaranteed to occur in the same order. For validating
  // actual attributes, first index the expected attribute values by their
  // lower-cased name. A Map is used rather than a plain object so attribute
  // names such as `__proto__` are stored like any other key.
  const expectedAttributes = new Map();
  for (const [name, value] of expected) {
    expectedAttributes.set(name.toLowerCase(), value);
  }

  for (const [name, actualValue] of actual) {
    const nameLower = name.toLowerCase();

    // As noted above, if missing member in B, assume different.
    if (!expectedAttributes.has(nameLower)) {
      logger.warning('Encountered unexpected attribute `%s`.', name);
      return false;
    }

    const expectedValue = expectedAttributes.get(nameLower);

    // Only consider handlers defined directly on the handler map. A plain
    // property read would also find inherited members (e.g. an attribute
    // named `constructor` would resolve to the `Object` function).
    const isEqualAttributes = Object.prototype.hasOwnProperty.call(isEqualAttributesOfName, nameLower)
      ? isEqualAttributesOfName[nameLower]
      : undefined;

    // Defer to custom attribute equality handling when available, otherwise
    // require strict equality.
    const isEqual = isEqualAttributes
      ? isEqualAttributes(actualValue, expectedValue)
      : actualValue === expectedValue;

    if (!isEqual) {
      logger.warning('Expected attribute `%s` of value `%s`, saw `%s`.', name, expectedValue, actualValue);
      return false;
    }
  }

  return true;
}

/**
 * Token-type-specific equality handlers
 *
 * @type {Object}
 */
export const isEqualTokensOfType = {
  StartTag: (actual, expected, logger = createLogger()) => {
    if (
      actual.tagName !== expected.tagName &&
      // Optimization: Use short-circuit evaluation to defer case-
      // insensitive check on the assumption that the majority case will
      // have exactly equal tag names.
      actual.tagName.toLowerCase() !== expected.tagName.toLowerCase()
    ) {
      logger.warning('Expected tag name `%s`, instead saw `%s`.', expected.tagName, actual.tagName);
      return false;
    }

    return isEqualTagAttributePairs(
      getMeaningfulAttributePairs(actual),
      getMeaningfulAttributePairs(expected),
      logger
    );
  },
  Chars: isEquivalentTextTokens,
  Comment: isEquivalentTextTokens
};

/**
 * Given an array of tokens, returns the first token which is not purely
 * whitespace.
 *
 * Mutates the tokens array.
 *
 * @param {Object[]} tokens Set of tokens to search.
 *
 * @return {Object | undefined} Next non-whitespace token.
 */
export function getNextNonWhitespaceToken(tokens) {
  // Consume tokens from the front until one is found that is either not a
  // `Chars` token or is a `Chars` token with non-whitespace content.
  for (let token = tokens.shift(); token; token = tokens.shift()) {
    if (token.type !== 'Chars') {
      return token;
    }

    if (!REGEXP_ONLY_WHITESPACE.test(token.chars)) {
      return token;
    }
  }

  return undefined;
}

/**
 * Tokenize an HTML string, gracefully handling any errors thrown during
 * underlying tokenization.
 *
 * @param {string} html   HTML string to tokenize.
 * @param {Object} logger Validation logger object.
 *
 * @return {Object[]|null} Array of valid tokenized HTML elements, or null on error
 */
function getHTMLTokens(html, logger = createLogger()) {
  try {
    return new Tokenizer(new DecodeEntityParser()).tokenize(html);
  } catch (e) {
    // Deliberately best-effort: the failure is reported through the logger
    // and surfaced to the caller as `null` instead of being rethrown.
    logger.warning('Malformed HTML detected: %s', html);
  }

  return null;
}

/**
 * Returns true if the next HTML token closes the current token.
 *
 * @param {Object}           currentToken Current token to compare with.
 * @param {Object|undefined} nextToken    Next token to compare against.
 *
 * @return {boolean} true if `nextToken` closes `currentToken`, false otherwise
 */
export function isClosedByToken(currentToken, nextToken) {
  // Ensure this is a self closed token.
  if (!currentToken.selfClosing) {
    return false;
  }

  // Check token names and determine if nextToken is the closing tag for currentToken.
  if (nextToken && nextToken.tagName === currentToken.tagName && nextToken.type === 'EndTag') {
    return true;
  }

  return false;
}

/**
 * Returns true if the given HTML strings are effectively equivalent, or
 * false otherwise. Invalid HTML is not considered equivalent, even if the
 * strings directly match.
 *
 * @param {string} actual   Actual HTML string.
 * @param {string} expected Expected HTML string.
 * @param {Object} logger   Validation logger object.
 *
 * @return {boolean} Whether HTML strings are equivalent.
 */
export function isEquivalentHTML(actual, expected, logger = createLogger()) {
  // Short-circuit if markup is identical.
  if (actual === expected) {
    return true;
  }

  // Tokenize input content and reserialized save content. Both strings are
  // tokenized before either result is inspected.
  const actualTokens = getHTMLTokens(actual, logger);
  const expectedTokens = getHTMLTokens(expected, logger);

  // If either is malformed then stop comparing - the strings are not equivalent.
  if (!actualTokens || !expectedTokens) {
    return false;
  }

  let actualToken;
  while ((actualToken = getNextNonWhitespaceToken(actualTokens))) {
    const expectedToken = getNextNonWhitespaceToken(expectedTokens);

    // Inequal if exhausted all expected tokens.
    if (!expectedToken) {
      logger.warning('Expected end of content, instead saw %o.', actualToken);
      return false;
    }

    // Inequal if next non-whitespace token of each set are not same type.
    if (actualToken.type !== expectedToken.type) {
      logger.warning('Expected token of type `%s` (%o), instead saw `%s` (%o).', expectedToken.type, expectedToken, actualToken.type, actualToken);
      return false;
    }

    // Defer custom token type equality handling, otherwise continue and
    // assume as equal.
    const isEqualTokens = isEqualTokensOfType[actualToken.type];
    if (isEqualTokens && !isEqualTokens(actualToken, expectedToken, logger)) {
      return false;
    }

    // Peek at the next tokens (actual and expected) to see if they close
    // a self-closing tag.
    if (isClosedByToken(actualToken, expectedTokens[0])) {
      // Consume the next expected token that closes the current actual
      // self-closing token.
      getNextNonWhitespaceToken(expectedTokens);
    } else if (isClosedByToken(expectedToken, actualTokens[0])) {
      // Consume the next actual token that closes the current expected
      // self-closing token.
      getNextNonWhitespaceToken(actualTokens);
    }
  }

  const remainingExpectedToken = getNextNonWhitespaceToken(expectedTokens);
  if (remainingExpectedToken) {
    // If any non-whitespace tokens remain in expected token set, this
    // indicates inequality.
    logger.warning('Expected %o, instead saw end of content.', remainingExpectedToken);
    return false;
  }

  return true;
}

/**
 * Returns a tuple whose first member is `true` if the parsed block is valid
 * given the input content. A block is considered valid if, when serialized
 * with assumed attributes, the content matches the original value. The second
 * member holds the items collected by the validation logger.
 *
 * Blocks handled by the freeform or unregistered-type handler are always
 * reported as valid without generating save content.
 *
 * @param {WPBlock}            block                          block object.
 * @param {WPBlockType|string} [blockTypeOrName = block.name] Block type or name, inferred from block if not given.
 *
 * @return {[boolean,Array<LoggerItem>]} validation results.
 */
export function validateBlock(block, blockTypeOrName = block.name) {
  const isFallbackBlock =
    block.name === getFreeformContentHandlerName() ||
    block.name === getUnregisteredTypeHandlerName();

  // Shortcut to avoid costly validation.
  if (isFallbackBlock) {
    return [true, []];
  }

  const logger = createQueuedLogger();
  const blockType = normalizeBlockType(blockTypeOrName);

  let generatedBlockContent;
  try {
    generatedBlockContent = getSaveContent(blockType, block.attributes);
  } catch (error) {
    // A failure while generating save content is reported as an invalid block.
    logger.error('Block validation failed because an error occurred while generating block content:\n\n%s', error.toString());
    return [false, logger.getItems()];
  }

  const isValid = isEquivalentHTML(block.originalContent, generatedBlockContent, logger);
  if (!isValid) {
    logger.error('Block validation failed for `%s` (%o).\n\nContent generated by `save` function:\n\n%s\n\nContent retrieved from post body:\n\n%s', blockType.name, blockType, generatedBlockContent, block.originalContent);
  }

  return [isValid, logger.getItems()];
}

/**
 * Returns true if the parsed block is valid given the input content. A block
 * is considered valid if, when serialized with assumed attributes, the content
 * matches the original value.
 *
 * Logs to console in development environments when invalid.
 *
 * @deprecated Use validateBlock instead to avoid data loss.
 *
 * @param {string|Object} blockTypeOrName      Block type.
 * @param {Object}        attributes           Parsed block attributes.
 * @param {string}        originalBlockContent Original block content.
 *
 * @return {boolean} Whether block is valid.
 */
export function isValidBlockContent(blockTypeOrName, attributes, originalBlockContent) {
  deprecated('isValidBlockContent introduces opportunity for data loss', {
    since: '12.6',
    plugin: 'Gutenberg',
    alternative: 'validateBlock'
  });

  const blockType = normalizeBlockType(blockTypeOrName);

  // Assemble the minimal block shape that `validateBlock` reads.
  const block = {
    name: blockType.name,
    attributes,
    innerBlocks: [],
    originalContent: originalBlockContent
  };

  const [isValid] = validateBlock(block, blockType);
  return isValid;
}
//# sourceMappingURL=index.js.map