UNPKG

hast-util-minify-whitespace

Version:

hast utility to minify whitespace between elements

378 lines (332 loc) 8.2 kB
/**
 * @import {Nodes, Parents, Text} from 'hast'
 */

/**
 * @callback Collapse
 *   Collapse a string.
 * @param {string} value
 *   Value to collapse.
 * @returns {string}
 *   Collapsed value.
 *
 * @typedef Options
 *   Configuration.
 * @property {boolean | null | undefined} [newlines=false]
 *   Collapse whitespace containing a line ending to that line ending
 *   (the first one found in the run: `'\n'`, `'\r\n'`, or `'\r'`) instead of
 *   `' '` (default: `false`); the default is to collapse to a single space.
 *
 * @typedef Result
 *   Result.
 * @property {boolean} remove
 *   Whether to remove.
 * @property {boolean} ignore
 *   Whether to ignore.
 * @property {boolean} stripAtStart
 *   Whether to strip at the start.
 *
 * @typedef State
 *   Info passed around.
 * @property {Collapse} collapse
 *   Collapse.
 * @property {Whitespace} whitespace
 *   Current whitespace.
 * @property {boolean | undefined} [before]
 *   Whether there is a break before (default: `false`).
 * @property {boolean | undefined} [after]
 *   Whether there is a break after (default: `false`).
 *
 * @typedef {'normal' | 'nowrap' | 'pre' | 'pre-wrap'} Whitespace
 *   Whitespace setting.
 */

import {embedded} from 'hast-util-embedded'
import {isElement} from 'hast-util-is-element'
import {whitespace} from 'hast-util-whitespace'
import {convert} from 'unist-util-is'
import {blocks} from './block.js'
import {content as contents} from './content.js'
import {skippable as skippables} from './skippable.js'

/** @type {Options} */
const emptyOptions = {}
const ignorableNode = convert(['comment', 'doctype'])

/**
 * Minify whitespace.
 *
 * The tree is modified in place: text node values are rewritten and text
 * nodes that end up empty are removed from their parent.
 *
 * @param {Nodes} tree
 *   Tree.
 * @param {Options | null | undefined} [options]
 *   Configuration (optional).
 * @returns {undefined}
 *   Nothing.
 */
export function minifyWhitespace(tree, options) {
  const settings = options || emptyOptions

  minify(tree, {
    collapse: collapseFactory(
      settings.newlines ? replaceNewlines : replaceWhitespace
    ),
    whitespace: 'normal'
  })
}

/**
 * Minify one node: parents are walked, text is collapsed (and trimmed when
 * the whitespace mode is `normal`), everything else is left untouched.
 *
 * @param {Nodes} node
 *   Node.
 * @param {State} state
 *   Info passed around.
 * @returns {Result}
 *   Result.
 */
function minify(node, state) {
  if ('children' in node) {
    const settings = {...state}

    // Roots and block-like elements have a break on both sides.
    if (node.type === 'root' || blocklike(node)) {
      settings.before = true
      settings.after = true
    }

    settings.whitespace = inferWhiteSpace(node, state)

    return all(node, settings)
  }

  if (node.type === 'text') {
    if (state.whitespace === 'normal') {
      return minifyText(node, state)
    }

    // Naïve collapse, but no trimming:
    if (state.whitespace === 'nowrap') {
      node.value = state.collapse(node.value)
    }

    // The `pre-wrap` or `pre` whitespace settings are neither collapsed nor
    // trimmed.
  }

  return {ignore: ignorableNode(node), stripAtStart: false, remove: false}
}

/**
 * Collapse the whitespace in a text node and trim it where there is a break
 * before and/or after it.
 *
 * A collapsed run of whitespace is one token: `' '`, `'\n'`, `'\r'`, or
 * `'\r\n'` (the latter three only with `newlines: true`).
 * Trimming always removes a whole token, so a `'\r\n'` is never cut in half.
 *
 * @param {Text} node
 *   Node.
 * @param {State} state
 *   Info passed around.
 * @returns {Result}
 *   Result.
 */
function minifyText(node, state) {
  const value = state.collapse(node.value)
  const result = {ignore: false, stripAtStart: false, remove: false}
  let start = 0
  let end = value.length

  if (state.before) {
    start += removableStart(value)
  }

  // Only look at the tail if something is left after trimming the head
  // (otherwise the head and tail would be the same token).
  if (start !== end) {
    const trailing = removableEnd(value)

    if (trailing > 0) {
      if (state.after) {
        end -= trailing
      } else {
        // Keep the trailing whitespace, but tell the caller that whitespace
        // at the start of what follows can be stripped.
        result.stripAtStart = true
      }
    }
  }

  if (start === end) {
    result.remove = true
  } else {
    node.value = value.slice(start, end)
  }

  return result
}

/**
 * Measure the removable whitespace token at the start of a collapsed value.
 *
 * @param {string} value
 *   Collapsed value.
 * @returns {number}
 *   Number of removable characters at the start (`0`, `1`, or `2`).
 */
function removableStart(value) {
  if (value.startsWith('\r\n')) {
    return 2
  }

  return removable(value.charAt(0)) ? 1 : 0
}

/**
 * Measure the removable whitespace token at the end of a collapsed value.
 *
 * @param {string} value
 *   Collapsed value.
 * @returns {number}
 *   Number of removable characters at the end (`0`, `1`, or `2`).
 */
function removableEnd(value) {
  if (value.endsWith('\r\n')) {
    return 2
  }

  return removable(value.charAt(value.length - 1)) ? 1 : 0
}

/**
 * Minify all children of a parent, removing text nodes that become empty.
 *
 * @param {Parents} parent
 *   Node.
 * @param {State} state
 *   Info passed around.
 * @returns {Result}
 *   Result.
 */
function all(parent, state) {
  let before = state.before
  const after = state.after
  const children = parent.children
  let length = children.length
  let index = -1

  while (++index < length) {
    const result = minify(children[index], {
      ...state,
      after: collapsableAfter(children, index, after),
      before
    })

    if (result.remove) {
      children.splice(index, 1)
      index--
      length--
    } else if (!result.ignore) {
      before = result.stripAtStart
    }

    // If this element, such as a `<select>` or `<img>`, contributes content
    // somehow, allow whitespace again.
    // Note: after a removal, `index` was decremented, so this looks at the
    // previous sibling (or at nothing when the first child was removed).
    if (content(children[index])) {
      before = false
    }
  }

  return {ignore: false, stripAtStart: Boolean(before || after), remove: false}
}

/**
 * Look ahead from `index` for the next boundary, descending into children of
 * nodes that are not skippable.
 *
 * @param {Array<Nodes>} nodes
 *   Nodes.
 * @param {number} index
 *   Index.
 * @param {boolean | undefined} [after]
 *   Whether there is a break after `nodes` (default: `false`).
 * @returns {boolean | undefined}
 *   Whether there is a break after the node at `index`.
 */
function collapsableAfter(nodes, index, after) {
  while (++index < nodes.length) {
    const node = nodes[index]
    let result = inferBoundary(node)

    if (result === undefined && 'children' in node && !skippable(node)) {
      // Start before the first child (`-1`); no `after` is passed, so an
      // inconclusive subtree yields `undefined` and the search continues
      // with the next sibling.
      result = collapsableAfter(node.children, -1)
    }

    if (typeof result === 'boolean') {
      return result
    }
  }

  return after
}

/**
 * Infer two types of boundaries:
 *
 * 1. `true` — boundary for which whitespace around it does not contribute
 *    anything
 * 2. `false` — boundary for which whitespace around it *does* contribute
 *
 * No result (`undefined`) is returned if it is unknown.
 *
 * @param {Nodes} node
 *   Node.
 * @returns {boolean | undefined}
 *   Boundary.
 */
function inferBoundary(node) {
  if (node.type === 'element') {
    if (content(node)) {
      return false
    }

    if (blocklike(node)) {
      return true
    }

    // Unknown: either depends on siblings if embedded or metadata, or on
    // children.
  } else if (node.type === 'text') {
    if (!whitespace(node)) {
      return false
    }
  } else if (!ignorableNode(node)) {
    return false
  }
}

/**
 * Infer whether a node contributes content by itself: it is either embedded
 * content or one of the elements listed in `./content.js`.
 *
 * @param {Nodes} node
 *   Node.
 * @returns {boolean}
 *   Whether `node` contributes content.
 */
function content(node) {
  return embedded(node) || isElement(node, contents)
}

/**
 * See: <https://html.spec.whatwg.org/#the-css-user-agent-style-sheet-and-presentational-hints>
 *
 * @param {Nodes} node
 *   Node.
 * @returns {boolean}
 *   Whether `node` is block-like.
 */
function blocklike(node) {
  return isElement(node, blocks)
}

/**
 * Check whether a parent can be skipped when looking ahead for a boundary:
 * hidden elements, ignorable nodes, and the elements listed in
 * `./skippable.js`.
 *
 * @param {Parents} node
 *   Node.
 * @returns {boolean}
 *   Whether `node` is skippable.
 */
function skippable(node) {
  return (
    // Guard `properties`, like `inferWhiteSpace` does: hand-built elements
    // may lack it.
    Boolean(
      node.type === 'element' && node.properties && node.properties.hidden
    ) ||
    ignorableNode(node) ||
    isElement(node, skippables)
  )
}

/**
 * Check whether a single character is collapsed whitespace that can be
 * trimmed.
 *
 * `'\r'` is included because `replaceNewlines` keeps the original line
 * ending, which can be `'\r'` or `'\r\n'`.
 *
 * @param {string} character
 *   Character.
 * @returns {boolean}
 *   Whether `character` is removable.
 */
function removable(character) {
  return character === ' ' || character === '\n' || character === '\r'
}

/**
 * Replace a run of whitespace with the first line ending in it, or with a
 * space if it contains no line ending.
 *
 * @type {Collapse}
 */
function replaceNewlines(value) {
  const match = /\r?\n|\r/.exec(value)
  return match ? match[0] : ' '
}

/**
 * Replace a run of whitespace with a single space.
 *
 * @type {Collapse}
 */
function replaceWhitespace() {
  return ' '
}

/**
 * Create a function that collapses every run of whitespace in a string
 * using `replace`.
 *
 * @param {Collapse} replace
 *   Called with each run of whitespace; returns its replacement.
 * @returns {Collapse}
 *   Collapse.
 */
function collapseFactory(replace) {
  return collapse

  /**
   * @type {Collapse}
   */
  function collapse(value) {
    return String(value).replace(/[\t\n\v\f\r ]+/g, replace)
  }
}

/**
 * Infer the whitespace setting that applies to the children of `node`.
 *
 * We don’t need to support void elements here (so `nobr wbr` -> `normal` is
 * ignored).
 *
 * @param {Parents} node
 *   Node.
 * @param {State} state
 *   Info passed around.
 * @returns {Whitespace}
 *   Whitespace.
 */
function inferWhiteSpace(node, state) {
  if ('tagName' in node && node.properties) {
    switch (node.tagName) {
      // Whitespace in script/style, while not displayed by CSS as significant,
      // could have some meaning in JS/CSS, so we can’t touch them.
      case 'listing':
      case 'plaintext':
      case 'script':
      case 'style':
      case 'xmp': {
        return 'pre'
      }

      case 'nobr': {
        return 'nowrap'
      }

      case 'pre': {
        return node.properties.wrap ? 'pre-wrap' : 'pre'
      }

      case 'td':
      case 'th': {
        return node.properties.noWrap ? 'nowrap' : state.whitespace
      }

      case 'textarea': {
        return 'pre-wrap'
      }

      default:
    }
  }

  // Anything else inherits the setting of its parent.
  return state.whitespace
}