UNPKG

hast-util-truncate

Version:

hast utility to truncate the tree to a certain number of characters

168 lines (144 loc) 4.97 kB
/**
 * @typedef {import('hast').Nodes} Nodes
 * @typedef {import('hast').RootContent} RootContent
 * @typedef {import('hast').Text} Text
 */

/**
 * @typedef Options
 *   Configuration.
 * @property {string | null | undefined} [ellipsis]
 *   Value to use at truncation point (optional).
 * @property {Array<RootContent> | null | undefined} [ignore=[]]
 *   Nodes to exclude from the resulting tree; these are not counted towards
 *   `size` (optional).
 * @property {number | null | undefined} [maxCharacterStrip=30]
 *   How far to walk back (default: `30`).
 *
 *   The algorithm attempts to break right after a word rather than the exact
 *   `size`.
 *   Take for example the `|`, which is the actual break defined by `size`, and
 *   the `…` is the location where the ellipsis is placed: `This… an|d that`.
 *   Breaking at `|` would at best look bad but could likely result in things
 *   such as `ass…` for `assignment` — which is not ideal.
 *   `maxCharacterStrip` defines how far back the algorithm will walk to find
 *   a pretty word break.
 *   This prevents a potential slow operation on larger `size`s without any
 *   whitespace.
 *   If `maxCharacterStrip` characters are walked back and no nice break point
 *   is found, the bad break point is used.
 *   Set `maxCharacterStrip: 0` to not find a nice break.
 * @property {number | null | undefined} [size=140]
 *   Number of characters to truncate to (default: `140`).
 */

import structuredClone from '@ungap/structured-clone'
import {unicodePunctuation, unicodeWhitespace} from 'micromark-util-character'

/** @type {ReadonlyArray<RootContent>} */
const emptyIgnore = []

/** @type {Readonly<Options>} */
const emptyOptions = {}

/**
 * Truncate the tree to a certain number of characters.
 *
 * Characters are counted as UTF-16 code units of the `value` of text nodes,
 * in document order.
 * The text node in which the limit is crossed is cut (preferably at a word
 * boundary, see `maxCharacterStrip`) and everything after it is dropped.
 *
 * @template {Nodes} Tree
 *   Type of tree.
 * @param {Tree} tree
 *   Tree to truncate; it is not modified.
 * @param {Options | null | undefined} [options]
 *   Configuration (optional).
 * @returns {Tree}
 *   Truncated copy of `tree`, passed through `structuredClone` before it is
 *   returned.
 */
export function truncate(tree, options) {
  // To do: support units.
  const config = options || emptyOptions
  const size = typeof config.size === 'number' ? config.size : 140
  const maxCharacterStrip =
    typeof config.maxCharacterStrip === 'number'
      ? config.maxCharacterStrip
      : 30
  const ignore = config.ignore || emptyIgnore
  const ellipsis = config.ellipsis
  // Number of characters in the text nodes that were kept in full so far.
  let searchSize = 0
  /**
   * Copy of the text node in which `size` is crossed, set by `preorder`.
   *
   * @type {Text | undefined}
   */
  let overflowingText

  // `preorder` for the top node always returns itself.
  const result = /** @type {Tree} */ (preorder(tree))

  if (overflowingText) {
    const value = overflowingText.value
    // Exact position in `value` at which `size` is reached.
    const uglyBreakpoint = size - searchSize
    let breakpoint = uglyBreakpoint

    // If the character at the break is an alphanumerical, the break is inside
    // (or directly before) a word, so try to find a nicer place before it.
    if (unicodeAlphanumeric(value.charCodeAt(breakpoint))) {
      // Shared step counter for both loops below: together they walk back at
      // most `maxCharacterStrip` characters.
      let remove = -1

      // Move back while the character before breakpoint is an alphanumerical.
      while (
        breakpoint &&
        ++remove < maxCharacterStrip &&
        unicodeAlphanumeric(value.charCodeAt(breakpoint - 1))
      ) {
        breakpoint--
      }

      // Move back while the character before breakpoint is *not* an
      // alphanumerical.
      while (
        breakpoint &&
        ++remove < maxCharacterStrip &&
        !unicodeAlphanumeric(value.charCodeAt(breakpoint - 1))
      ) {
        breakpoint--
      }
    }

    // Walking all the way back to the start would drop the whole text, in
    // which case the ugly breakpoint is used instead.
    let end = breakpoint || uglyBreakpoint

    // Positions are UTF-16 code units: never cut between the two halves of a
    // surrogate pair, as that would leave a lone surrogate in the output.
    if (splitsSurrogatePair(value, end)) {
      end--
    }

    overflowingText.value = value.slice(0, end)

    if (ellipsis) {
      overflowingText.value += ellipsis
    }
  }

  return structuredClone(result)

  /**
   * Transform in `preorder`.
   *
   * Text nodes that fit are counted towards `searchSize`; the first text node
   * that does not fit is copied into `overflowingText` and ends the walk.
   *
   * @param {Nodes} node
   *   Node to truncate.
   * @returns {Nodes}
   *   Shallow copy of `node`.
   */
  function preorder(node) {
    if (node.type === 'text') {
      if (searchSize + node.value.length > size) {
        // Keep a copy, so that the caller’s node is not changed when the
        // value is cut later on.
        overflowingText = {...node}
        return overflowingText
      }

      searchSize += node.value.length
    }

    /** @type {Nodes} */
    const replacement = {...node}

    if ('children' in node) {
      /** @type {Array<RootContent>} */
      const children = []
      let index = -1

      while (++index < node.children.length) {
        const child = node.children[index]

        // Ignored nodes are matched by identity and left out entirely.
        if (!ignore.includes(child)) {
          const copy = /** @type {RootContent} */ (preorder(child))
          if (copy) children.push(copy)
        }

        // One of the descendant texts included the breakpoint.
        if (overflowingText) {
          break
        }
      }

      // @ts-expect-error: content model matches.
      replacement.children = children
    }

    return replacement
  }
}

/**
 * Check whether cutting `value` at `index` separates a high surrogate from
 * the low surrogate that follows it.
 *
 * @param {string} value
 *   Text to check.
 * @param {number} index
 *   Position of the cut (the first code unit that would be dropped).
 * @returns {boolean}
 *   Whether the code unit before `index` is a high surrogate and the code
 *   unit at `index` is a low surrogate.
 */
function splitsSurrogatePair(value, index) {
  // `charCodeAt` yields `NaN` out of range, which fails every comparison.
  const before = value.charCodeAt(index - 1)
  const after = value.charCodeAt(index)

  return (
    before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff
  )
}

/**
 * @param {number} code
 *   Character code.
 * @returns {boolean}
 *   Whether `code` is not punctuation and not whitespace.
 */
function unicodeAlphanumeric(code) {
  return !unicodeWhitespace(code) && !unicodePunctuation(code)
}