hast-util-truncate
Version:
hast utility to truncate the tree to a certain number of characters
168 lines (144 loc) • 4.97 kB
JavaScript
/**
* @typedef {import('hast').Nodes} Nodes
* @typedef {import('hast').RootContent} RootContent
* @typedef {import('hast').Text} Text
*/
/**
* @typedef Options
* Configuration.
* @property {string | null | undefined} [ellipsis]
* Value to use at truncation point (optional).
* @property {Array<RootContent> | null | undefined} [ignore=[]]
* Nodes to exclude from the resulting tree; these are not counted towards
* `size` (optional).
* @property {number | null | undefined} [maxCharacterStrip=30]
* How far to walk back (default: `30`).
*
* The algorithm attempts to break right after a word rather than the exact
* `size`.
* Take for example the `|`, which is the actual break defined by `size`, and
* the `…` is the location where the ellipsis is placed: `This… an|d that`.
* Breaking at `|` would at best look bad but could likely result in things
* such as `ass…` for `assignment` — which is not ideal.
* `maxCharacterStrip` defines how far back the algorithm will walk to find
* a pretty word break.
* This prevents a potential slow operation on larger `size`s without any
* whitespace.
* If `maxCharacterStrip` characters are walked back and no nice break point
* is found, the bad break point is used.
* Set `maxCharacterStrip: 0` to not find a nice break.
* @property {number | null | undefined} [size=140]
* Number of characters to truncate to (default: `140`).
*/
import structuredClone from '@ungap/structured-clone'
import {unicodePunctuation, unicodeWhitespace} from 'micromark-util-character'
/**
 * Shared fallback for `options.ignore`: `truncate` uses this empty list when
 * the caller passes no `ignore` list, so no nodes are excluded.
 *
 * @type {ReadonlyArray<RootContent>}
 */
const emptyIgnore = []
/**
 * Shared fallback for `options`: `truncate` uses this empty object when the
 * caller passes no configuration, so every setting takes its default.
 *
 * @type {Readonly<Options>}
 */
const emptyOptions = {}
/**
 * Truncate the tree to a certain number of characters.
 *
 * The tree is walked in preorder while the lengths of its text nodes are
 * summed.
 * The first text that would push the sum past `size` is cut, preferably
 * right after a word, the optional `ellipsis` is appended to it, and every
 * node after it is dropped.
 * Nodes listed in `ignore` are dropped and not counted.
 *
 * Characters are counted as UTF-16 code units (`String#length`).
 *
 * @template {Nodes} Tree
 *   Type of tree.
 * @param {Tree} tree
 *   Tree to truncate; it is not mutated.
 * @param {Options | null | undefined} [options]
 *   Configuration (optional).
 * @returns {Tree}
 *   A truncated copy of `tree` (deep-cloned before it is returned).
 */
export function truncate(tree, options) {
  // To do: support units other than characters (presumably words).
  const config = options || emptyOptions
  const size = typeof config.size === 'number' ? config.size : 140
  const maxCharacterStrip =
    typeof config.maxCharacterStrip === 'number' ? config.maxCharacterStrip : 30
  const ignore = config.ignore || emptyIgnore
  const ellipsis = config.ellipsis
  // Total length of the texts that were kept in full so far.
  let searchSize = 0
  /** @type {Text | undefined} */
  let overflowingText

  // `preorder` for the top node always returns a copy of that node.
  const result = /** @type {Tree} */ (preorder(tree))

  if (overflowingText) {
    const value = overflowingText.value
    // Exact cut defined by `size`.
    // Clamped to a non-negative integer so that a negative or fractional
    // `size` can never make `slice` count from the end of the string.
    const uglyBreakpoint = Math.max(0, Math.floor(size - searchSize))
    let breakpoint = uglyBreakpoint

    // If the character at the break is an alphanumerical, cutting there would
    // split a word: try to find a nicer break before it.
    if (unicodeAlphanumeric(value.charCodeAt(uglyBreakpoint))) {
      // Earliest index we are allowed to walk back to.
      const limit = Math.max(0, uglyBreakpoint - maxCharacterStrip)
      let candidate = uglyBreakpoint

      // Move back while the character before `candidate` is an alphanumerical.
      while (
        candidate > limit &&
        unicodeAlphanumeric(value.charCodeAt(candidate - 1))
      ) {
        candidate--
      }

      // Only when the walk stopped at the start of the word is there a nice
      // break; running out of budget (or text) inside the word means the ugly
      // breakpoint is kept, as documented for `maxCharacterStrip`.
      if (
        candidate > 0 &&
        !unicodeAlphanumeric(value.charCodeAt(candidate - 1))
      ) {
        // Move back while the character before `candidate` is *not* an
        // alphanumerical, so whitespace/punctuation before the ellipsis is
        // dropped too (as far as the budget allows).
        while (
          candidate > limit &&
          !unicodeAlphanumeric(value.charCodeAt(candidate - 1))
        ) {
          candidate--
        }

        // An empty result is not a nice break: keep the ugly one then.
        if (candidate > 0) {
          breakpoint = candidate
        }
      }
    }

    // Never cut between the two halves of a surrogate pair: that would leave
    // a lone surrogate (an invalid character) at the end of the text.
    if (
      breakpoint > 0 &&
      (value.charCodeAt(breakpoint - 1) & 0xfc00) === 0xd800 &&
      (value.charCodeAt(breakpoint) & 0xfc00) === 0xdc00
    ) {
      breakpoint--
    }

    overflowingText.value = value.slice(0, breakpoint)

    if (ellipsis) {
      overflowingText.value += ellipsis
    }
  }

  return structuredClone(result)

  /**
   * Transform in `preorder`.
   *
   * Copies `node` and, for parents, its children up to and including the
   * child that contains the overflowing text.
   * Records that text in `overflowingText` and adds the length of every
   * fully kept text to `searchSize`.
   *
   * @param {Nodes} node
   *   Node to truncate.
   * @returns {Nodes}
   *   Shallow copy of `node`.
   */
  function preorder(node) {
    if (node.type === 'text') {
      if (searchSize + node.value.length > size) {
        // This text crosses `size`: remember a copy, it is cut by the caller.
        overflowingText = {...node}
        return overflowingText
      }

      searchSize += node.value.length
    }

    /** @type {Nodes} */
    const replacement = {...node}

    if ('children' in node) {
      /** @type {Array<RootContent>} */
      const children = []
      let index = -1

      while (++index < node.children.length) {
        const child = node.children[index]

        // Ignored nodes are matched by identity and left out entirely.
        if (!ignore.includes(child)) {
          const copy = /** @type {RootContent} */ (preorder(child))
          if (copy) children.push(copy)
        }

        // One of the descendant texts included the breakpoint: everything
        // after it is dropped.
        if (overflowingText) {
          break
        }
      }

      // @ts-expect-error: content model matches.
      replacement.children = children
    }

    return replacement
  }
}
/**
 * Check whether a character code counts as part of a word.
 *
 * Everything that is neither Unicode whitespace nor Unicode punctuation is
 * treated as "alphanumerical" here.
 *
 * @param {number} code
 *   Character code.
 * @returns {boolean}
 *   Whether `code` is not punctuation and not whitespace.
 */
function unicodeAlphanumeric(code) {
  // Whitespace is tested first; punctuation is only tested when that fails.
  const separates = unicodeWhitespace(code) || unicodePunctuation(code)
  return !separates
}