UNPKG

tty-strings

Version:

Tools for working with strings displayed in the terminal

52 lines (50 loc) 2.14 kB
import { graphemeBreakProperty, shouldBreak } from './graphemeBreak'; /** * A generator function that splits a string into its component graphemes. Does not handle ANSI escape codes, * so make sure to remove them from input strings before calling this function. * * @remarks * This function is an implementation of UAX #29 grapheme cluster boundary splitting: * {@link https://www.unicode.org/reports/tr29/tr29-21.html#Grapheme_Cluster_Boundaries} * * @example * ```js * import { splitChars } from 'tty-strings'; * * // [...'à̰ḇ́ĉ̥'] -> ['a', '\u0300', '\u0330', 'b', '\u0341', '\u0331', 'c', '\u0302', '\u0325'] * const chars = [...splitChars('à̰ḇ́ĉ̥')]; // ['à̰', 'ḇ́', 'ĉ̥'] * ``` * * @param string - Input string to split. * @returns A generator that yields each grapheme in the input string. */ export default function* splitChars(string: string): Generator<string, void> { if (typeof string !== 'string' || string.length === 0) return; // lower string index of the first grapheme cluster let i = 0, // initialize array to store grapheme break properties props: number[] = [], // get first code point cp = string.codePointAt(0)!, // get grapheme break property of the first code point prev = graphemeBreakProperty(cp); for (let j = cp > 0xFFFF ? 2 : 1, n = string.length; j < n; j += 1) { cp = string.codePointAt(j)!; // get grapheme break property of the next code point const next = graphemeBreakProperty(cp); // check if there is a cluster boundary between the two adjacent code points if (shouldBreak(props, prev, next)) { // a cluster boundry exists, yield the current grapheme cluster yield string.slice(i, j); // reset grapheme break properties array props = []; // set lower string index of the next cluster i = j; } else props.push(prev); // ignore surrogates if (cp > 0xFFFF) j += 1; prev = next; } // yield the final grapheme cluster yield string.slice(i); }