UNPKG

tty-strings

Version:

Tools for working with strings displayed in the terminal

127 lines 5.83 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
const codePoint_1 = require("./codePoint");
const graphemeBreak_1 = require("./graphemeBreak");
const emoji_1 = require("./emoji");
const {
    RI,
    ExtendedPictographic,
    Extend,
    V,
    T,
} = graphemeBreak_1.GBProps;

/**
 * Generator that walks a string and yields each grapheme cluster together with its measured width.
 * ANSI escape codes are not handled, so strip them from the input before calling this function.
 *
 * @remarks
 * Cluster boundaries follow the UAX #29 grapheme cluster boundary rules:
 * {@link https://www.unicode.org/reports/tr29/tr29-21.html#Grapheme_Cluster_Boundaries}
 *
 * A non-string or empty input yields nothing.
 *
 * @example
 * ```js
 * import { charWidths } from 'tty-strings';
 *
 * // Basic latin characters
 * const chars = [...charWidths('abc')]; // [['a', 1], ['b', 1], ['c', 1]]
 *
 * // Full width emoji characters
 * const emojis = [...charWidths('🙈🙉🙊')]; // [['🙈', 2], ['🙉', 2], ['🙊', 2]]
 * ```
 *
 * @param string - Input string to split.
 * @returns A generator that yields a `[grapheme, width]` tuple for each grapheme in the input string.
 */
function* charWidths(string) {
    // nothing to measure for non-string or empty input
    if (typeof string !== 'string' || string.length === 0) {
        return;
    }

    // U+200D ZERO WIDTH JOINER
    const ZWJ = 0x200D;
    const end = string.length;

    // string index at which the current cluster starts
    let start = 0;
    // most recently consumed code point (initially the first one in the string)
    let lastCp = string.codePointAt(0);
    // code points collected for the current cluster
    let clusterCps = [lastCp];
    // grapheme break properties of every code point in the cluster except the most recent one
    let clusterProps = [];
    // width of the first code point of the current cluster
    let leadWidth = (0, codePoint_1.default)(lastCp);
    // accumulated width of the current cluster
    let width = leadWidth;
    // whether the current cluster contains a zero-width-joiner
    let hasZwj = lastCp === ZWJ;
    // grapheme break property of the most recently consumed code point
    let lastProp = (0, graphemeBreak_1.graphemeBreakProperty)(lastCp);
    // whether a Grapheme_Base has been found in the current cluster
    let hasBase = false;
    // how far into the cluster the Grapheme_Base search has progressed
    let baseCursor = 0;

    // An emoji zwj sequence is as wide as its first code point; any other cluster uses the accumulated width.
    const resolveWidth = () => {
        const isZwjEmoji = hasZwj
            && clusterProps[0] === ExtendedPictographic
            && (0, emoji_1.isEmojiZwjSequence)(clusterCps);
        return isZwjEmoji ? leadWidth : width;
    };

    // step over both UTF-16 code units when the first code point is outside the BMP
    let pos = lastCp > 0xFFFF ? 2 : 1;
    while (pos < end) {
        const cp = string.codePointAt(pos);
        const prop = (0, graphemeBreak_1.graphemeBreakProperty)(cp);

        if ((0, graphemeBreak_1.shouldBreak)(clusterProps, lastProp, prop)) {
            // boundary found: emit the finished cluster before starting a new one
            yield [string.slice(start, pos), resolveWidth()];
            // start a fresh cluster at the current code point
            clusterProps = [];
            clusterCps = [cp];
            leadWidth = (0, codePoint_1.default)(cp);
            width = leadWidth;
            start = pos;
            hasZwj = cp === ZWJ;
            hasBase = false;
            baseCursor = 0;
        } else {
            // no boundary: the code point joins the current cluster
            clusterCps.push(cp);
            clusterProps.push(lastProp);

            // decide whether this code point's width is absorbed by the cluster
            let combining;
            if ((prop & 0xF) !== Extend) {
                // regional indicator pair, or hangul jungseong / jongseong
                combining = (lastProp === RI && prop === RI) || prop === V || prop === T;
            } else if (prop & graphemeBreak_1.Emoji_Modifier) {
                // emoji modifier: absorbed only when it follows an emoji modifier base
                combining = lastProp === ExtendedPictographic
                    && (0, emoji_1.isEmojiModifierBase)(lastCp);
                // emoji modifiers themselves carry the Grapheme_Base property
                hasBase = true;
            } else {
                // Grapheme_Extend: absorbed only if the cluster already holds a Grapheme_Base,
                // so resume scanning the preceding code points where the last scan stopped
                while (!hasBase && baseCursor < clusterProps.length) {
                    hasBase = (0, graphemeBreak_1.isGraphemeBase)(
                        clusterCps[baseCursor],
                        clusterProps[baseCursor],
                    );
                    baseCursor += 1;
                }
                combining = hasBase;
            }

            // a non-combining code point contributes its own width to the cluster
            if (!combining) {
                width += (0, codePoint_1.default)(cp);
            }
            if (cp === ZWJ) {
                hasZwj = true;
            }
        }

        lastCp = cp;
        lastProp = prop;
        // astral code points occupy two UTF-16 code units, so skip the trailing surrogate
        pos += cp > 0xFFFF ? 2 : 1;
    }

    // emit whatever remains as the final cluster
    yield [string.slice(start), resolveWidth()];
}
exports.default = charWidths;
//# sourceMappingURL=charWidths.js.map