tty-strings
Version:
Tools for working with strings displayed in the terminal
127 lines • 5.83 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
const codePoint_1 = require("./codePoint");
const graphemeBreak_1 = require("./graphemeBreak");
const emoji_1 = require("./emoji");
const { RI, ExtendedPictographic, Extend, V, T, } = graphemeBreak_1.GBProps;
/**
* A generator function that splits a string into measured graphemes. Does not handle ANSI escape codes,
* so make sure to remove them from input strings before calling this function.
*
* @remarks
* This function is an implementation of UAX #29 grapheme cluster boundary splitting:
* {@link https://www.unicode.org/reports/tr29/tr29-21.html#Grapheme_Cluster_Boundaries}
*
* @example
* ```js
* import { charWidths } from 'tty-strings';
*
* // Basic latin characters
* const chars = [...charWidths('abc')]; // [['a', 1], ['b', 1], ['c', 1]]
*
* // Full width emoji characters
* const emojis = [...charWidths('🙈🙉🙊')]; // [['🙈', 2], ['🙉', 2], ['🙊', 2]]
* ```
*
* @param string - Input string to split.
* @returns A generator that yields a tuple with each grapheme and its width in the input string.
*/
function* charWidths(string) {
if (typeof string !== 'string' || string.length === 0)
return;
// lower string index of the first grapheme cluster
let i = 0,
// get first code point
cp = string.codePointAt(0),
// initialize code points array for the first grapheme cluster
cpoints = [cp],
// initialize array to store grapheme break properties
props = [],
// get width of first code point in the first grapheme cluster
fcw = (0, codePoint_1.default)(cp),
// initialize total width of the first grapheme cluster
cw = fcw,
// track if grapheme cluster contains a zero-width-joiner
zwj = cp === 0x200D,
// get grapheme break property of the first code point
prev = (0, graphemeBreak_1.graphemeBreakProperty)(cp),
// track if grapheme cluster contains a Grapheme_Base
base = false,
// track index last index within the cluster that has been checked for Grapheme_Base
baseIndex = 0;
for (let j = cp > 0xFFFF ? 2 : 1, n = string.length; j < n; j += 1) {
const nextCp = string.codePointAt(j),
// get grapheme break property of the next code point
next = (0, graphemeBreak_1.graphemeBreakProperty)(nextCp);
// check if there is a cluster boundary between the two adjacent code points
if ((0, graphemeBreak_1.shouldBreak)(props, prev, next)) {
// a cluster boundry exists, yield the current grapheme cluster
yield [
string.slice(i, j),
// if cluster is an emoji zwj sequence, its width is that of the first code point in the sequence
(zwj && props[0] === ExtendedPictographic && (0, emoji_1.isEmojiZwjSequence)(cpoints)) ? fcw : cw,
];
// reset grapheme break properties array
props = [];
// initialize code points array for the next grapheme cluster
cpoints = [nextCp];
// measure width of the first code point in the next grapheme cluster
fcw = (0, codePoint_1.default)(nextCp);
// initialize total width of the next grapheme cluster
cw = fcw;
// set lower string index of the next cluster
i = j;
// reset zero-width-joiner flag
zwj = nextCp === 0x200D;
// reset Grapheme_Base flag
base = false;
// reset Grapheme_Base index
baseIndex = 0;
}
else {
// add code point to code points array for the current grapheme cluster
cpoints.push(nextCp);
// add grapheme break property to props array
props.push(prev);
// determine if the width of this code point should be factored into the width of the grapheme cluster
let combining = false;
if ((next & 0xF) !== Extend) {
// check for regional indicator sequence or hangul jungseong / jongseong
combining = (prev === RI && next === RI) || next === V || next === T;
}
else if (next & graphemeBreak_1.Emoji_Modifier) {
// check for an emoji modifier sequence
combining = (prev === ExtendedPictographic && (0, emoji_1.isEmojiModifierBase)(cp));
// set Grapheme_Base flag, since emoji modifiers have the Grapheme_Base property
base || (base = true);
}
else {
// go back and check each code point in the cluster to see if there is a Grapheme_Base
for (; !base && baseIndex < props.length; baseIndex += 1) {
base = (0, graphemeBreak_1.isGraphemeBase)(cpoints[baseIndex], props[baseIndex]);
}
// this code point has the property Grapheme_Extend, it is only combining if it follows a Grapheme_Base
combining = base;
}
// if not a combining point, increment width of the current grapheme cluster
if (!combining)
cw += (0, codePoint_1.default)(nextCp);
// update zero-width-joiner flag
if (nextCp === 0x200D)
zwj = true;
}
cp = nextCp;
// ignore surrogates
if (cp > 0xFFFF)
j += 1;
prev = next;
}
// yield the final grapheme cluster
yield [
string.slice(i),
// if last cluster is an emoji zwj sequence, its width is that of the first code point in the sequence
(zwj && props[0] === ExtendedPictographic && (0, emoji_1.isEmojiZwjSequence)(cpoints)) ? fcw : cw,
];
}
exports.default = charWidths;
//# sourceMappingURL=charWidths.js.map
;