UNPKG

string-width

Version:

Get the visual width of a string - the number of columns required to display it

120 lines (93 loc) 3.71 kB
import stripAnsi from 'strip-ansi'; import {eastAsianWidth} from 'get-east-asian-width'; /** Logic: - Segment graphemes to match how terminals render clusters. - Width rules: 1. Skip non-printing clusters (Default_Ignorable, Control, pure Mark, lone Surrogates). Tabs are ignored by design. 2. RGI emoji clusters (\p{RGI_Emoji}) are double-width. 3. Minimally-qualified/unqualified emoji clusters (ZWJ sequences with 2+ Extended_Pictographic, or keycap sequences) are double-width. 4. Otherwise use East Asian Width of the cluster's first visible code point, and add widths for trailing Halfwidth/Fullwidth Forms within the same cluster (e.g., dakuten/handakuten/prolonged sound mark). */ const segmenter = new Intl.Segmenter(); // Whole-cluster zero-width const zeroWidthClusterRegex = /^(?:\p{Default_Ignorable_Code_Point}|\p{Control}|\p{Format}|\p{Mark}|\p{Surrogate})+$/v; // Pick the base scalar if the cluster starts with Prepend/Format/Marks const leadingNonPrintingRegex = /^[\p{Default_Ignorable_Code_Point}\p{Control}\p{Format}\p{Mark}\p{Surrogate}]+/v; // RGI emoji sequences const rgiEmojiRegex = /^\p{RGI_Emoji}$/v; // Detect minimally-qualified/unqualified emoji sequences (missing VS16 but still render as double-width) const unqualifiedKeycapRegex = /^[\d#*]\u20E3$/; const extendedPictographicRegex = /\p{Extended_Pictographic}/gu; function isDoubleWidthNonRgiEmojiSequence(segment) { // Real emoji clusters are < 30 chars; guard against pathological input if (segment.length > 50) { return false; } if (unqualifiedKeycapRegex.test(segment)) { return true; } // ZWJ sequences with 2+ Extended_Pictographic if (segment.includes('\u200D')) { const pictographics = segment.match(extendedPictographicRegex); return pictographics !== null && pictographics.length >= 2; } return false; } function baseVisible(segment) { return segment.replace(leadingNonPrintingRegex, ''); } function isZeroWidthCluster(segment) { return zeroWidthClusterRegex.test(segment); } function trailingHalfwidthWidth(segment, eastAsianWidthOptions) { let extra = 0; if (segment.length > 1) { for (const char of segment.slice(1)) { if (char >= '\uFF00' && char <= '\uFFEF') { extra += eastAsianWidth(char.codePointAt(0), eastAsianWidthOptions); } } } return extra; } export default function stringWidth(input, options = {}) { if (typeof input !== 'string' || input.length === 0) { return 0; } const { ambiguousIsNarrow = true, countAnsiEscapeCodes = false, } = options; let string = input; // Avoid calling stripAnsi when there are no ANSI escape sequences (ESC = 0x1B, CSI = 0x9B) if (!countAnsiEscapeCodes && (string.includes('\u001B') || string.includes('\u009B'))) { string = stripAnsi(string); } if (string.length === 0) { return 0; } // Fast path: printable ASCII (0x20–0x7E) needs no segmenter, regex, or EAW lookup — width equals length. if (/^[\u0020-\u007E]*$/.test(string)) { return string.length; } let width = 0; const eastAsianWidthOptions = {ambiguousAsWide: !ambiguousIsNarrow}; for (const {segment} of segmenter.segment(string)) { // Zero-width / non-printing clusters if (isZeroWidthCluster(segment)) { continue; } // Emoji width logic if (rgiEmojiRegex.test(segment) || isDoubleWidthNonRgiEmojiSequence(segment)) { width += 2; continue; } // Everything else: EAW of the cluster’s first visible scalar const codePoint = baseVisible(segment).codePointAt(0); width += eastAsianWidth(codePoint, eastAsianWidthOptions); // Add width for trailing Halfwidth and Fullwidth Forms (e.g., ゙, ゚, ー) width += trailingHalfwidthWidth(segment, eastAsianWidthOptions); } return width; }