UNPKG

tty-strings

Version:

Tools for working with strings displayed in the terminal

979 lines 47.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.shouldBreak = exports.isGraphemeBase = exports.graphemeBreakProperty = exports.InCBProps = exports.Emoji_Modifier = exports.GBProps = void 0; const None = 0, CR = 1, LF = 2, Control = 3, Extend = 4, ZWJ = 5, RI = 6, Prepend = 7, SpacingMark = 8, L = 9, V = 10, T = 11, LV = 12, LVT = 13, ExtendedPictographic = 14; /** * Base grapheme break properties */ exports.GBProps = { None, CR, LF, Control, Extend, ZWJ, RI, Prepend, SpacingMark, L, V, T, LV, LVT, ExtendedPictographic, }; const InCB_Extend = 0b00010000, InCB_Linker = 0b00100000, InCB_Consonant = 0b01000000; exports.Emoji_Modifier = 0b10000000; /** * Indic conjunct break properties */ exports.InCBProps = { Extend: InCB_Extend, Linker: InCB_Linker, Consonant: InCB_Consonant, }; /** * Get the grapheme cluster break property of a given Unicode code point * * @remarks * Properties are derived from {@link https://unicode.org/Public/16.0.0/ucd/auxiliary/GraphemeBreakProperty.txt} * Extended_Pictographic and Emoji_Modifier values are derived from {@link https://unicode.org/Public/16.0.0/ucd/emoji/emoji-data.txt} * InCB properties are derived from {@link https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt} * * @param code - unicode code point * @returns The grapheme cluster break property */ function graphemeBreakProperty(code) { // Basic Latin (0000 - 007F) if (code < 0x7F) { return code > 0x1F ? None // Carriage return (CR) : code === 0x0D ? CR // Line feed (LF) : code === 0x0A ? LF : Control; // [30] } // Latin-1 Supplement ... Combining Diacritical Marks (0080 - 036F) if (code < 0x0370) { return code <= 0xAE ? ( // Latin-1 Supplement (code <= 0x9F || code === 0xAD) ? Control // [34] : (code === 0xA9 || code === 0xAE) ? ExtendedPictographic : None) : code >= 0x0300 ? Extend | InCB_Extend : None; // [112] } // Greek and Coptic ... Georgian (0370 - 10FF) if (code < 0x1100) { return code < 0x0600 ? ((code < 0x0591 ? 
( // Cyrillic code >= 0x0483 && code <= 0x0489 // [7] ) : ( // Hebrew code <= 0x05BD // [45] || (code >= 0x05BF && code <= 0x05C2 && code !== 0x05C0) // [3] || (code >= 0x05C4 && code <= 0x05C7 && code !== 0x05C6) // [3] )) ? Extend | InCB_Extend : None) : code < 0x0700 ? ( // Arabic code < 0x06D6 ? (code <= 0x0605 ? Prepend // [6] : code === 0x061C ? Control : ((code >= 0x0610 && code <= 0x061A) // [11] || (code >= 0x064B && code <= 0x065F) // [21] || code === 0x0670 // [1] ) ? Extend | InCB_Extend : None) : (code === 0x06DD) ? Prepend : ((code <= 0x06E4 && code !== 0x06DE) // [13] || (code >= 0x06E7 && code <= 0x06ED && code !== 0x06E9) // [6] ) ? Extend | InCB_Extend : None) : code < 0x0800 ? (code <= 0x074A ? ( // Syriac (code >= 0x0730 || code === 0x0711) ? Extend | InCB_Extend // [28] : (code === 0x070F) ? Prepend : None // [1] ) : (code < 0x07EB ? ( // Thaana code >= 0x07A6 && code <= 0x07B0 // [11] ) : ( // Nko code <= 0x07F3 || code === 0x07FD // [10] )) ? Extend | InCB_Extend : None) : code < 0x0903 ? (code <= 0x085B ? (( // Samaritan (code >= 0x0816 && code <= 0x0823 && code !== 0x081A) // [13] || (code >= 0x0825 && code <= 0x082D && code !== 0x0828) // [8] // Mandaic || code >= 0x0859 // [3] ) ? Extend | InCB_Extend : None) : code < 0x08CA ? ((code >= 0x0890 && code <= 0x0891) ? Prepend // [2] : (code >= 0x0897 && code <= 0x089F) ? Extend | InCB_Extend : None // [9] ) : ( // Arabic Extended-A code === 0x08E2 ? Prepend : Extend | InCB_Extend // [1/56] )) : code < 0x0981 ? ( // Devanagari code < 0x093E ? ((code === 0x0903 || code === 0x093B) ? SpacingMark // [2] : code < 0x093A ? (code >= 0x0915 ? InCB_Consonant : None) // [37] : code <= 0x093C ? Extend | InCB_Extend : None // [2] ) : code <= 0x094F ? ((code >= 0x0941 && code <= 0x0948) ? Extend | InCB_Extend // [8] : code === 0x094D ? Extend | InCB_Linker : SpacingMark // [1/9] ) : code <= 0x095F ? (code < 0x0958 ? (code >= 0x0951 ? 
Extend | InCB_Extend : None // [7] ) : InCB_Consonant // [8] ) : (code >= 0x0962 && code <= 0x0963) ? Extend | InCB_Extend // [2] : (code >= 0x0978 && code <= 0x097F) ? InCB_Consonant : None // [8] ) : code < 0x0A01 ? ( // Bengali code <= 0x09B9 ? (code < 0x0995 ? (code <= 0x0983 ? (code === 0x0981 ? Extend | InCB_Extend : SpacingMark) : None // [1/2] ) : ((code <= 0x09B0 && code !== 0x09A9) // [27] || code >= 0x09B6 // [4] || code === 0x09B2 // [1] ) ? InCB_Consonant : None) : code <= 0x09CC ? (code <= 0x09C4 ? (code < 0x09BE ? (code === 0x09BC ? Extend | InCB_Extend : None) // [1] : (code <= 0x09C0 && code >= 0x09BF) ? SpacingMark : Extend | InCB_Extend // [2/5] ) : (code >= 0x09CB // [2] || (code >= 0x09C7 && code <= 0x09C8) // [2] ) ? SpacingMark : None) : code < 0x09E2 ? (code < 0x09DC ? (code === 0x09CD ? Extend | InCB_Linker : code === 0x09D7 ? Extend | InCB_Extend : None // [1/1] ) : (code <= 0x09DD || code === 0x09DF) ? InCB_Consonant : None // [3] ) : code <= 0x09F1 ? (code <= 0x09E3 ? Extend | InCB_Extend : code >= 0x09F0 ? InCB_Consonant : None // [2/2] ) : code === 0x09FE ? Extend | InCB_Extend : None // [1] ) : code < 0x0A81 ? ( // Gurmukhi code < 0x0A41 ? ((code === 0x0A03 || code >= 0x0A3E) ? SpacingMark // [4] : (code <= 0x0A02 || code === 0x0A3C) ? Extend | InCB_Extend : None // [3] ) : (code <= 0x0A4D ? (code <= 0x0A42 // [2] || (code >= 0x0A47 && code <= 0x0A48) // [2] || code >= 0x0A4B // [3] ) : (code === 0x0A51 // [1] || (code >= 0x0A70 && code <= 0x0A71) // [2] || code === 0x0A75 // [1] )) ? Extend | InCB_Extend : None) : code < 0x0B00 ? ( // Gujarati code <= 0x0AB9 ? (code < 0x0A95 ? (code <= 0x0A82 ? Extend | InCB_Extend : code === 0x0A83 ? SpacingMark : None // [2/1] ) : (code !== 0x0AA9 && code !== 0x0AB1 && code !== 0x0AB4) ? InCB_Consonant : None // [34] ) : code <= 0x0AC8 ? ((code >= 0x0ABE && code <= 0x0AC0) ? SpacingMark // [3] : ((code >= 0x0AC1 && code !== 0x0AC6) || code === 0x0ABC) ? 
Extend | InCB_Extend : None // [8] ) : code < 0x0AE2 ? (code <= 0x0ACC ? (code !== 0x0ACA ? SpacingMark : None) // [3] : code === 0x0ACD ? Extend | InCB_Linker : None // [1] ) : (code >= 0x0AFA || code <= 0x0AE3) ? Extend | InCB_Extend // [8] : code === 0x0AF9 ? InCB_Consonant : None // [1] ) : code < 0x0B80 ? ( // Oriya code <= 0x0B39 ? (code < 0x0B15 ? ((code >= 0x0B02 && code <= 0x0B03) ? SpacingMark // [2] : code === 0x0B01 ? Extend | InCB_Extend : None // [1] ) : ((code <= 0x0B30 && code !== 0x0B29) // [27] || (code >= 0x0B32 && code !== 0x0B34) // [7] ) ? InCB_Consonant : None) : code <= 0x0B4C ? (code <= 0x0B44 ? (code === 0x0B40 ? SpacingMark // [1] : (code >= 0x0B3E || code === 0x0B3C) ? Extend | InCB_Extend : None // [7] ) : ((code >= 0x0B47 && code <= 0x0B48) || code >= 0x0B4B) ? SpacingMark : None // [4] ) : code < 0x0B5C ? (code < 0x0B55 ? (code === 0x0B4D ? Extend | InCB_Linker : None) // [1] : code <= 0x0B57 ? Extend | InCB_Extend : None // [3] ) : code < 0x0B62 ? ((code <= 0x0B5D || code === 0x0B5F) ? InCB_Consonant : None // [3] ) : code <= 0x0B63 ? Extend | InCB_Extend : code === 0x0B71 ? InCB_Consonant : None // [2/1] ) : code < 0x0C00 ? ( // Tamil code < 0x0BC1 ? ((code === 0x0BBF) ? SpacingMark // [1] : (code === 0x0B82 || code >= 0x0BBE) ? Extend | InCB_Extend : None // [3] ) : code <= 0x0BCC ? ((code <= 0x0BC2 // [2] || (code >= 0x0BC6 && code <= 0x0BC8) // [3] || code >= 0x0BCA // [3] ) ? SpacingMark : None) : (code === 0x0BCD // [1] || code === 0x0BD7 // [1] ) ? Extend | InCB_Extend : None) : code < 0x0C64 ? ( // Telugu code < 0x0C3E ? (code <= 0x0C04 ? ((code >= 0x0C01 && code <= 0x0C03) ? SpacingMark : Extend | InCB_Extend) // [3/2] : (code >= 0x0C15 && code <= 0x0C39 && code !== 0x0C29) ? InCB_Consonant // [36] : code === 0x0C3C ? Extend | InCB_Extend : None // [1] ) : code <= 0x0C4C ? ((code <= 0x0C44 && code >= 0xC41) ? SpacingMark // [4] : (code <= 0x0C40 || (code >= 0x0C46 && code !== 0x0C49)) ? 
Extend | InCB_Extend : None // [9] ) : code <= 0x0C5A ? (code === 0x0C4D ? Extend | InCB_Linker // [1] : (code >= 0x0C55 && code <= 0x0C56) ? Extend | InCB_Extend // [2] : code >= 0x0C58 ? InCB_Consonant : None // [3] ) : code >= 0x0C62 ? Extend | InCB_Extend : None // [2] ) : code < 0x0D00 ? ( // Kannada code <= 0x0CC4 ? (code < 0x0CBC ? ((code >= 0x0C82 && code <= 0x0C83) ? SpacingMark // [2] : code === 0x0C81 ? Extend | InCB_Extend : None // [1] ) : code < 0x0CC1 ? (code === 0x0CBE ? SpacingMark // [1] : code !== 0x0CBD ? Extend | InCB_Extend : None // [3] ) : code === 0x0CC2 ? Extend | InCB_Extend : SpacingMark // [1/3] ) : code <= 0x0CE3 ? (((code >= 0x0CC6 && code <= 0x0CC8) // [3] || (code >= 0x0CCA && code <= 0x0CCD) // [4] || (code >= 0x0CD5 && code <= 0x0CD6) // [2] || code >= 0x0CE2 // [2] ) ? Extend | InCB_Extend : None) : code === 0x0CF3 ? SpacingMark : None // [1] ) : code < 0x0D64 ? ( // Malayalam code <= 0x0D44 ? (code < 0x0D3B ? (code >= 0x0D15 ? InCB_Consonant // [38] : code <= 0x0D01 ? Extend | InCB_Extend // [2] : code <= 0x0D03 ? SpacingMark : None // [2] ) : (code >= 0x0D3F && code <= 0x0D40) ? SpacingMark // [2] : code !== 0x0D3D ? Extend | InCB_Extend : None // [7] ) : code <= 0x0D4C ? ((code >= 0x0D46 && code !== 0x0D49) ? SpacingMark : None // [6] ) : code === 0x0D4D ? Extend | InCB_Linker : (code === 0x0D57 // [1] || code >= 0x0D62 // [2] ) ? Extend | InCB_Extend : (code === 0x0D4E) ? Prepend : None) : code < 0x0DF4 ? ( // Sinhala code <= 0x0DD4 ? (((code >= 0x0D82 && code <= 0x0D83) // [2] || (code >= 0x0DD0 && code <= 0x0DD1) // [2] ) ? SpacingMark : (code === 0x0D81 || code === 0x0DCA || code >= 0x0DCF) ? Extend | InCB_Extend : None // [6] ) : (code === 0x0DD6 || code === 0x0DDF) ? Extend | InCB_Extend // [2] : ((code >= 0x0DD8 && code <= 0x0DDE) || code >= 0x0DF2) ? SpacingMark : None // [9] ) : code < 0x0F18 ? (code <= 0x0E4E ? ( // Thai code === 0x0E33 ? 
SpacingMark : ( // [1] (code >= 0x0E31 && code <= 0x0E3A && code !== 0x0E32) // [8] || code >= 0x0E47 // [8] ) ? Extend | InCB_Extend : None) : ( // Lao code === 0x0EB3 ? SpacingMark : ( // [1] (code >= 0x0EB1 && code <= 0x0EBC && code !== 0x0EB2) // [10] || (code >= 0x0EC8 && code <= 0x0ECE) // [7] ) ? Extend | InCB_Extend : None)) : code < 0x102D ? ( // Tibetan code < 0x0F71 ? (code <= 0x0F39 ? ((code <= 0x0F19 // [2] || (code >= 0x0F35 && code !== 0x0F36 && code !== 0x0F38) // [3] ) ? Extend | InCB_Extend : None) : (code <= 0x0F3F && code >= 0x0F3E // [2] ) ? SpacingMark : None) : code <= 0x0F87 ? (code === 0x0F7F ? SpacingMark : code !== 0x0F85 ? Extend | InCB_Extend : None // [1/21] ) : ((code >= 0x0F8D && code <= 0x0FBC && code !== 0x0F98) // [47] || code === 0x0FC6 // [1] ) ? Extend | InCB_Extend : None) : ( // Myanmar code <= 0x103E ? ((code === 0x1031 || (code >= 0x103B && code <= 0x103C)) ? SpacingMark // [3] : code !== 0x1038 ? Extend | InCB_Extend : None // [14] ) : code < 0x1085 ? (((code >= 0x1056 && code <= 0x1057) || code === 0x1084) ? SpacingMark : ( // [3] (code >= 0x1058 && code <= 0x1059) // [2] || (code >= 0x105E && code <= 0x1060) // [3] || (code >= 0x1071 && code <= 0x1074) // [4] || code === 0x1082 // [1] ) ? Extend | InCB_Extend : None) : (code <= 0x1086 // [2] || code === 0x108D // [1] || code === 0x109D // [1] ) ? Extend | InCB_Extend : None); } // Hangul Jamo ... Vedic Extensions (1100 - 1CFF) if (code < 0x1D00) { return code < 0x1200 ? ( // Hangul Jamo (code < 0x1160) ? L : (code < 0x11A8) ? V : T // L [96] V [72] T [88] ) : code < 0x1774 ? ((code <= 0x1715 ? ( // Ethiopic (code >= 0x135D && code <= 0x135F) // [3] // Tagalog || code >= 0x1712 // [4] ) : ( // Hanunoo (code >= 0x1732 && code <= 0x1734) // [3] // Buhid || (code >= 0x1752 && code <= 0x1753) // [2] // Tagbanwa || code >= 0x1772 // [2] )) ? Extend | InCB_Extend : None) : code < 0x180B ? ( // Khmer (code >= 0x17B4 && code <= 0x17D3) ? 
((code === 0x17B6 || (code >= 0x17BE && code <= 0x17C5) || (code >= 0x17C7 && code <= 0x17C8)) ? SpacingMark : Extend | InCB_Extend // [11/21] ) : code === 0x17DD ? Extend | InCB_Extend : None // [1] ) : code < 0x1920 ? ( // Mongolian (code === 0x180E) ? Control // [1] : (code <= 0x180F || (code >= 0x1885 && code <= 0x1886) || code === 0x18A9) ? Extend | InCB_Extend : None // [7] ) : code < 0x1A55 ? (code <= 0x193B ? ( // Limbu code <= 0x192B ? ((code <= 0x1922 || (code >= 0x1927 && code <= 0x1928)) ? Extend | InCB_Extend : SpacingMark // [5/7] ) : code >= 0x1930 ? ((code >= 0x1939 || code === 0x1932) ? Extend | InCB_Extend : SpacingMark // [4/8] ) : None) : (code >= 0x1A17 && code <= 0x1A1B) ? ( // Buginese (code >= 0x1A19 && code <= 0x1A1A) ? SpacingMark : Extend | InCB_Extend // [2/3] ) : None) : code < 0x1B00 ? ( // Tai Tham code < 0x1A65 ? (code <= 0x1A5E ? ((code >= 0x1A58 || code === 0x1A56) ? Extend | InCB_Extend : SpacingMark // [8/2] ) : (code === 0x1A60 || code === 0x1A62) ? Extend | InCB_Extend : None // [2] ) : code <= 0x1A7C ? ((code <= 0x1A6C || code >= 0x1A73) ? Extend | InCB_Extend : SpacingMark // [18/6] ) : (code === 0x1A7F // [1] // Combining Diacritical Marks Extended || (code >= 0x1AB0 && code <= 0x1ACE) // [31] ) ? Extend | InCB_Extend : None) : code < 0x1B80 ? ( // Balinese code < 0x1B34 ? (code <= 0x1B03 ? Extend | InCB_Extend // [4] : code === 0x1B04 ? SpacingMark : None // [1] ) : code <= 0x1B44 ? ((code <= 0x1B3D || code >= 0x1B42) ? Extend | InCB_Extend : SpacingMark // [13/4] ) : (code >= 0x1B6B && code <= 0x1B73 // [9] ) ? Extend | InCB_Extend : None) : code < 0x1BE6 ? ( // Sundanese code < 0x1BA2 ? (code <= 0x1B81 ? Extend | InCB_Extend // [2] : (code === 0x1B82 || code === 0x1BA1) ? SpacingMark : None // [2] ) : code <= 0x1BAD ? ((code <= 0x1BA5 || code >= 0x1BA8) ? Extend | InCB_Extend : SpacingMark // [10/2] ) : None) : code < 0x1CD0 ? (code <= 0x1BF3 ? 
( // Batak ((code >= 0x1BEA && code <= 0x1BEC) || code === 0x1BE7 || code === 0x1BEE) ? SpacingMark : Extend | InCB_Extend // [5/9] ) : (code >= 0x1C24 && code <= 0x1C37) ? ( // Lepcha (code <= 0x1C2B || (code >= 0x1C34 && code <= 0x1C35)) ? SpacingMark : Extend | InCB_Extend // [10/10] ) : None) : ( // Vedic Extensions (code === 0x1CE1 || code === 0x1CF7) ? SpacingMark : (code <= 0x1CD2 // [3] || (code >= 0x1CD4 && code <= 0x1CE8) // [20] || code === 0x1CED || code === 0x1CF4 // [2] || (code >= 0x1CF8 && code <= 0x1CF9) // [2] ) ? Extend | InCB_Extend : None); } // Phonetic Extensions ... Combining Diacritical Marks for Symbols (1D00 - 20FF) if (code < 0x2100) { return code < 0x200B ? ( // Combining Diacritical Marks Supplement (code >= 0x1DC0 && code <= 0x1DFF) ? Extend | InCB_Extend : None // [64] ) : code === 0x200D ? ZWJ | InCB_Extend : code <= 0x206F ? ( // General Punctuation code === 0x200C ? Extend : (code <= 0x200F // [3] || (code >= 0x2028 && code <= 0x202E) // [7] || code >= 0x2060 // [16] ) ? Control : (code === 0x203C || code === 0x2049) ? ExtendedPictographic : None) : ( // Combining Diacritical Marks for Symbols code >= 0x20D0 && code <= 0x20F0 // [33] ) ? Extend | InCB_Extend : None; } // Letterlike Symbols ... Miscellaneous Symbols and Arrows (2100 - 2BFF) if (code < 0x2C00) { return (code < 0x231A ? (code < 0x2194 ? ( // Letterlike Symbols code === 0x2122 // [1] || code === 0x2139 // [2] ) : (code <= 0x21AA && ( // Arrows code <= 0x2199 // [6] || (code >= 0x21A9) // [2] ))) : code < 0x23FB ? ( // Miscellaneous Technical code <= 0x231B // [2] || code === 0x2328 // [1] || code === 0x2388 // [1] || code === 0x23CF // [1] || (code >= 0x23E9 && code <= 0x23F3) // [11] || code >= 0x23F8 // [3] ) : code < 0x2600 ? (code < 0x25AA ? ( // Enclosed Alphanumerics code === 0x24C2) : (code <= 0x25FE && ( // Geometric Shapes code <= 0x25AB // [2] || code === 0x25B6 || code === 0x25C0 // [2] || code >= 0x25FB // [4] ))) : code < 0x27C0 ? (code <= 0x2716 ? 
( // Miscellaneous Symbols (code <= 0x2685 && code !== 0x2606 && code !== 0x2613) // [132] // Dingbats || (code >= 0x2690 && code <= 0x2705) // [118] || (code >= 0x2708 && code !== 0x2713 && code !== 0x2715) // [13] ) : code <= 0x2734 ? ( // Dingbats code === 0x271D // [1] || code === 0x2721 // [1] || code === 0x2728 // [1] || code >= 0x2733 // [2] ) : code <= 0x2757 ? ( // Dingbats code === 0x2744 // [1] || code === 0x2747 // [1] || code === 0x274C // [1] || code === 0x274E // [1] || (code >= 0x2753 && code !== 0x2756) // [4] ) : ( // Dingbats (code >= 0x2763 && code <= 0x2767) // [5] || (code >= 0x2795 && code <= 0x2797) // [3] || code === 0x27A1 // [1] || code === 0x27B0 // [1] || code === 0x27BF // [1] )) : ( // Supplemental Arrows-B (code >= 0x2934 && code <= 0x2935) // [2] // Miscellaneous Symbols and Arrows || (code >= 0x2B05 && (code <= 0x2B07 // [3] || (code >= 0x2B1B && code <= 0x2B1C) // [2] || code === 0x2B50 // [1] || code === 0x2B55 // [1] )))) ? ExtendedPictographic : None; } // Glagolitic ... Meetei Mayek (2C00 - ABFF) if (code < 0xAC00) { return code < 0x302A ? ((code <= 0x2DFF && code >= 0x2CEF && ( // Coptic code <= 0x2CF1 // [3] // Tifinagh || code === 0x2D7F // [1] // Cyrillic Extended-A || code >= 0x2DE0 // [32] )) ? Extend | InCB_Extend : None) : code < 0x3300 ? (code <= 0x309A ? ( // CJK Symbols and Punctuation + Hiragana (code <= 0x302F || code >= 0x3099) ? Extend | InCB_Extend // [8] : (code === 0x3030 || code === 0x303D) ? ExtendedPictographic : None // [2] ) : ( // Enclosed CJK Letters and Months code === 0x3297 || code === 0x3299 // [2] ) ? ExtendedPictographic : None) : code < 0xA800 ? ((code >= 0xA66F && code <= 0xA6F1 && ( // Cyrillic Extended-B (code <= 0xA67D && code !== 0xA673) // [14] || (code >= 0xA69E && code <= 0xA69F) // [2] || code >= 0xA6F0 // [2] )) ? Extend | InCB_Extend : None) : code < 0xA880 ? ( // Syloti Nagri code < 0xA823 ? ((code === 0xA802 || code === 0xA806 || code === 0xA80B) ? 
Extend | InCB_Extend : None // [3] ) : code <= 0xA827 ? ((code >= 0xA825 && code <= 0xA826) ? Extend | InCB_Extend : SpacingMark // [2/3] ) : code === 0xA82C ? Extend | InCB_Extend : None // [1] ) : code < 0xA900 ? ( // Saurashtra, Devanagari Extended code < 0xA8C4 ? ((code <= 0xA881 || code >= 0xA8B4) ? SpacingMark : None // [18] ) : (code <= 0xA8C5 // [2] || (code >= 0xA8E0 && code <= 0xA8F1) // [18] || code === 0xA8FF // [1] ) ? Extend | InCB_Extend : None) : code < 0xA980 ? (code < 0xA947 ? ( // Kayah Li (code >= 0xA926 && code <= 0xA92D) ? Extend | InCB_Extend : None // [8] ) : code <= 0xA953 ? ( // Rejang code === 0xA952 ? SpacingMark : Extend | InCB_Extend // [1/12] ) : ( // Hangul Jamo Extended-A code >= 0xA960 && code <= 0xA97C // [29] ) ? L : None) : code < 0xAA29 ? ( // Javanese code < 0xA9B4 ? ((code <= 0xA982 || code === 0xA9B3) ? Extend | InCB_Extend // [4] : code === 0xA983 ? SpacingMark : None // [1] ) : code < 0xA9C0 ? ((code <= 0xA9B5 || (code >= 0xA9BA && code <= 0xA9BB) || code >= 0xA9BE) ? SpacingMark : Extend | InCB_Extend // [6/6] ) : (code === 0xA9C0 // [1] // Myanmar Extended-B || code === 0xA9E5 // [1] ) ? Extend | InCB_Extend : None) : code < 0xAA80 ? ( // Cham, Myanmar Extended-A code <= 0xAA36 ? ((code <= 0xAA2E || (code >= 0xAA31 && code <= 0xAA32) || code >= 0xAA35) ? Extend | InCB_Extend : SpacingMark // [10/4] ) : (code === 0xAA43 || code === 0xAA4C || code === 0xAA7C) ? Extend | InCB_Extend // [3] : (code === 0xAA4D) ? SpacingMark : None // [1] ) : code < 0xAB00 ? (code < 0xAAEB ? (( // Tai Viet (code >= 0xAAB0 && code <= 0xAAB4 && code !== 0xAAB1) // [4] || (code >= 0xAAB7 && code <= 0xAAB8) // [2] || (code >= 0xAABE && code <= 0xAAC1 && code !== 0xAAC0) // [3] ) ? Extend | InCB_Extend : None) : code <= 0xAAEF ? ( // Meetei Mayek Extensions (code >= 0xAAEC && code <= 0xAAED) ? Extend | InCB_Extend : SpacingMark // [2/3] ) : (code === 0xAAF5) ? SpacingMark // [1] : (code === 0xAAF6) ? 
Extend | InCB_Extend : None // [1] ) : code >= 0xABE3 ? ( // Meetei Mayek code <= 0xABEA ? ((code === 0xABE5 || code === 0xABE8) ? Extend | InCB_Extend : SpacingMark // [2/6] ) : (code === 0xABEC) ? SpacingMark // [1] : (code === 0xABED) ? Extend | InCB_Extend : None // [1] ) : None; } // Hangul Syllables ... Specials (AC00 - FFFF) if (code < 0x10000) { return code <= 0xD7A3 ? ( // Hangul Syllable ((code - 0xAC00) % 28 ? LVT : LV) // [11,172] ) : code <= 0xD7FB ? ( // Hangul Jamo Extended-B code >= 0xD7CB ? T // [49] : (code >= 0xD7B0 && code <= 0xD7C6) ? V : None // [23] ) : code <= 0xFE2F ? ((code < 0xFE00 ? ( // Alphabetic Presentation Forms code === 0xFB1E // [1] ) : ( // Variation Selectors code <= 0xFE0F // [16] // Combining Half Marks || code >= 0xFE20 // [16] )) ? Extend | InCB_Extend : None) : ( // Zero Width No-Break Space + Specials (code === 0xFEFF || (code >= 0xFFF0 && code <= 0xFFFB)) ? Control // [1] // Halfwidth and Fullwidth Forms : (code >= 0xFF9E && code <= 0xFF9F) ? Extend | InCB_Extend : None // [2] ); } // Linear B Syllabary .. Newa (10000 - 1147F) if (code < 0x11480) { return code < 0x11000 ? ((code < 0x10A01 ? (code <= 0x1037A && ( // Phaistos Disc code === 0x101FD // [1] // Coptic Epact Numbers || code === 0x102E0 // [1] // Old Permic || code >= 0x10376 // [5] )) : code < 0x10AE5 ? ( // Kharoshthi (code <= 0x10A06 && code !== 0x10A04) // [5] || (code >= 0x10A0C && code <= 0x10A0F) // [4] || (code >= 0x10A38 && code <= 0x10A3A) // [3] || code === 0x10A3F // [1] ) : code < 0x10EFC ? ( // Manichaean code <= 0x10AE6 // [2] // Hanifi Rohingya || (code >= 0x10D24 && code <= 0x10D27) // [4] // Garay || (code >= 0x10D69 && code <= 0x10D6D) // [5] // Yezidi || (code >= 0x10EAB && code <= 0x10EAC) // [2] ) : ( // Arabic Extended-C code <= 0x10EFF // [4] // Sogdian || (code >= 0x10F46 && code <= 0x10F50) // [11] // Old Uyghur || (code >= 0x10F82 && code <= 0x10F85) // [4] )) ? Extend | InCB_Extend : None) : code < 0x11082 ? 
( // Brahmi code <= 0x11002 ? (code === 0x11001 ? Extend | InCB_Extend : SpacingMark // [1/2] ) : (code < 0x11073 ? ((code >= 0x11038 && code <= 0x11046) // [15] || code === 0x11070 // [1] ) : (code <= 0x11074 // [2] || code >= 0x1107F // [3] )) ? Extend | InCB_Extend : None) : code < 0x11100 ? ( // Kaithi code <= 0x110BA ? (code < 0x110B0 ? (code === 0x11082 ? SpacingMark : None) // [1] : (code <= 0x110B2 || (code >= 0x110B7 && code <= 0x110B8)) ? SpacingMark : Extend | InCB_Extend // [5/6] ) : (code === 0x110BD || code === 0x110CD) ? Prepend // [2] : (code === 0x110C2) ? Extend | InCB_Extend : None // [1] ) : code < 0x11180 ? ( // Chakma code <= 0x11134 ? (code === 0x1112C ? SpacingMark // [1] : (code <= 0x11102 || code >= 0x11127) ? Extend | InCB_Extend : None // [16] ) : code <= 0x11146 ? (code >= 0x11145 ? SpacingMark : None // [2] ) : ( // Mahajani code === 0x11173 ? Extend | InCB_Extend : None // [1] )) : code < 0x1122C ? ( // Sharada code <= 0x111C0 ? (code < 0x111B6 ? (code <= 0x11181 ? Extend | InCB_Extend // [2] : (code === 0x11182 || code >= 0x111B3) ? SpacingMark : None // [4] ) : code === 0x111BF ? SpacingMark : Extend | InCB_Extend // [1/10] ) : code <= 0x111CC ? (code >= 0x111C9 ? Extend | InCB_Extend // [4] : (code <= 0x111C3 && code >= 0x111C2) ? Prepend : None // [2] ) : code === 0x111CE ? SpacingMark : code === 0x111CF ? Extend | InCB_Extend : None // [1/1] ) : code < 0x11300 ? ( // Khojki code <= 0x11237 ? ((code <= 0x1122E || (code >= 0x11232 && code <= 0x11233)) ? SpacingMark : Extend | InCB_Extend // [5/7] ) : code < 0x112DF ? ((code === 0x1123E || code === 0x11241) ? Extend | InCB_Extend : None // [2] ) : code <= 0x112EA ? ( // Khudawadi (code >= 0x112E0 && code <= 0x112E2) ? SpacingMark : Extend | InCB_Extend // [3/9] ) : None) : code < 0x11375 ? ( // Grantha code < 0x11341 ? (code < 0x1133B ? (code <= 0x11303 ? (code <= 0x11301 ? Extend | InCB_Extend : SpacingMark) : None // [2/2] ) : code === 0x1133F ? 
SpacingMark // [1] : code !== 0x1133D ? Extend | InCB_Extend : None // [4] ) : code < 0x1134D ? ((code <= 0x11344 // [4] || (code >= 0x11347 && code <= 0x11348) // [2] || code >= 0x1134B // [2] ) ? SpacingMark : None) : code <= 0x11363 ? (code >= 0x11362 ? SpacingMark // [2] : (code === 0x1134D || code === 0x11357) ? Extend | InCB_Extend : None // [2] ) : ((code >= 0x11366 && code <= 0x1136C) || code >= 0x11370) ? Extend | InCB_Extend : None // [12] ) : code < 0x113E3 ? ( // Tulu-Tigalari code <= 0x113C2 ? (code >= 0x113B8 ? ((code >= 0x113B9 && code <= 0x113BA) ? SpacingMark // [2] : code !== 0x113C1 ? Extend | InCB_Extend : None // [8] ) : None) : code <= 0x113C9 ? ((code === 0x113C5 || code >= 0x113C7) ? Extend | InCB_Extend : None // [4] ) : code < 0x113CE ? (code !== 0x113CB ? SpacingMark : None // [3] ) : code === 0x113D1 ? Prepend // [1] : (code <= 0x113D2 || code >= 0x113E1) ? Extend | InCB_Extend : None // [6] ) : ( // Newa code <= 0x11444 ? (code >= 0x11435 ? ((code <= 0x11437 || (code >= 0x11440 && code <= 0x11441)) ? SpacingMark : Extend | InCB_Extend // [5/11] ) : None) : code === 0x11445 ? SpacingMark // [1] : (code === 0x11446 || code === 0x1145E) ? Extend | InCB_Extend : None // [2] ); } // Tirhuta ... Pau Cin Hau (11480 - 11AFF) if (code < 0x11C00) { return code < 0x115AF ? ( // Tirhuta (code >= 0x114B0 && code <= 0x114C3) ? (((code >= 0x114B1 && code <= 0x114B2) // [2] || code === 0x114B9 // [1] || (code >= 0x114BB && code <= 0x114BC) // [2] || code === 0x114BE // [1] || code === 0x114C1 // [1] ) ? SpacingMark : Extend | InCB_Extend) : None) : code < 0x11630 ? ( // Siddham code <= 0x115BB ? (((code >= 0x115B0 && code <= 0x115B1) || code >= 0x115B8) ? SpacingMark // [6] : code <= 0x115B5 ? Extend | InCB_Extend : None // [5] ) : code <= 0x115C0 ? (code === 0x115BE ? SpacingMark : Extend | InCB_Extend // [1/4] ) : (code >= 0x115DC && code <= 0x115DD) ? Extend | InCB_Extend : None // [2] ) : code < 0x116AB ? ( // Modi code <= 0x11640 ? 
((code <= 0x11632 // [3] || (code >= 0x1163B && code <= 0x1163C) // [2] || code === 0x1163E // [1] ) ? SpacingMark : Extend | InCB_Extend) : None) : code < 0x1171D ? ( // Takri code <= 0x116B7 ? ((code === 0x116AC || (code >= 0x116AE && code <= 0x116AF)) ? SpacingMark : Extend | InCB_Extend // [3/10] ) : None) : code < 0x11930 ? (code <= 0x1172B ? ( // Ahom (code === 0x1171E || code === 0x11726) ? SpacingMark // [2] : (code <= 0x1171F || code >= 0x11722) ? Extend | InCB_Extend : None // [11] ) : (code >= 0x1182C && code <= 0x1183A) ? ( // Dogra (code <= 0x1182E || code === 0x11838) ? SpacingMark : Extend | InCB_Extend // [4/11] ) : None) : code < 0x119D1 ? ( // Dives Akuru code < 0x1193B ? ((code === 0x11930) ? Extend | InCB_Extend // [1] : (code <= 0x11938 && code !== 0x11936) ? SpacingMark : None // [7] ) : code <= 0x11943 ? ((code <= 0x1193E || code === 0x11943) ? Extend | InCB_Extend // [5] : (code === 0x1193F || code === 0x11941) ? Prepend : SpacingMark // [2/2] ) : None) : code < 0x11A01 ? ( // Nandinagari code <= 0x119D7 ? (code <= 0x119D3 ? SpacingMark : Extend | InCB_Extend // [3/4] ) : (code >= 0x119DA && code <= 0x119E0) ? ((code >= 0x119DC && code <= 0x119DF) ? SpacingMark : Extend | InCB_Extend // [4/3] ) : (code === 0x119E4) ? SpacingMark : None // [1] ) : code < 0x11A51 ? ( // Zanabazar Square code < 0x11A33 ? (code <= 0x11A0A ? Extend | InCB_Extend : None // [10] ) : code <= 0x11A3E ? ((code <= 0x11A38 || code >= 0x11A3B) ? Extend | InCB_Extend // [10] : code === 0x11A39 ? SpacingMark : Prepend // [1/1] ) : (code === 0x11A47) ? Extend | InCB_Extend : None // [1] ) : ( // Soyombo code <= 0x11A5B ? ((code >= 0x11A57 && code <= 0x11A58) ? SpacingMark : Extend | InCB_Extend // [2/9] ) : (code >= 0x11A84 && code <= 0x11A99) ? (code <= 0x11A89 ? Prepend // [6] : code === 0x11A97 ? SpacingMark : Extend | InCB_Extend // [1/15] ) : None); } // Bhaiksuki ... Arabic Mathematical Alphabetic Symbols (11C00 - 1EEFF) if (code < 0x1F000) { return code < 0x11D31 ? 
(code <= 0x11C3F ? (code >= 0x11C2F ? ( // Bhaiksuki (code === 0x11C2F || code === 0x11C3E) ? SpacingMark // [2] : code !== 0x11C37 ? Extend | InCB_Extend : None // [14] ) : None) : (code >= 0x11C92 && code <= 0x11CB6) ? ( // Marchen (code === 0x11CA9 || code === 0x11CB1 || code === 0x11CB4) ? SpacingMark // [3] : code !== 0x11CA8 ? Extend | InCB_Extend : None) : None) : code < 0x11D8A ? ( // Masaram Gondi code < 0x11D3C ? ((code <= 0x11D36 || code === 0x11D3A) ? Extend | InCB_Extend : None // [7] ) : (code <= 0x11D47 && code !== 0x11D3E) ? (code === 0x11D46 ? Prepend : Extend | InCB_Extend // [1/10] ) : None) : code < 0x11EF3 ? ( // Gunjala Gondi code <= 0x11D94 ? ((code <= 0x11D8E || code >= 0x11D93) ? SpacingMark // [7] : (code >= 0x11D90 && code <= 0x11D91) ? Extend | InCB_Extend : None // [2] ) : code <= 0x11D97 ? (code === 0x11D96 ? SpacingMark : Extend | InCB_Extend // [1/2] ) : None) : code < 0x13430 ? (code <= 0x11F42 ? (code < 0x11F00 ? (code <= 0x11EF6 ? ( // Makasar code <= 0x11EF4 ? Extend | InCB_Extend : SpacingMark // [2/2] ) : None) : code < 0x11F34 ? (code <= 0x11F03 ? (code <= 0x11F01 ? Extend | InCB_Extend // [2] : code === 0x11F02 ? Prepend : SpacingMark // [1/1] ) : None) : code < 0x11F3E ? (code <= 0x11F35 ? SpacingMark // 2 : code <= 0x11F3A ? Extend | InCB_Extend : None // [5] ) : code <= 0x11F3F ? SpacingMark : Extend | InCB_Extend // [2/3] ) : code === 0x11F5A ? Extend | InCB_Extend : None // [1] ) : code < 0x16F4F ? (code < 0x1611E ? ( // Egyptian Hieroglyph Format Controls code <= 0x13455 ? (code <= 0x1343F ? Control // [16] : (code >= 0x13447 || code === 0x13440) ? Extend | InCB_Extend : None // [16] ) : None) : code <= 0x16B36 ? (code <= 0x1612F ? ( // Gurung Khema (code <= 0x16129 || code >= 0x1612D) ? Extend | InCB_Extend : SpacingMark // [15/3] ) : (code >= 0x16AF0 && code <= 0x16B36) ? (( // Bassa Vah code <= 0x16AF4 // [5] // Pahawh Hmong || code >= 0x16B30 // [7] ) ? 
Extend | InCB_Extend : None) : None) : ( // Kirat Rai code === 0x16D63 // [1] || (code >= 0x16D67 && code <= 0x16D6A) // [4] ) ? V : None) : code < 0x1D165 ? (code <= 0x16F92 ? ( // Miao (code >= 0x16F51 && code <= 0x16F87) ? SpacingMark // [55] : (code === 0x16F4F || code >= 0x16F8F) ? Extend | InCB_Extend : None // [5] ) : code <= 0x16FF1 ? ( // Ideographic Symbols and Punctuation (code === 0x16FE4 || code >= 0x16FF0) ? Extend | InCB_Extend : None // [3] ) : (code >= 0x1BC9D && code <= 0x1BCA3) ? ( // Duployan code <= 0x1BC9E ? Extend | InCB_Extend // [2] // Shorthand Format Controls : code >= 0x1BCA0 ? Control : None // [4] ) : (code >= 0x1CF00 && code <= 0x1CF46) ? ( // Znamenny Musical Notation (code <= 0x1CF2D || code >= 0x1CF30) ? Extend | InCB_Extend : None // [69] ) : None) : code < 0x1DAB0 ? ( // Musical Symbols code < 0x1D17B ? (code < 0x1D173 ? ((code <= 0x1D169 || code >= 0x1D16D) ? Extend | InCB_Extend : None // [11] ) : Control // [8] ) : (code <= 0x1D244 ? (code <= 0x1D182 // [8] || (code >= 0x1D185 && code <= 0x1D18B) // [7] || (code >= 0x1D1AA && code <= 0x1D1AD) // [4] // Ancient Greek Musical Notation || code >= 0x1D242) : (code >= 0x1DA00 && ( // Sutton SignWriting code <= 0x1DA36 // [55] || (code >= 0x1DA3B && code <= 0x1DA6C) // [50] || code === 0x1DA75 // [1] || code === 0x1DA84 // [1] || (code >= 0x1DA9B && code !== 0x1DAA0) // [20] ))) ? Extend | InCB_Extend : None) : (code <= 0x1E02A ? (code >= 0x1E000 && ( // Glagolitic Supplement (code <= 0x1E018 && code !== 0x1E007) // [24] || (code >= 0x1E01B && code !== 0x1E022 && code !== 0x1E025) // [14] )) : code <= 0x1E4EF ? (code <= 0x1E136 ? ( // Cyrillic Extended-D code === 0x1E08F // [1] // Nyiakeng Puachue Hmong || code >= 0x1E130 // [7] ) : ( // Toto code === 0x1E2AE // [1] // Wancho || (code >= 0x1E2EC && code <= 0x1E2EF) // [4] // Nag Mundari || code >= 0x1E4EC // [4] )) : code < 0x1E8D0 ? 
( // Ol Onal code >= 0x1E5EE && code <= 0x1E5EF // [2] ) : (code <= 0x1E94A && ( // Mende Kikakui code <= 0x1E8D6 // [7] // Adlam || code >= 0x1E944 // [7] ))) ? Extend | InCB_Extend : None; } // Mahjong Tiles ... Symbols for Legacy Computing (1F000 - 1FBFF) if (code < 0x1FFFE) { return code < 0x1F200 ? ( // Enclosed Alphanumeric Supplement code < 0x1F1AD ? ((code <= 0x1F171 ? ( // Mahjong Tiles + Domino Tiles + Playing Cards code <= 0x1F0FF // [256] || (code >= 0x1F10D && code <= 0x1F10F) // [3] || code === 0x1F12F // [1] || code >= 0x1F16C // [6] ) : (code >= 0x1F17E && code <= 0x1F19A && (code <= 0x1F17F // [2] || code === 0x1F18E // [1] || code >= 0x1F191 // [10] ))) ? ExtendedPictographic : None) : (code <= 0x1F1E5 ? ExtendedPictographic : RI // [57/26] )) : code < 0x1F249 ? ( // Enclosed Ideographic Supplement (code >= 0x1F201 && code <= 0x1F23F && (code <= 0x1F20F // [15] || code === 0x1F21A // [1] || code === 0x1F22F // [1] || (code >= 0x1F232 && code !== 0x1F23B) // [13] )) ? ExtendedPictographic : None) : code < 0x1F53E ? ( // Miscellaneous Symbols and Pictographs (code >= 0x1F3FB && code <= 0x1F3FF) ? Extend | InCB_Extend | exports.Emoji_Modifier : ExtendedPictographic // [5/752] ) : (code <= 0x1F7FF ? (code >= 0x1F546 && ( // Miscellaneous Symbols and Pictographs + Emoticons code <= 0x1F64F // [266] // Transport and Map Symbols || (code >= 0x1F680 && code <= 0x1F6FF) // [128] // Alchemical Symbols || (code >= 0x1F774 && code <= 0x1F77F) // [12] // Geometric Shapes Extended || code >= 0x1F7D5 // [43] )) : code < 0x1F8AE ? 
(code >= 0x1F80C && code <= 0x1F88F && ( // Supplemental Arrows-C code <= 0x1F80F // [4] || (code >= 0x1F848 && code <= 0x1F84F) // [8] || (code >= 0x1F85A && code <= 0x1F85F) // [6] || code >= 0x1F888 // [8] )) : (code <= 0x1F8FF // [82] // Supplemental Symbols and Pictographs + Chess Symbols + Symbols and Pictographs Extended-A || (code >= 0x1F90C && code <= 0x1FAFF && code !== 0x1F93B && code !== 0x1F946) // [498] // Symbols for Legacy Computing || code >= 0x1FC00 // [1,022] )) ? ExtendedPictographic : None; } // Tags, Variation Selectors Supplement (E0000 - E01EF) if (code >= 0xE0000) { return code > 0xE0FFF ? None : (code >= 0xE01F0 // [3,600] || code <= 0xE001F // [32] || (code >= 0xE0080 && code <= 0xE00FF) // [128] ) ? Control : Extend | InCB_Extend; // [336] } return None; } exports.graphemeBreakProperty = graphemeBreakProperty; /** * An approximation of whether a code point has the derived property `Grapheme_Base`. * The point is to be a quicker check than exhaustively checking all code point ranges with the property, * since this check does not have to be super accurate. * The `Grapheme_Base` prop is derived from `[0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend` * * @remarks * This approximation returns false if the code point has the `Extend` grapheme break property, * (derived from {@link https://unicode.org/Public/16.0.0/ucd/auxiliary/GraphemeBreakProperty.txt}) * with the exception of the 5 code points `1F3FB - 1F3FF` that also have the `Emoji_Modifier` property * (derived from {@link https://unicode.org/Public/16.0.0/ucd/emoji/emoji-data.txt}). * Otherwise, returns false if the code point has a general category value of `Cc`, `Cf`, `Zl`, or `Zp`, derived from * {@link https://unicode.org/Public/16.0.0/ucd/extracted/DerivedGeneralCategory.txt}.
* * @remarks * This implementation will not produce any false negatives (a response of `false` when the code point actually does * have the `Grapheme_Base` property), but it will produce false positives. All code points that produce * false positives will have a `General_Category` value of `Cn`, `Cs`, or `Co`. * * @param cp - code point to check * @param gbp - the grapheme break property of the code point * @returns `true` if the code point has the property `Grapheme_Base`, or `false` otherwise. */ function isGraphemeBase(cp, gbp) { if ((gbp & 0xF) === Extend) { // code point has grapheme break property `Extend` return !!(gbp & exports.Emoji_Modifier); } // otherwise, check for Cf, Zl, Zp // Basic Latin ... Hebrew (0000 - 05FF) if (cp < 0x0600) { return !(cp <= 0x009F ? (cp <= 0x001F || cp >= 0x007F) // Cc [65] : (cp === 0x00AD)); // Cf [1] } // Arabic ... Arabic Extended-A (0600 - 08FF) if (cp < 0x0900) { return !(cp < 0x0750 ? ( // Arabic cp <= 0x0605 // Cf [6] || cp === 0x061C // Cf [1] || cp === 0x06DD // Cf [1] // Syriac || cp === 0x070F // Cf [1] ) : cp >= 0x0890 && ( // Arabic Extended-B cp <= 0x0891 // Cf [2] // Arabic Extended-A || cp === 0x08E2 // Cf [1] )); } // Devanagari ... General Punctuation (0900 - 206F) if (cp < 0x2070) { return !(cp < 0x200B ? ( // Mongolian cp === 0x180E // Cf [1] ) : ( // General Punctuation cp <= 0x200F || (cp >= 0x2028 && cp <= 0x202E) // Zl [1] & Zp [1] & Cf [5] || cp >= 0x2060 // Cf [15] & Cn [1] )); } // Superscripts and Subscripts ... Specials (2070 - FFFB) if (cp <= 0xFFFB) { return !( // Arabic Presentation Forms-B cp === 0xFEFF // Cf [1] // Specials || cp >= 0xFFEF // Cn [10] + Cf [3] ); } // Linear B Syllabary ... Musical Symbols (FFFC - 1D17A) if (cp <= 0x1D17A) { return !(cp < 0x13430 ? ( // Kaithi cp === 0x110BD // Cf [1] || cp === 0x110CD // Cf [1] ) : cp < 0x1BCA0 ? 
( // Egyptian Hieroglyph Format Controls cp <= 0x1343F // Cf [16] ) : ( // Shorthand Format Controls cp <= 0x1BCA3 // Cf [4] // Musical Symbols || cp >= 0x1D173 // Cf [8] )); } // Ancient Greek Musical Notation ... Supplementary Private Use Area-B (1D17B - 10FFFF) return !(cp >= 0x323B0 && cp <= 0xEFFFF && ( // CJK Unified Ideographs Extension H cp <= 0xE001F // Cn [711,791] & Cf [1] // Variation Selectors Supplement || cp >= 0xE01F0 // Cn [65,040] // Tags || (cp >= 0xE0080 && cp <= 0xE00FF) // Cn [128] )); } exports.isGraphemeBase = isGraphemeBase; /** * Determines if there is a cluster boundary between two grapheme cluster break property values * * @remarks * Rules are from {@link http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules} * * @param breakProps - grapheme break properties for characters preceeding `prev` * @param prev - grapheme break property of the previous character * @param next - grapheme break property of the next character * @returns Whether there is a cluster boundary between `prev` and `next` */ function shouldBreak(breakProps, prev, next) { // Do not break between a CR and LF. Otherwise, break before and after controls. var _a; // GB3: CR × LF if (prev === CR && next === LF) { return false; } // GB4: (Control|CR|LF) ÷ Any if (prev === Control || prev === CR || prev === LF) { return true; } // GB5: Any ÷ (Control|CR|LF) if (next === Control || next === CR || next === LF) { return true; } // Do not break Hangul syllable or other conjoining sequences. // GB6: L × (L|V|LV|LVT) if (prev === L && (next === L || next === V || next === LV || next === LVT)) { return false; } // GB7: (LV|V) × (V|T) if ((prev === LV || prev === V) && (next === V || next === T)) { return false; } // GB8: (LVT|T) × T if ((prev === LVT || prev === T) && next === T) { return false; } // Do not break before extending characters or ZWJ. 
// GB9: Any × (Extend | ZWJ) if ((next & 0xF) === Extend || (next & 0xF) === ZWJ) { return false; } // Do not break before SpacingMarks, or after Prepend characters. // GB9a: Any × SpacingMark if (next === SpacingMark) { return false; } // GB9b: Prepend × Any if (prev === Prepend) { return false; } // Do not break within certain combinations with Indic_Conjunct_Break (InCB)=Linker. // GB9c: InCB_Consonant [InCB_Extend InCB_Linker]* InCB_Linker [InCB_Extend InCB_Linker]* × InCB_Consonant if (next & InCB_Consonant && prev & 0b0110000) { let linker = !!(prev & InCB_Linker); for (let i = breakProps.length - 1; i >= 0; i -= 1) { if (breakProps[i] & InCB_Consonant) { if (linker) return false; break; } if (breakProps[i] & 0b0110000) { linker || (linker = !!(breakProps[i] & InCB_Linker)); continue; } break; } } // Do not break within emoji modifier sequences or emoji zwj sequences. // GB11: ExtendedPictographic Extend* ZWJ × ExtendedPictographic if ((prev & 0xF) === ZWJ && next === ExtendedPictographic) { let i = breakProps.length - 1; while ((((_a = breakProps[i]) !== null && _a !== void 0 ? _a : 0) & 0xF) === Extend) i -= 1; return breakProps[i] !== ExtendedPictographic; } // Do not break within emoji flag sequences // GB12: sot (RI RI)* RI × RI // GB13: [^RI] (RI RI)* RI × RI if (prev === RI && next === RI) { let i = breakProps.length - 1; while (breakProps[i] === RI) i -= 1; return (breakProps.length - 1 - i) % 2 === 1; } // GB999: Any ÷ Any return true; } exports.shouldBreak = shouldBreak; //# sourceMappingURL=graphemeBreak.js.map