UNPKG

runes2

Version:

Unicode-aware JS string splitting, typescript version

1 lines 11.6 kB
{"version":3,"file":"index.umd.production.min.cjs","sources":["../src/index.ts"],"sourcesContent":["export const enum EnumRunesCode\r\n{\r\n\tHIGH_SURROGATE_START = 0xd800,\r\n\tHIGH_SURROGATE_END = 0xdbff,\r\n\r\n\tLOW_SURROGATE_START = 0xdc00,\r\n\r\n\tREGIONAL_INDICATOR_START = 0x1f1e6,\r\n\tREGIONAL_INDICATOR_END = 0x1f1ff,\r\n\r\n\tFITZPATRICK_MODIFIER_START = 0x1f3fb,\r\n\tFITZPATRICK_MODIFIER_END = 0x1f3ff,\r\n\r\n\tVARIATION_MODIFIER_START = 0xfe00,\r\n\tVARIATION_MODIFIER_END = 0xfe0f,\r\n\r\n\tDIACRITICAL_MARKS_START = 0x20d0,\r\n\tDIACRITICAL_MARKS_END = 0x20ff,\r\n\r\n\tSUBDIVISION_INDICATOR_START = 0x1f3f4,\r\n\tTAGS_START = 0xe0000,\r\n\tTAGS_END = 0xe007f,\r\n\r\n\tZWJ = 0x200d,\r\n}\r\n\r\nexport const GRAPHEMES = Object.freeze([\r\n\t0x0308, // ( ◌̈ ) COMBINING DIAERESIS\r\n\t0x0937, // ( ष ) DEVANAGARI LETTER SSA\r\n\t0x093F, // ( ि ) DEVANAGARI VOWEL SIGN I\r\n\t0x0BA8, // ( ந ) TAMIL LETTER NA\r\n\t0x0BBF, // ( ி ) TAMIL VOWEL SIGN I\r\n\t0x0BCD, // ( ◌்) TAMIL SIGN VIRAMA\r\n\t0x0E31, // ( ◌ั ) THAI CHARACTER MAI HAN-AKAT\r\n\t0x0E33, // ( ำ ) THAI CHARACTER SARA AM\r\n\t0x0E40, // ( เ ) THAI CHARACTER SARA E\r\n\t0x0E49, // ( เ ) THAI CHARACTER MAI THO\r\n\t0x1100, // ( ᄀ ) HANGUL CHOSEONG KIYEOK\r\n\t0x1161, // ( ᅡ ) HANGUL JUNGSEONG A\r\n\t0x11A8, // ( ᆨ ) HANGUL JONGSEONG KIYEOK\r\n]);\r\n\r\nexport const enum EnumCodeUnits\r\n{\r\n\tunit_1 = 1,\r\n\tunit_2 = 2,\r\n\tunit_4 = 4,\r\n}\r\n\r\nexport function runes(string: string): string[]\r\n{\r\n\tif (typeof string !== 'string')\r\n\t{\r\n\t\tthrow new TypeError('string cannot be undefined or null')\r\n\t}\r\n\tconst result: string[] = []\r\n\tlet i = 0\r\n\tlet increment = 0\r\n\twhile (i < string.length)\r\n\t{\r\n\t\tincrement += nextUnits(i + increment, string)\r\n\t\tif (isGrapheme(string[i + increment]))\r\n\t\t{\r\n\t\t\tincrement++\r\n\t\t}\r\n\t\tif (isVariationSelector(string[i + increment]))\r\n\t\t{\r\n\t\t\tincrement++\r\n\t\t}\r\n\t\tif (isDiacriticalMark(string[i + increment]))\r\n\t\t{\r\n\t\t\tincrement++\r\n\t\t}\r\n\t\tif (isZeroWidthJoiner(string[i + increment]))\r\n\t\t{\r\n\t\t\tincrement++\r\n\t\t\tcontinue\r\n\t\t}\r\n\t\tresult.push(string.substring(i, i + increment))\r\n\t\ti += increment\r\n\t\tincrement = 0\r\n\t}\r\n\treturn result\r\n}\r\n\r\n// Decide how many code units make up the current character.\r\n// BMP characters: 1 code unit\r\n// Non-BMP characters (represented by surrogate pairs): 2 code units\r\n// Emoji with skin-tone modifiers: 4 code units (2 code points)\r\n// Country flags: 4 code units (2 code points)\r\n// Variations: 2 code units\r\n// Subdivision flags: 14 code units (7 code points)\r\nexport function nextUnits(i: number, string: string)\r\n{\r\n\tconst current = string[i]\r\n\t// If we don't have a value that is part of a surrogate pair, or we're at\r\n\t// the end, only take the value at i\r\n\tif (!isFirstOfSurrogatePair(current) || i === string.length - 1)\r\n\t{\r\n\t\treturn EnumCodeUnits.unit_1\r\n\t}\r\n\r\n\tconst currentPair = current + string[i + 1]\r\n\tlet nextPair = string.substring(i + 2, i + 5)\r\n\r\n\t// Country flags are comprised of two regional indicator symbols,\r\n\t// each represented by a surrogate pair.\r\n\t// See http://emojipedia.org/flags/\r\n\t// If both pairs are regional indicator symbols, take 4\r\n\tif (isRegionalIndicator(currentPair) && isRegionalIndicator(nextPair))\r\n\t{\r\n\t\treturn EnumCodeUnits.unit_4\r\n\t}\r\n\r\n\t// https://unicode.org/emoji/charts/full-emoji-list.html#subdivision-flag\r\n\t// See https://emojipedia.org/emoji-tag-sequence/\r\n\t// If nextPair is in Tags(https://en.wikipedia.org/wiki/Tags_(Unicode_block)),\r\n\t// then find next closest U+E007F(CANCEL TAG)\r\n\tif (isSubdivisionFlag(currentPair) &&\tisSupplementarySpecialpurposePlane(nextPair))\r\n\t{\r\n\t\treturn string.slice(i).indexOf(String.fromCodePoint(EnumRunesCode.TAGS_END)) + 2\r\n\t}\r\n\r\n\t// If the next pair make a Fitzpatrick skin tone\r\n\t// modifier, take 4\r\n\t// See http://emojipedia.org/modifiers/\r\n\t// Technically, only some code points are meant to be\r\n\t// combined with the skin tone modifiers. This function\r\n\t// does not check the current pair to see if it is\r\n\t// one of them.\r\n\tif (isFitzpatrickModifier(nextPair))\r\n\t{\r\n\t\treturn EnumCodeUnits.unit_4\r\n\t}\r\n\treturn EnumCodeUnits.unit_2\r\n}\r\n\r\nexport function isFirstOfSurrogatePair(string: string)\r\n{\r\n\treturn string && betweenInclusive(string[0].charCodeAt(0), EnumRunesCode.HIGH_SURROGATE_START, EnumRunesCode.HIGH_SURROGATE_END)\r\n}\r\n\r\nexport function isRegionalIndicator(string: string)\r\n{\r\n\treturn betweenInclusive(codePointFromSurrogatePair(string), EnumRunesCode.REGIONAL_INDICATOR_START, EnumRunesCode.REGIONAL_INDICATOR_END)\r\n}\r\n\r\nexport function isSubdivisionFlag(string: string)\r\n{\r\n\treturn betweenInclusive(codePointFromSurrogatePair(string),\tEnumRunesCode.SUBDIVISION_INDICATOR_START, EnumRunesCode.SUBDIVISION_INDICATOR_START)\r\n}\r\n\r\nexport function isFitzpatrickModifier(string: string)\r\n{\r\n\treturn betweenInclusive(codePointFromSurrogatePair(string), EnumRunesCode.FITZPATRICK_MODIFIER_START, EnumRunesCode.FITZPATRICK_MODIFIER_END)\r\n}\r\n\r\nexport function isVariationSelector(string: string)\r\n{\r\n\treturn typeof string === 'string' && betweenInclusive(string.charCodeAt(0), EnumRunesCode.VARIATION_MODIFIER_START, EnumRunesCode.VARIATION_MODIFIER_END)\r\n}\r\n\r\nexport function isDiacriticalMark(string: string)\r\n{\r\n\treturn typeof string === 'string' && betweenInclusive(string.charCodeAt(0), EnumRunesCode.DIACRITICAL_MARKS_START, EnumRunesCode.DIACRITICAL_MARKS_END)\r\n}\r\n\r\nexport function isSupplementarySpecialpurposePlane(string: string)\r\n{\r\n\tconst codePoint = string.codePointAt(0)\r\n\treturn (typeof string === 'string' &&\ttypeof codePoint === 'number' && betweenInclusive(codePoint, EnumRunesCode.TAGS_START, EnumRunesCode.TAGS_END))\r\n}\r\n\r\nexport function isGrapheme(string: string)\r\n{\r\n\treturn typeof string === 'string' && GRAPHEMES.includes(string.charCodeAt(0))\r\n}\r\n\r\nexport function isZeroWidthJoiner(string: string)\r\n{\r\n\treturn typeof string === 'string' && string.charCodeAt(0) === EnumRunesCode.ZWJ\r\n}\r\n\r\nexport function codePointFromSurrogatePair(pair: string)\r\n{\r\n\tconst highOffset = pair.charCodeAt(0) - EnumRunesCode.HIGH_SURROGATE_START\r\n\tconst lowOffset = pair.charCodeAt(1) - EnumRunesCode.LOW_SURROGATE_START\r\n\treturn (highOffset << 10) + lowOffset + 0x10000\r\n}\r\n\r\nexport function betweenInclusive(value: number, lower: number, upper: number)\r\n{\r\n\treturn value >= lower && value <= upper\r\n}\r\n\r\nexport function substring(string: string, start?: number, width?: number)\r\n{\r\n\tconst chars = runes(string)\r\n\tif (start === undefined)\r\n\t{\r\n\t\treturn string\r\n\t}\r\n\tif (start >= chars.length)\r\n\t{\r\n\t\treturn ''\r\n\t}\r\n\tconst rest = chars.length - start\r\n\tconst stringWidth = width === undefined ? rest : width\r\n\tlet endIndex = start + stringWidth\r\n\tif (endIndex > (start + rest))\r\n\t{\r\n\t\tendIndex = undefined\r\n\t}\r\n\treturn chars.slice(start, endIndex).join('')\r\n}\r\n\r\nexport { substring as substr }\r\n\r\n// @ts-ignore\r\nif (process.env.TSDX_FORMAT !== 'esm')\r\n{\r\n\tObject.defineProperty(runes, 'runes', { value: runes });\r\n\tObject.defineProperty(runes, 'default', { value: runes });\r\n\tObject.defineProperty(runes, \"__esModule\", { value: true });\r\n\r\n\tObject.defineProperty(runes, 'substr', { value: substring });\r\n\tObject.defineProperty(runes, 'substring', { value: substring });\r\n\r\n\t// @ts-ignore\r\n\tObject.defineProperty(runes, 'EnumRunesCode', { value: EnumRunesCode });\r\n\t// @ts-ignore\r\n\tObject.defineProperty(runes, 'EnumCodeUnits', { value: EnumCodeUnits });\r\n\tObject.defineProperty(runes, 'GRAPHEMES', { value: GRAPHEMES });\r\n}\r\n\r\nexport default runes\r\n"],"names":["EnumRunesCode","exports","GRAPHEMES","Object","freeze","EnumCodeUnits","runes","string","TypeError","result","i","increment","length","nextUnits","isGrapheme","isVariationSelector","isDiacriticalMark","isZeroWidthJoiner","push","substring","current","isFirstOfSurrogatePair","currentPair","nextPair","isRegionalIndicator","isSubdivisionFlag","isSupplementarySpecialpurposePlane","slice","indexOf","String","fromCodePoint","isFitzpatrickModifier","betweenInclusive","charCodeAt","codePointFromSurrogatePair","codePoint","codePointAt","includes","pair","value","lower","upper","start","width","chars","undefined","rest","endIndex","join","defineProperty"],"mappings":";;;;EAAA,IAAkBA;EAwBjBC,EAAAD,qBAAA,IAxBiBA,IAAAA,oBAAAA,EAAAA,gBAwBjB,CAAA,IAtBAA,EAAA,uBAAA,SAAA;EACAA,EAAAA,EAAA,qBAAA,SAAA,sBAEAA,EAAAA,EAAA,sBAAA,SAAA;EAEAA,EAAAA,EAAA,2BAAA,UAAA,4BACAA,EAAAA,EAAA,yBAAA,UAAA;EAEAA,EAAAA,EAAA,6BAAA,UAAA,8BACAA,EAAAA,EAAA,2BAAA,UAAA;EAEAA,EAAAA,EAAA,2BAAA,SAAA,4BACAA,EAAAA,EAAA,yBAAA,SAAA;EAEAA,EAAAA,EAAA,0BAAA,QAAA,2BACAA,EAAAA,EAAA,wBAAA,QAAA;EAEAA,EAAAA,EAAA,8BAAA,UAAA,+BACAA,EAAAA,EAAA,aAAA,UAAA;EACAA,EAAAA,EAAA,WAAA,UAAA,YAEAA,EAAAA,EAAA,MAAA,QAAA;EAGYE,MAAAA,IAAYC,OAAOC,OAAO,EACtC,QACA,QACA,QACA,QACA,QACA,QACA,QACA,QACA,QACA,QACA,QACA,QACA;EAGD,IAAkBC;EAOZ,SAAUC,MAAMC;IAErB,IAAsB,mBAAXA,GAEV,MAAM,IAAIC,UAAU;IAErB,MAAMC,IAAmB;IACzB,IAAIC,IAAI,GACJC,IAAY;IAChB,MAAOD,IAAIH,EAAOK,UAEjBD,KAAaE,UAAUH,IAAIC,GAAWJ,IAClCO,WAAWP,EAAOG,IAAIC,OAEzBA,KAEGI,oBAAoBR,EAAOG,IAAIC,OAElCA;IAEGK,kBAAkBT,EAAOG,IAAIC,OAEhCA,KAEGM,kBAAkBV,EAAOG,IAAIC,MAEhCA,OAGDF,EAAOS,KAAKX,EAAOY,UAAUT,GAAGA,IAAIC;IACpCD,KAAKC,GACLA,IAAY;IAEb,OAAOF;AACR;EASgB,SAAAI,UAAUH,GAAWH;IAEpC,MAAMa,IAAUb,EAAOG;IAGvB,KAAKW,uBAAuBD,MAAYV,MAAMH,EAAOK,SAAS,GAE7D,OAA2B;IAG5B,MAAMU,IAAcF,IAAUb,EAAOG,IAAI;IACzC,IAAIa,IAAWhB,EAAOY,UAAUT,IAAI,GAAGA,IAAI;IAM3C,OAAIc,oBAAoBF,MAAgBE,oBAAoBD,KAEhC,IAOxBE,kBAAkBH,MAAgBI,mCAAmCH,KAEjEhB,EAAOoB,MAAMjB,GAAGkB,QAAQC,OAAOC,cAAa,WAA4B,IAU5EC,sBAAsBR,KAEE,IAED;AAC5B;EAEM,SAAUF,uBAAuBd;IAEtC,OAAOA,KAAUyB,iBAAiBzB,EAAO,GAAG0B,WAAW;AACxD;EAEM,SAAUT,oBAAoBjB;IAEnC,OAAOyB,iBAAiBE,2BAA2B3B;AACpD;EAEM,SAAUkB,kBAAkBlB;IAEjC,OAAOyB,iBAAiBE,2BAA2B3B;AACpD;EAEM,SAAUwB,sBAAsBxB;IAErC,OAAOyB,iBAAiBE,2BAA2B3B;AACpD;EAEM,SAAUQ,oBAAoBR;IAEnC,OAAyB,mBAAXA,KAAuByB,iBAAiBzB,EAAO0B,WAAW;AACzE;EAEM,SAAUjB,kBAAkBT;IAEjC,OAAyB,mBAAXA,KAAuByB,iBAAiBzB,EAAO0B,WAAW;AACzE;EAEM,SAAUP,mCAAmCnB;IAElD,MAAM4B,IAAY5B,EAAO6B,YAAY;IACrC,OAA0B,mBAAX7B,KAA4C,mBAAd4B,KAA0BH,iBAAiBG,GAAS,QAAA;AAClG;EAEM,SAAUrB,WAAWP;IAE1B,OAAyB,mBAAXA,KAAuBL,EAAUmC,SAAS9B,EAAO0B,WAAW;AAC3E;EAEM,SAAUhB,kBAAkBV;IAEjC,OAAyB,mBAAXA,KAA2C,SAApBA,EAAO0B,WAAW;AACxD;EAEM,SAAUC,2BAA2BI;IAI1C,QAFmBA,EAAKL,WAAW,KAAE,SAEf,OADJK,EAAKL,WAAW,KAAE,SACI;AACzC;WAEgBD,iBAAiBO,GAAeC,GAAeC;IAE9D,OAAOF,KAASC,KAASD,KAASE;AACnC;WAEgBtB,UAAUZ,GAAgBmC,GAAgBC;IAEzD,MAAMC,IAAQtC,MAAMC;IACpB,SAAcsC,MAAVH,GAEH,OAAOnC;IAER,IAAImC,KAASE,EAAMhC,QAElB,OAAO;IAER,MAAMkC,IAAOF,EAAMhC,SAAS8B;IAE5B,IAAIK,IAAWL,UADeG,MAAVF,IAAsBG,IAAOH;IAMjD,OAJII,IAAYL,IAAQI,MAEvBC,SAAWF,IAELD,EAAMjB,MAAMe,GAAOK,GAAUC,KAAK;AAC1C;EAvKC/C,EAAAI,qBAAA,IALiBA,IAAAA,oBAAAA,EAAAA,gBAKjB,CAAA,IAHAA,EAAA,SAAA,KAAA;EACAA,EAAAA,EAAA,SAAA,KAAA,UACAA,EAAAA,EAAA,SAAA,KAAA,UA+KAF,OAAO8C,eAAe3C,OAAO,SAAS;IAAEiC,OAAOjC;MAC/CH,OAAO8C,eAAe3C,OAAO,WAAW;IAAEiC,OAAOjC;MACjDH,OAAO8C,eAAe3C,OAAO,cAAc;IAAEiC,QAAO;MAEpDpC,OAAO8C,eAAe3C,OAAO,UAAU;IAAEiC,OAAOpB;MAChDhB,OAAO8C,eAAe3C,OAAO,aAAa;IAAEiC,OAAOpB;MAGnDhB,OAAO8C,eAAe3C,OAAO,iBAAiB;IAAEiC,OAAOvC,EAAAA;MAEvDG,OAAO8C,eAAe3C,OAAO,iBAAiB;IAAEiC,OAAOlC,EAAAA;MACvDF,OAAO8C,eAAe3C,OAAO,aAAa;IAAEiC,OAAOrC;;;;;;;;;;"}