UNPKG

myanmar-lorem-ipsum

Version:
29 lines (21 loc) 1.09 kB
// Character-class fragments used to build the syllable-break pattern.
const myConsonant = "\u1000-\u1021"; // "က-အ"
const enChar = "a-zA-Z0-9";
// "ဣဤဥဦဧဩဪဿ၌၍၏၀-၉၊။!-/:-@[-`{-~\s"
const otherChar = "\u1023\u1024\u1025\u1026\u1027\u1029\u102a\u103f\u104c\u104d\u104f\u1040-\u1049\u104a\u104b!-/:-@\\[-`\\{-~\\s";
const ssSymbol = "\u1039"; // subscript (stacking) sign
const aThat = "\u103a"; // a-That sign

// Regular expression pattern for Myanmar syllable breaking.
// Group 1: a consonant that is NOT followed by the a-That character or the
//          subscript symbol.
// Group 2: any single character from the enChar / otherChar classes.
// The remaining rule -- "a consonant not AFTER a subscript symbol" -- is
// applied in findBreakOffsets() rather than with a regex lookbehind, so the
// module still loads on engines without lookbehind support.
const BREAK_PATTERN = new RegExp(
  "([" + myConsonant + "](?![" + aThat + ssSymbol + "]))" +
    "|([" + enChar + otherChar + "])",
  "g"
);

/**
 * Returns the ascending string offsets at which a new segment starts.
 *
 * A consonant that directly follows the subscript symbol is the lower half
 * of a stacked pair, so it never starts a new segment.
 */
function findBreakOffsets(text: string): number[] {
  const offsets: number[] = [];
  // The pattern is global and shared, so always start scanning from 0.
  BREAK_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null = BREAK_PATTERN.exec(text);
  while (match !== null) {
    const isConsonant = match[1] !== undefined;
    const afterSubscript = text.charAt(match.index - 1) === ssSymbol;
    if (!(isConsonant && afterSubscript)) {
      offsets.push(match.index);
    }
    // Every match is exactly one UTF-16 unit long, so exec() always advances.
    match = BREAK_PATTERN.exec(text);
  }
  return offsets;
}

/**
 * Splits `raw` into segments at every syllable break point.
 *
 * Joining the returned segments with "" reproduces `raw` exactly; text that
 * precedes the first break point is kept as its own leading segment.
 *
 * @param raw Text to segment.
 * @returns The segments in order; an empty array for an empty string.
 */
export function _segment(raw: string): string[] {
  const pieces: string[] = [];
  let start = 0;
  for (const offset of findBreakOffsets(raw)) {
    if (offset > start) {
      pieces.push(raw.slice(start, offset));
    }
    start = offset;
  }
  if (start < raw.length) {
    pieces.push(raw.slice(start));
  }
  return pieces;
}

/**
 * Inserts `separator` in front of every syllable break point in `text`,
 * including a break point at offset 0.
 *
 * The separator is inserted literally: `$` sequences in it are not
 * interpreted as replacement patterns.
 *
 * @param text Text to segment.
 * @param separator String placed before each segment.
 * @returns `text` with separators inserted.
 */
export function segment(text: string, separator: string): string {
  let result = "";
  let start = 0;
  for (const offset of findBreakOffsets(text)) {
    result += text.slice(start, offset) + separator;
    start = offset;
  }
  return result + text.slice(start);
}