UNPKG

myanmar-lorem-ipsum

Version:
26 lines (25 loc) 1.15 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const myConsonant = "\u1000-\u1021"; // "က-အ" const enChar = "a-zA-Z0-9"; // "ဣဤဥဦဧဩဪဿ၌၍၏၀-၉၊။!-/:-@[-`{-~\s" const otherChar = "\u1023\u1024\u1025\u1026\u1027\u1029\u102a\u103f\u104c\u104d\u104f\u1040-\u1049\u104a\u104b!-/:-@\\[-`\\{-~\\s"; const ssSymbol = "\u1039"; const ngaThat = "\u1004\u103a"; const aThat = "\u103a"; // Regular expression pattern for Myanmar syllable breaking // *** a consonant not after a subscript symbol AND a consonant is not // followed by a-That character or a subscript symbol const BREAK_PATTERN = new RegExp("((?!" + ssSymbol + ")[" + myConsonant + "](?![" + aThat + ssSymbol + "])" + "|[" + enChar + otherChar + "])", "mg"); function _segment(raw) { const outputs = raw.replace(BREAK_PATTERN, "\uD835\uDD4A$1").split("\uD835\uDD4A"); if (outputs.length > 0) { outputs.shift(); } return outputs; } exports._segment = _segment; function segment(text, separator) { return text.replace(BREAK_PATTERN, separator + "$1"); } exports.segment = segment;