myanmar-lorem-ipsum
Version:
Myanmar Languages Dummy Text Generator
29 lines (21 loc) • 1.09 kB
text/typescript
// Character-class fragments (regex source text) used to assemble BREAK_PATTERN.
const myConsonant = "\u1000-\u1021"; // "က-အ"
const enChar = "a-zA-Z0-9";
// "ဣဤဥဦဧဩဪဿ၌၍၏၀-၉၊။!-/:-@[-`{-~\s"
const otherChar = "\u1023\u1024\u1025\u1026\u1027\u1029\u102a\u103f\u104c\u104d\u104f\u1040-\u1049\u104a\u104b!-/:-@\\[-`\\{-~\\s";
const ssSymbol = "\u1039"; // subscript (stacking) sign
const ngaThat = "\u1004\u103a"; // nga + a-That; not referenced by BREAK_PATTERN
const aThat = "\u103a"; // a-That sign
// Regular expression pattern for Myanmar syllable breaking.
// A break point is either:
//   * a consonant that is NOT preceded by the subscript sign (a stacked
//     consonant belongs to the syllable before it) and is NOT followed by
//     a-That or the subscript sign (such a consonant closes the current
//     syllable), or
//   * any single character from enChar / otherChar.
// The "not preceded by" test has to be a look-behind: a look-ahead placed in
// front of the consonant class inspects the consonant itself and never fails.
const BREAK_PATTERN = new RegExp(
  "((?<!" + ssSymbol + ")[" + myConsonant + "](?![" + aThat + ssSymbol + "])" +
    "|[" + enChar + otherChar + "])",
  "g"
);
/**
 * Splits `raw` into consecutive pieces, starting a new piece at every
 * position where BREAK_PATTERN matches.
 *
 * The pieces are contiguous slices of the input, so joining them with ""
 * reproduces `raw` exactly. Text that precedes the first break point is kept
 * as the first piece rather than discarded, and no sentinel character is
 * inserted, so any input character is safe.
 *
 * @param raw - Text to split.
 * @returns The pieces in input order; an empty array for an empty string.
 */
export function _segment(raw: string): string[] {
  const outputs: string[] = [];
  let start = 0;

  // BREAK_PATTERN is a shared global regex; rewind it before scanning.
  BREAK_PATTERN.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = BREAK_PATTERN.exec(raw)) !== null) {
    // Close the piece that ends where this break point begins.
    if (match.index > start) {
      outputs.push(raw.slice(start, match.index));
    }
    start = match.index;
  }

  // Whatever follows the last break point is the final piece.
  if (start < raw.length) {
    outputs.push(raw.slice(start));
  }
  return outputs;
}
/**
 * Returns `text` with `separator` inserted in front of every break point
 * matched by BREAK_PATTERN.
 *
 * If the text starts at a break point, the result starts with `separator`.
 *
 * @param text - Text to annotate.
 * @param separator - String placed before each break point; used literally.
 * @returns The text with separators inserted.
 */
export function segment(text: string, separator: string): string {
  // Use a replacer function so that "$" sequences inside `separator`
  // (e.g. "$", "$&", "$1") are not interpreted as replacement patterns.
  // The pattern's single capture group spans the whole match, so the matched
  // text is the same value the "$1" reference produced.
  return text.replace(BREAK_PATTERN, (matched: string) => separator + matched);
}