UNPKG

@bntk/transliteration

Version:

Build applications with Bengali natural language processing tools.

1,399 lines (1,395 loc) 32 kB
// packages/core/transliteration/assets/orva.json

/** Bengali hasanta (virama) sign, spelled out because it is a combining mark. */
const HASANTA = "\u09CD";

/**
 * Ordered clean-up table applied by `orva` after the reverse pattern pass.
 * Every occurrence of `find` is replaced by `replace`, one rule after another,
 * so the order of the rows is significant.
 */
const patterns = [
  ["`", ""],
  ["ই", "i"], ["ঈ", "I"], ["উ", "oo"], ["ঊ", "OO"], ["ঋ", "R"], ["এ", "e"],
  ["ঐ", "oi"], ["ও", "o"], ["ঔ", "ou"], ["অ", "o"], ["আ", "a"],
  ["ক", "k"], ["খ", "kh"], ["গ", "g"], ["ঘ", "gh"], ["ঙ", "ng"],
  ["চ", "ch"], ["ছ", "Ch"], ["জ", "j"], ["ঝ", "jh"], ["ঞ", "n"],
  ["ট", "T"], ["ঠ", "Th"], ["ড", "D"], ["ঢ", "Dh"], ["ণ", "N"],
  ["ত", "t"], ["থ", "th"], ["দ", "d"], ["ধ", "dh"], ["ন", "n"],
  ["প", "p"], ["ফ", "ph"], ["ব", "b"], ["ভ", "bh"], ["ম", "m"],
  ["য", "z"], ["র", "r"], ["ল", "l"], ["শ", "sh"], ["ষ", "Sh"], ["স", "s"], ["হ", "h"],
  ["ড়", "R"], ["ঢ়", "Rh"], ["য়", "y"], ["ৎ", "t"], ["ং", "ng"], ["ঃ", ":"], ["ঁ", "~"],
  ["w", "O"],
  ["oo", "u"],
  // A Latin letter directly followed by a hasanta is dropped: a..z first, then A..Z.
  ...[..."abcdefghijklmnopqrstuvwxyz", ..."ABCDEFGHIJKLMNOPQRSTUVWXYZ"].map(
    (letter) => [`${letter}${HASANTA}`, ""],
  ),
  ["u", ""],
].map(([find, replace]) => ({ find, replace, rules: [] }));

// packages/core/transliteration/assets/rules.json

/** Builds a rule condition that inspects the text just before the matched pattern. */
const prefixCond = (scope, value = null) => ({ type: "prefix", scope, value });

/** Builds a rule condition that inspects the text just after the matched pattern. */
const suffixCond = (scope, value = null) => ({ type: "suffix", scope, value });

/**
 * Ordered Latin -> Bengali pattern table used by `avro` (and reversed by `orva`).
 * The first row whose `find` matches at the current position wins, so longer
 * spellings are listed before their prefixes. A row may carry `rules`; the first
 * rule whose `matches` conditions all hold overrides the row's default `replace`.
 */
const patterns2 = [
  { find: "bhl", replace: "ভ্ল" }, { find: "psh", replace: "পশ" }, { find: "bdh", replace: "ব্ধ" },
  { find: "bj", replace: "ব্জ" }, { find: "bd", replace: "ব্দ" }, { find: "bb", replace: "ব্ব" },
  { find: "bl", replace: "ব্ল" }, { find: "bh", replace: "ভ" }, { find: "vl", replace: "ভ্ল" },
  { find: "b", replace: "ব" }, { find: "v", replace: "ভ" },
  { find: "cNG", replace: "চ্ঞ" }, { find: "cch", replace: "চ্ছ" }, { find: "cc", replace: "চ্চ" },
  { find: "ch", replace: "ছ" }, { find: "c", replace: "চ" },
  { find: "dhn", replace: "ধ্ন" }, { find: "dhm", replace: "ধ্ম" }, { find: "dgh", replace: "দ্ঘ" },
  { find: "ddh", replace: "দ্ধ" }, { find: "dbh", replace: "দ্ভ" }, { find: "dv", replace: "দ্ভ" },
  { find: "dm", replace: "দ্ম" }, { find: "DD", replace: "ড্ড" }, { find: "Dh", replace: "ঢ" },
  { find: "dh", replace: "ধ" }, { find: "dg", replace: "দ্গ" }, { find: "dd", replace: "দ্দ" },
  { find: "D", replace: "ড" }, { find: "d", replace: "দ" },
  { find: "...", replace: "..." }, { find: ".`", replace: "." }, { find: "..", replace: "।।" },
  { find: ".", replace: "।" },
  { find: "ghn", replace: "ঘ্ন" }, { find: "Ghn", replace: "ঘ্ন" }, { find: "gdh", replace: "গ্ধ" },
  { find: "Gdh", replace: "গ্ধ" }, { find: "gN", replace: "গ্ণ" }, { find: "GN", replace: "গ্ণ" },
  { find: "gn", replace: "গ্ন" }, { find: "Gn", replace: "গ্ন" }, { find: "gm", replace: "গ্ম" },
  { find: "Gm", replace: "গ্ম" }, { find: "gl", replace: "গ্ল" }, { find: "Gl", replace: "গ্ল" },
  { find: "gg", replace: "জ্ঞ" }, { find: "GG", replace: "জ্ঞ" }, { find: "Gg", replace: "জ্ঞ" },
  { find: "gG", replace: "জ্ঞ" }, { find: "gh", replace: "ঘ" }, { find: "Gh", replace: "ঘ" },
  { find: "g", replace: "গ" }, { find: "G", replace: "গ" },
  { find: "hN", replace: "হ্ণ" }, { find: "hn", replace: "হ্ন" }, { find: "hm", replace: "হ্ম" },
  { find: "hl", replace: "হ্ল" }, { find: "h", replace: "হ" },
  { find: "jjh", replace: "জ্ঝ" }, { find: "jNG", replace: "জ্ঞ" }, { find: "jh", replace: "ঝ" },
  { find: "jj", replace: "জ্জ" }, { find: "j", replace: "জ" }, { find: "J", replace: "জ" },
  { find: "kkhN", replace: "ক্ষ্ণ" }, { find: "kShN", replace: "ক্ষ্ণ" }, { find: "kkhm", replace: "ক্ষ্ম" },
  { find: "kShm", replace: "ক্ষ্ম" }, { find: "kxN", replace: "ক্ষ্ণ" }, { find: "kxm", replace: "ক্ষ্ম" },
  { find: "kkh", replace: "ক্ষ" }, { find: "kSh", replace: "ক্ষ" }, { find: "ksh", replace: "কশ" },
  { find: "kx", replace: "ক্ষ" }, { find: "kk", replace: "ক্ক" }, { find: "kT", replace: "ক্ট" },
  { find: "kt", replace: "ক্ত" }, { find: "kl", replace: "ক্ল" }, { find: "ks", replace: "ক্স" },
  { find: "kh", replace: "খ" }, { find: "k", replace: "ক" },
  { find: "lbh", replace: "ল্ভ" }, { find: "ldh", replace: "ল্ধ" }, { find: "lkh", replace: "লখ" },
  { find: "lgh", replace: "লঘ" }, { find: "lph", replace: "লফ" }, { find: "lk", replace: "ল্ক" },
  { find: "lg", replace: "ল্গ" }, { find: "lT", replace: "ল্ট" }, { find: "lD", replace: "ল্ড" },
  { find: "lp", replace: "ল্প" }, { find: "lv", replace: "ল্ভ" }, { find: "lm", replace: "ল্ম" },
  { find: "ll", replace: "ল্ল" }, { find: "lb", replace: "ল্ব" }, { find: "l", replace: "ল" },
  { find: "mth", replace: "ম্থ" }, { find: "mph", replace: "ম্ফ" }, { find: "mbh", replace: "ম্ভ" },
  { find: "mpl", replace: "মপ্ল" }, { find: "mn", replace: "ম্ন" }, { find: "mp", replace: "ম্প" },
  { find: "mv", replace: "ম্ভ" }, { find: "mm", replace: "ম্ম" }, { find: "ml", replace: "ম্ল" },
  { find: "mb", replace: "ম্ব" }, { find: "mf", replace: "ম্ফ" }, { find: "m", replace: "ম" },
  { find: "0", replace: "০" }, { find: "1", replace: "১" }, { find: "2", replace: "২" },
  { find: "3", replace: "৩" }, { find: "4", replace: "৪" }, { find: "5", replace: "৫" },
  { find: "6", replace: "৬" }, { find: "7", replace: "৭" }, { find: "8", replace: "৮" },
  { find: "9", replace: "৯" },
  { find: "NgkSh", replace: "ঙ্ক্ষ" }, { find: "Ngkkh", replace: "ঙ্ক্ষ" }, { find: "NGch", replace: "ঞ্ছ" },
  { find: "Nggh", replace: "ঙ্ঘ" }, { find: "Ngkh", replace: "ঙ্খ" }, { find: "NGjh", replace: "ঞ্ঝ" },
  { find: "ngOU", replace: "ঙ্গৌ" }, { find: "ngOI", replace: "ঙ্গৈ" }, { find: "Ngkx", replace: "ঙ্ক্ষ" },
  { find: "NGc", replace: "ঞ্চ" }, { find: "nch", replace: "ঞ্ছ" }, { find: "njh", replace: "ঞ্ঝ" },
  { find: "ngh", replace: "ঙ্ঘ" }, { find: "Ngk", replace: "ঙ্ক" }, { find: "Ngx", replace: "ঙ্ষ" },
  { find: "Ngg", replace: "ঙ্গ" }, { find: "Ngm", replace: "ঙ্ম" }, { find: "NGj", replace: "ঞ্জ" },
  { find: "ndh", replace: "ন্ধ" }, { find: "nTh", replace: "ন্ঠ" }, { find: "NTh", replace: "ণ্ঠ" },
  { find: "nth", replace: "ন্থ" }, { find: "nkh", replace: "ঙ্খ" }, { find: "ngo", replace: "ঙ্গ" },
  { find: "nga", replace: "ঙ্গা" }, { find: "ngi", replace: "ঙ্গি" }, { find: "ngI", replace: "ঙ্গী" },
  { find: "ngu", replace: "ঙ্গু" }, { find: "ngU", replace: "ঙ্গূ" }, { find: "nge", replace: "ঙ্গে" },
  { find: "ngO", replace: "ঙ্গো" }, { find: "NDh", replace: "ণ্ঢ" }, { find: "nsh", replace: "নশ" },
  { find: "Ngr", replace: "ঙর" }, { find: "NGr", replace: "ঞর" }, { find: "ngr", replace: "ংর" },
  { find: "nj", replace: "ঞ্জ" }, { find: "Ng", replace: "ঙ" }, { find: "NG", replace: "ঞ" },
  { find: "nk", replace: "ঙ্ক" }, { find: "ng", replace: "ং" }, { find: "nn", replace: "ন্ন" },
  { find: "NN", replace: "ণ্ণ" }, { find: "Nn", replace: "ণ্ন" }, { find: "nm", replace: "ন্ম" },
  { find: "Nm", replace: "ণ্ম" }, { find: "nd", replace: "ন্দ" }, { find: "nT", replace: "ন্ট" },
  { find: "NT", replace: "ণ্ট" }, { find: "nD", replace: "ন্ড" }, { find: "ND", replace: "ণ্ড" },
  { find: "nt", replace: "ন্ত" }, { find: "ns", replace: "ন্স" }, { find: "nc", replace: "ঞ্চ" },
  { find: "n", replace: "ন" }, { find: "N", replace: "ণ" },
  { find: "OI`", replace: "ৈ" }, { find: "OU`", replace: "ৌ" }, { find: "O`", replace: "ো" },
  {
    find: "OI",
    replace: "ৈ",
    rules: [
      { matches: [prefixCond("!consonant")], replace: "ঐ" },
      { matches: [prefixCond("punctuation")], replace: "ঐ" },
    ],
  },
  {
    find: "OU",
    replace: "ৌ",
    rules: [
      { matches: [prefixCond("!consonant")], replace: "ঔ" },
      { matches: [prefixCond("punctuation")], replace: "ঔ" },
    ],
  },
  {
    find: "O",
    replace: "ো",
    rules: [
      { matches: [prefixCond("!consonant")], replace: "ও" },
      { matches: [prefixCond("punctuation")], replace: "ও" },
    ],
  },
  { find: "phl", replace: "ফ্ল" }, { find: "pT", replace: "প্ট" }, { find: "pt", replace: "প্ত" },
  { find: "pn", replace: "প্ন" }, { find: "pp", replace: "প্প" }, { find: "pl", replace: "প্ল" },
  { find: "ps", replace: "প্স" }, { find: "ph", replace: "ফ" }, { find: "fl", replace: "ফ্ল" },
  { find: "f", replace: "ফ" }, { find: "p", replace: "প" },
  { find: "rri`", replace: "ৃ" },
  {
    find: "rri",
    replace: "ৃ",
    rules: [
      { matches: [prefixCond("!consonant")], replace: "ঋ" },
      { matches: [prefixCond("punctuation")], replace: "ঋ" },
    ],
  },
  // The next four defaults contain an invisible ZERO WIDTH JOINER (U+200D)
  // between RA and the hasanta, so they are written with escapes.
  { find: "rrZ", replace: "রর\u200D\u09CD\u09AF" },
  { find: "rry", replace: "রর\u200D\u09CD\u09AF" },
  {
    find: "rZ",
    replace: "র\u200D\u09CD\u09AF",
    rules: [
      {
        matches: [
          prefixCond("consonant"),
          prefixCond("!exact", "r"),
          prefixCond("!exact", "y"),
          prefixCond("!exact", "w"),
          prefixCond("!exact", "x"),
        ],
        replace: "্র্য",
      },
    ],
  },
  {
    find: "ry",
    replace: "র\u200D\u09CD\u09AF",
    rules: [
      {
        matches: [
          prefixCond("consonant"),
          prefixCond("!exact", "r"),
          prefixCond("!exact", "y"),
          prefixCond("!exact", "w"),
          prefixCond("!exact", "x"),
        ],
        replace: "্র্য",
      },
    ],
  },
  {
    find: "rr",
    replace: "রর",
    rules: [
      {
        matches: [
          prefixCond("!consonant"),
          suffixCond("!vowel"),
          suffixCond("!exact", "r"),
          suffixCond("!punctuation"),
        ],
        replace: "র্",
      },
      { matches: [prefixCond("consonant"), prefixCond("!exact", "r")], replace: "্রর" },
    ],
  },
  { find: "Rg", replace: "ড়্গ" }, { find: "Rh", replace: "ঢ়" }, { find: "R", replace: "ড়" },
  {
    find: "r",
    replace: "র",
    rules: [
      {
        matches: [
          prefixCond("consonant"),
          prefixCond("!exact", "r"),
          prefixCond("!exact", "y"),
          prefixCond("!exact", "w"),
          prefixCond("!exact", "x"),
          prefixCond("!exact", "Z"),
        ],
        replace: "্র",
      },
    ],
  },
  { find: "shch", replace: "শ্ছ" }, { find: "ShTh", replace: "ষ্ঠ" }, { find: "Shph", replace: "ষ্ফ" },
  { find: "Sch", replace: "শ্ছ" }, { find: "skl", replace: "স্ক্ল" }, { find: "skh", replace: "স্খ" },
  { find: "sth", replace: "স্থ" }, { find: "sph", replace: "স্ফ" }, { find: "shc", replace: "শ্চ" },
  { find: "sht", replace: "শ্ত" }, { find: "shn", replace: "শ্ন" }, { find: "shm", replace: "শ্ম" },
  { find: "shl", replace: "শ্ল" }, { find: "Shk", replace: "ষ্ক" }, { find: "ShT", replace: "ষ্ট" },
  { find: "ShN", replace: "ষ্ণ" }, { find: "Shp", replace: "ষ্প" }, { find: "Shf", replace: "ষ্ফ" },
  { find: "Shm", replace: "ষ্ম" }, { find: "spl", replace: "স্প্ল" }, { find: "sk", replace: "স্ক" },
  { find: "Sc", replace: "শ্চ" }, { find: "sT", replace: "স্ট" }, { find: "st", replace: "স্ত" },
  { find: "sn", replace: "স্ন" }, { find: "sp", replace: "স্প" }, { find: "sf", replace: "স্ফ" },
  { find: "sm", replace: "স্ম" }, { find: "sl", replace: "স্ল" }, { find: "sh", replace: "শ" },
  { find: "Sc", replace: "শ্চ" }, { find: "St", replace: "শ্ত" }, { find: "Sn", replace: "শ্ন" },
  { find: "Sm", replace: "শ্ম" }, { find: "Sl", replace: "শ্ল" }, { find: "Sh", replace: "ষ" },
  { find: "s", replace: "স" }, { find: "S", replace: "শ" },
  { find: "oo`", replace: "ু" },
  {
    find: "oo",
    replace: "ু",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "উ" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "উ" },
    ],
  },
  { find: "o`", replace: "" },
  { find: "oZ", replace: "অ্য" },
  {
    find: "o",
    replace: "",
    rules: [
      { matches: [prefixCond("vowel"), prefixCond("!exact", "o")], replace: "ও" },
      { matches: [prefixCond("vowel"), prefixCond("exact", "o")], replace: "অ" },
      { matches: [prefixCond("punctuation")], replace: "অ" },
    ],
  },
  { find: "tth", replace: "ত্থ" }, { find: "t``", replace: "ৎ" }, { find: "TT", replace: "ট্ট" },
  { find: "Tm", replace: "ট্ম" }, { find: "Th", replace: "ঠ" }, { find: "tn", replace: "ত্ন" },
  { find: "tm", replace: "ত্ম" }, { find: "th", replace: "থ" }, { find: "tt", replace: "ত্ত" },
  { find: "T", replace: "ট" }, { find: "t", replace: "ত" },
  { find: "aZ", replace: "অ্যা" }, { find: "AZ", replace: "অ্যা" },
  { find: "a`", replace: "া" }, { find: "A`", replace: "া" },
  {
    find: "a",
    replace: "া",
    rules: [
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "আ" },
      {
        matches: [prefixCond("!consonant"), prefixCond("!exact", "a"), suffixCond("!exact", "`")],
        replace: "য়া",
      },
      { matches: [prefixCond("exact", "a"), suffixCond("!exact", "`")], replace: "আ" },
    ],
  },
  { find: "i`", replace: "ি" },
  {
    find: "i",
    replace: "ি",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "ই" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "ই" },
    ],
  },
  { find: "I`", replace: "ী" },
  {
    find: "I",
    replace: "ী",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "ঈ" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "ঈ" },
    ],
  },
  { find: "u`", replace: "ু" },
  {
    find: "u",
    replace: "ু",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "উ" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "উ" },
    ],
  },
  { find: "U`", replace: "ূ" },
  {
    find: "U",
    replace: "ূ",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "ঊ" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "ঊ" },
    ],
  },
  { find: "ee`", replace: "ী" },
  {
    find: "ee",
    replace: "ী",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "ঈ" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "ঈ" },
    ],
  },
  { find: "e`", replace: "ে" },
  {
    find: "e",
    replace: "ে",
    rules: [
      { matches: [prefixCond("!consonant"), suffixCond("!exact", "`")], replace: "এ" },
      { matches: [prefixCond("punctuation"), suffixCond("!exact", "`")], replace: "এ" },
    ],
  },
  { find: "z", replace: "য" },
  { find: "Z", replace: "্য" },
  {
    find: "y",
    replace: "্য",
    rules: [
      { matches: [prefixCond("!consonant"), prefixCond("!punctuation")], replace: "য়" },
      { matches: [prefixCond("punctuation")], replace: "ইয়" },
    ],
  },
  { find: "Y", replace: "য়" },
  { find: "q", replace: "ক" },
  {
    find: "w",
    replace: "ও",
    rules: [
      { matches: [prefixCond("punctuation"), suffixCond("vowel")], replace: "ওয়" },
      { matches: [prefixCond("consonant")], replace: "্ব" },
    ],
  },
  {
    find: "x",
    replace: "ক্স",
    rules: [{ matches: [prefixCond("punctuation")], replace: "এক্স" }],
  },
  { find: ":`", replace: ":" }, { find: ":", replace: "ঃ" },
  { find: "^`", replace: "^" }, { find: "^", replace: "ঁ" },
  // Hasanta followed by an invisible ZERO WIDTH NON-JOINER (U+200C).
  { find: ",,", replace: "\u09CD\u200C" },
  { find: ",", replace: "," },
  { find: "$", replace: "৳" },
  { find: "`", replace: "" },
];

const vowel = "aeiou";
const consonant = "bcdfghjklmnpqrstvwxyz";
const casesensitive = "oiudgjnrstyz";

// packages/core/transliteration/src/phonetic.ts

/** Character-classification helpers shared by the transliteration functions. */
const phonetic = {
  /**
   * Lower-cases every character except those listed in `casesensitive`,
   * whose upper- and lower-case forms map to different Bengali output.
   * @param {string} input
   * @returns {string}
   */
  fixString(input) {
    let fixed = "";
    for (let i = 0; i < input.length; ++i) {
      const cChar = input.charAt(i);
      fixed += this.isCaseSensitive(cChar) ? cChar : cChar.toLowerCase();
    }
    return fixed;
  },
  /** @param {string} c @returns {boolean} whether `c`, lower-cased, occurs in "aeiou". */
  isVowel(c) {
    return vowel.includes(c.toLowerCase());
  },
  /** @param {string} c @returns {boolean} whether `c`, lower-cased, occurs in the consonant list. */
  isConsonant(c) {
    return consonant.includes(c.toLowerCase());
  },
  /** @param {string} c @returns {boolean} true for anything that is neither vowel nor consonant. */
  isPunctuation(c) {
    return !(this.isVowel(c) || this.isConsonant(c));
  },
  /**
   * Tests whether `heystack.substring(start, end)` equals `needle`; the answer
   * is inverted when `not` is true. A range that leaves the string never equals.
   * @param {string} needle
   * @param {string} heystack
   * @param {number} start
   * @param {number} end - exclusive; may be equal to `heystack.length`
   * @param {boolean} not
   * @returns {boolean}
   */
  isExact(needle, heystack, start, end, not) {
    // `end` is exclusive, so a needle ending exactly at the end of the string
    // is in range; the former `end < length` test wrongly rejected it.
    const inRange = start >= 0 && end <= heystack.length;
    return (inRange && heystack.substring(start, end) === needle) !== not;
  },
  /** @param {string} c @returns {boolean} whether the case of `c` must be preserved. */
  isCaseSensitive(c) {
    return casesensitive.includes(c.toLowerCase());
  },
};

// packages/core/transliteration/src/transliterate.ts

/**
 * Groups patterns by the first character of `find`, keeping the list order
 * inside every group, so a lookup only scans patterns that can match.
 * @param {Array<{find: string}>} patternList
 * @returns {Map<string, Array<object>>}
 */
function indexByFirstChar(patternList) {
  const index = new Map();
  for (const pattern of patternList) {
    const key = pattern.find.charAt(0);
    const bucket = index.get(key);
    if (bucket) {
      bucket.push(pattern);
    } else {
      index.set(key, [pattern]);
    }
  }
  return index;
}

/**
 * Returns the first pattern (in table order) whose `find` occurs in `text`
 * at `position`, or `undefined` when none does.
 */
function findPatternAt(index, text, position) {
  return index
    .get(text.charAt(position))
    ?.find((pattern) => text.startsWith(pattern.find, position));
}

/** `patterns2` indexed for `avro`; built once instead of on every call. */
const AVRO_PATTERN_INDEX = indexByFirstChar(patterns2);

/**
 * `patterns2` reversed (Bengali -> Latin) for `orva`: rows with an empty side
 * and the bare "o" row are skipped, longest Bengali sequence first. The sort
 * is stable, so rows of equal length keep their table order.
 */
const ORVA_REVERSE_INDEX = indexByFirstChar(
  patterns2
    .filter((pattern) => pattern.find && pattern.replace && pattern.find !== "o")
    .map((pattern) => ({ find: pattern.replace, replace: pattern.find }))
    .sort((a, b) => b.find.length - a.find.length),
);

const MODE_TRANSLITERATION_FUNCTIONS = { avro, orva, banglish, lishbang };

/**
 * Transliterates `text` with the function registered for `options.mode`.
 * @param {string} text
 * @param {{mode?: string}} [options] - `mode` defaults to "avro" when omitted.
 * @returns {string}
 * @throws {Error} when `options.mode` is not a registered mode, or when the
 *   selected mode is not implemented yet ("banglish", "lishbang").
 */
function transliterate(text, options = { mode: "avro" }) {
  const mode = options?.mode ?? "avro";
  // Own-property check: inherited keys such as "toString" are not modes.
  if (!Object.prototype.hasOwnProperty.call(MODE_TRANSLITERATION_FUNCTIONS, mode)) {
    throw new Error("Invalid mode. Available modes are: 'avro', 'orva', 'banglish', 'lishbang'");
  }
  return MODE_TRANSLITERATION_FUNCTIONS[mode](text);
}

/**
 * Converts Latin phonetic input to Bengali using `patterns2`.
 * Characters that match no pattern are copied through unchanged.
 * The function keeps no state between calls.
 * @param {string} text
 * @returns {string}
 */
function avro(text) {
  const fixed = phonetic.fixString(text);
  const output = [];
  for (let currentIndex = 0; currentIndex < fixed.length; ++currentIndex) {
    const pattern = findPatternAt(AVRO_PATTERN_INDEX, fixed, currentIndex);
    if (!pattern) {
      output.push(fixed.charAt(currentIndex));
      continue;
    }
    const endIndex = currentIndex + pattern.find.length;
    const result = processPattern(pattern, fixed, currentIndex, endIndex, currentIndex);
    output.push(result.output);
    // newIndex is the last consumed character; the loop's ++ moves past it.
    currentIndex = result.newIndex;
  }
  return output.join("");
}

/**
 * Evaluates one rule condition against the text around a matched pattern.
 * Suffix conditions look at the character at `endIndex`, every other
 * condition at the character before `startIndex`. A leading "!" on the
 * scope negates the test; an unrecognised scope imposes no constraint.
 */
function isConditionMet(match, fixed, startIndex, endIndex) {
  const isSuffix = match.type === "suffix";
  const checkIndex = isSuffix ? endIndex : startIndex - 1;
  const isNegative = match.scope?.charAt(0) === "!";
  const scope = isNegative ? match.scope.substring(1) : match.scope;
  const inRange = checkIndex >= 0 && checkIndex < fixed.length;
  const neighbour = fixed.charAt(checkIndex);
  switch (scope) {
    case "punctuation":
      // Falling off either end of the text counts as punctuation.
      return (!inRange || phonetic.isPunctuation(neighbour)) !== isNegative;
    case "vowel":
      return (inRange && phonetic.isVowel(neighbour)) !== isNegative;
    case "consonant":
      return (inRange && phonetic.isConsonant(neighbour)) !== isNegative;
    case "exact": {
      const value = match.value || "";
      const [from, to] = isSuffix
        ? [endIndex, endIndex + value.length]
        : [startIndex - value.length, startIndex];
      return phonetic.isExact(value, fixed, from, to, isNegative);
    }
    default:
      return true;
  }
}

/**
 * Picks the replacement for a pattern matched at `fixed[startIndex, endIndex)`:
 * the `replace` of the first rule whose conditions all hold, otherwise the
 * pattern's own `replace`.
 * @param {object} pattern - a `patterns2` row
 * @param {string} fixed - the case-normalised input
 * @param {number} startIndex
 * @param {number} endIndex - exclusive
 * @param {number} currentIndex - unused; kept so the signature is unchanged
 * @returns {{isMatched: true, output: string, newIndex: number}} `newIndex`
 *   is the index of the last consumed character.
 */
function processPattern(pattern, fixed, startIndex, endIndex, currentIndex) {
  const matchingRule = pattern.rules?.find((rule) =>
    rule.matches.every((match) => isConditionMet(match, fixed, startIndex, endIndex)),
  );
  return {
    isMatched: true,
    output: (matchingRule ?? pattern).replace,
    newIndex: endIndex - 1,
  };
}

/**
 * Converts Bengali text to Latin: first the longest-match reverse of
 * `patterns2`, then every clean-up row of `patterns` in order.
 * Characters that match no reverse pattern are copied through for the
 * clean-up pass to handle.
 * @param {string} text
 * @returns {string}
 */
function orva(text) {
  let output = "";
  for (let cur = 0; cur < text.length; ++cur) {
    const pattern = findPatternAt(ORVA_REVERSE_INDEX, text, cur);
    if (pattern) {
      output += pattern.replace;
      cur += pattern.find.length - 1;
    } else {
      output += text.charAt(cur);
    }
  }
  // Literal replace-all; the rows are plain strings, not regular expressions.
  for (const { find, replace } of patterns) {
    output = output.split(find).join(replace);
  }
  return output;
}

/** Placeholder for the "banglish" mode. @throws {Error} always. */
function banglish(text) {
  throw new Error("Banglish transliteration is not implemented yet");
}

/** Placeholder for the "lishbang" mode. @throws {Error} always. */
function lishbang(text) {
  throw new Error("Lishbang transliteration is not implemented yet");
}

export { transliterate };