@bntk/transliteration
Version:
Build applications with Bengali natural language processing tools.
1,399 lines (1,395 loc) • 32 kB
JavaScript
// packages/core/transliteration/assets/orva.json
// Post-processing substitutions used by orva() (Bengali -> Latin direction).
// orva() applies every entry, in array order, as a global replacement on the text
// produced by its reverse-rule pass, so the order of the entries is significant.
// Each entry has the shape { find, replace, rules: [] }.
var patterns = (() => {
  const hasanta = "\u09cd";
  const latinLetters = "abcdefghijklmnopqrstuvwxyz";

  // [find, replace] pairs, listed in application order.
  const leadingPairs = [
    ["`", ""],
    // Independent vowels.
    ["ই", "i"],
    ["ঈ", "I"],
    ["উ", "oo"],
    ["ঊ", "OO"],
    ["ঋ", "R"],
    ["এ", "e"],
    ["ঐ", "oi"],
    ["ও", "o"],
    ["ঔ", "ou"],
    ["অ", "o"],
    ["আ", "a"],
    // Consonants.
    ["ক", "k"],
    ["খ", "kh"],
    ["গ", "g"],
    ["ঘ", "gh"],
    ["ঙ", "ng"],
    ["চ", "ch"],
    ["ছ", "Ch"],
    ["জ", "j"],
    ["ঝ", "jh"],
    ["ঞ", "n"],
    ["ট", "T"],
    ["ঠ", "Th"],
    ["ড", "D"],
    ["ঢ", "Dh"],
    ["ণ", "N"],
    ["ত", "t"],
    ["থ", "th"],
    ["দ", "d"],
    ["ধ", "dh"],
    ["ন", "n"],
    ["প", "p"],
    ["ফ", "ph"],
    ["ব", "b"],
    ["ভ", "bh"],
    ["ম", "m"],
    ["য", "z"],
    ["র", "r"],
    ["ল", "l"],
    ["শ", "sh"],
    ["ষ", "Sh"],
    ["স", "s"],
    ["হ", "h"],
    ["ড়", "R"],
    ["ঢ়", "Rh"],
    ["য়", "y"],
    ["ৎ", "t"],
    // Signs.
    ["ং", "ng"],
    ["ঃ", ":"],
    ["ঁ", "~"],
    // Latin clean-up.
    ["w", "O"],
    ["oo", "u"]
  ];

  // One entry per Latin letter (a-z, then A-Z) followed by a hasanta; each is replaced by "".
  const hasantaPairs = [...latinLetters, ...latinLetters.toUpperCase()].map(
    (letter) => [letter + hasanta, ""]
  );

  // NOTE(review): this last entry removes every remaining "u", including those
  // produced by the "oo" -> "u" entry above — confirm this is intended.
  const trailingPairs = [["u", ""]];

  return [...leadingPairs, ...hasantaPairs, ...trailingPairs].map(
    ([find, replace]) => ({ find, replace, rules: [] })
  );
})();
// packages/core/transliteration/assets/rules.json
// Rule table for the "avro" (Latin -> Bengali) direction.
//
// Entry shape: { find, replace, rules? }.
//   - `find` is compared against the case-normalised input at the current position.
//   - `replace` is emitted when the entry has no `rules`, or when none of its rules applies.
//   - `rules` (optional) is a list of { matches, replace }. processPattern() returns the
//     `replace` of the first rule whose `matches` conditions all hold. Each condition looks
//     at the text before ("prefix") or after ("suffix") the matched span; `scope` is one of
//     "punctuation", "vowel", "consonant" or "exact" (compared with `value`), and a leading
//     "!" negates it.
//
// Order matters: avro() walks this array from the top and uses the first entry whose `find`
// equals the text at the current position, so longer sequences are listed before their
// prefixes. orva() also reads this table, swapping `find` and `replace`.
var patterns2 = [
// b / v
{ find: "bhl", replace: "ভ্ল" },
{ find: "psh", replace: "পশ" },
{ find: "bdh", replace: "ব্ধ" },
{ find: "bj", replace: "ব্জ" },
{ find: "bd", replace: "ব্দ" },
{ find: "bb", replace: "ব্ব" },
{ find: "bl", replace: "ব্ল" },
{ find: "bh", replace: "ভ" },
{ find: "vl", replace: "ভ্ল" },
{ find: "b", replace: "ব" },
{ find: "v", replace: "ভ" },
// c
{ find: "cNG", replace: "চ্ঞ" },
{ find: "cch", replace: "চ্ছ" },
{ find: "cc", replace: "চ্চ" },
{ find: "ch", replace: "ছ" },
{ find: "c", replace: "চ" },
// d / D
{ find: "dhn", replace: "ধ্ন" },
{ find: "dhm", replace: "ধ্ম" },
{ find: "dgh", replace: "দ্ঘ" },
{ find: "ddh", replace: "দ্ধ" },
{ find: "dbh", replace: "দ্ভ" },
{ find: "dv", replace: "দ্ভ" },
{ find: "dm", replace: "দ্ম" },
{ find: "DD", replace: "ড্ড" },
{ find: "Dh", replace: "ঢ" },
{ find: "dh", replace: "ধ" },
{ find: "dg", replace: "দ্গ" },
{ find: "dd", replace: "দ্দ" },
{ find: "D", replace: "ড" },
{ find: "d", replace: "দ" },
// Full stops; a trailing backtick keeps the Latin "." as typed.
{ find: "...", replace: "..." },
{ find: ".`", replace: "." },
{ find: "..", replace: "।।" },
{ find: ".", replace: "।" },
// g / G
{ find: "ghn", replace: "ঘ্ন" },
{ find: "Ghn", replace: "ঘ্ন" },
{ find: "gdh", replace: "গ্ধ" },
{ find: "Gdh", replace: "গ্ধ" },
{ find: "gN", replace: "গ্ণ" },
{ find: "GN", replace: "গ্ণ" },
{ find: "gn", replace: "গ্ন" },
{ find: "Gn", replace: "গ্ন" },
{ find: "gm", replace: "গ্ম" },
{ find: "Gm", replace: "গ্ম" },
{ find: "gl", replace: "গ্ল" },
{ find: "Gl", replace: "গ্ল" },
{ find: "gg", replace: "জ্ঞ" },
{ find: "GG", replace: "জ্ঞ" },
{ find: "Gg", replace: "জ্ঞ" },
{ find: "gG", replace: "জ্ঞ" },
{ find: "gh", replace: "ঘ" },
{ find: "Gh", replace: "ঘ" },
{ find: "g", replace: "গ" },
{ find: "G", replace: "গ" },
// h
{ find: "hN", replace: "হ্ণ" },
{ find: "hn", replace: "হ্ন" },
{ find: "hm", replace: "হ্ম" },
{ find: "hl", replace: "হ্ল" },
{ find: "h", replace: "হ" },
// j / J
{ find: "jjh", replace: "জ্ঝ" },
{ find: "jNG", replace: "জ্ঞ" },
{ find: "jh", replace: "ঝ" },
{ find: "jj", replace: "জ্জ" },
{ find: "j", replace: "জ" },
{ find: "J", replace: "জ" },
// k
{ find: "kkhN", replace: "ক্ষ্ণ" },
{ find: "kShN", replace: "ক্ষ্ণ" },
{ find: "kkhm", replace: "ক্ষ্ম" },
{ find: "kShm", replace: "ক্ষ্ম" },
{ find: "kxN", replace: "ক্ষ্ণ" },
{ find: "kxm", replace: "ক্ষ্ম" },
{ find: "kkh", replace: "ক্ষ" },
{ find: "kSh", replace: "ক্ষ" },
{ find: "ksh", replace: "কশ" },
{ find: "kx", replace: "ক্ষ" },
{ find: "kk", replace: "ক্ক" },
{ find: "kT", replace: "ক্ট" },
{ find: "kt", replace: "ক্ত" },
{ find: "kl", replace: "ক্ল" },
{ find: "ks", replace: "ক্স" },
{ find: "kh", replace: "খ" },
{ find: "k", replace: "ক" },
// l
{ find: "lbh", replace: "ল্ভ" },
{ find: "ldh", replace: "ল্ধ" },
{ find: "lkh", replace: "লখ" },
{ find: "lgh", replace: "লঘ" },
{ find: "lph", replace: "লফ" },
{ find: "lk", replace: "ল্ক" },
{ find: "lg", replace: "ল্গ" },
{ find: "lT", replace: "ল্ট" },
{ find: "lD", replace: "ল্ড" },
{ find: "lp", replace: "ল্প" },
{ find: "lv", replace: "ল্ভ" },
{ find: "lm", replace: "ল্ম" },
{ find: "ll", replace: "ল্ল" },
{ find: "lb", replace: "ল্ব" },
{ find: "l", replace: "ল" },
// m
{ find: "mth", replace: "ম্থ" },
{ find: "mph", replace: "ম্ফ" },
{ find: "mbh", replace: "ম্ভ" },
{ find: "mpl", replace: "মপ্ল" },
{ find: "mn", replace: "ম্ন" },
{ find: "mp", replace: "ম্প" },
{ find: "mv", replace: "ম্ভ" },
{ find: "mm", replace: "ম্ম" },
{ find: "ml", replace: "ম্ল" },
{ find: "mb", replace: "ম্ব" },
{ find: "mf", replace: "ম্ফ" },
{ find: "m", replace: "ম" },
// Digits
{ find: "0", replace: "০" },
{ find: "1", replace: "১" },
{ find: "2", replace: "২" },
{ find: "3", replace: "৩" },
{ find: "4", replace: "৪" },
{ find: "5", replace: "৫" },
{ find: "6", replace: "৬" },
{ find: "7", replace: "৭" },
{ find: "8", replace: "৮" },
{ find: "9", replace: "৯" },
// n / N (including the Ng / NG / ng clusters)
{ find: "NgkSh", replace: "ঙ্ক্ষ" },
{ find: "Ngkkh", replace: "ঙ্ক্ষ" },
{ find: "NGch", replace: "ঞ্ছ" },
{ find: "Nggh", replace: "ঙ্ঘ" },
{ find: "Ngkh", replace: "ঙ্খ" },
{ find: "NGjh", replace: "ঞ্ঝ" },
{ find: "ngOU", replace: "ঙ্গৌ" },
{ find: "ngOI", replace: "ঙ্গৈ" },
{ find: "Ngkx", replace: "ঙ্ক্ষ" },
{ find: "NGc", replace: "ঞ্চ" },
{ find: "nch", replace: "ঞ্ছ" },
{ find: "njh", replace: "ঞ্ঝ" },
{ find: "ngh", replace: "ঙ্ঘ" },
{ find: "Ngk", replace: "ঙ্ক" },
{ find: "Ngx", replace: "ঙ্ষ" },
{ find: "Ngg", replace: "ঙ্গ" },
{ find: "Ngm", replace: "ঙ্ম" },
{ find: "NGj", replace: "ঞ্জ" },
{ find: "ndh", replace: "ন্ধ" },
{ find: "nTh", replace: "ন্ঠ" },
{ find: "NTh", replace: "ণ্ঠ" },
{ find: "nth", replace: "ন্থ" },
{ find: "nkh", replace: "ঙ্খ" },
{ find: "ngo", replace: "ঙ্গ" },
{ find: "nga", replace: "ঙ্গা" },
{ find: "ngi", replace: "ঙ্গি" },
{ find: "ngI", replace: "ঙ্গী" },
{ find: "ngu", replace: "ঙ্গু" },
{ find: "ngU", replace: "ঙ্গূ" },
{ find: "nge", replace: "ঙ্গে" },
{ find: "ngO", replace: "ঙ্গো" },
{ find: "NDh", replace: "ণ্ঢ" },
{ find: "nsh", replace: "নশ" },
{ find: "Ngr", replace: "ঙর" },
{ find: "NGr", replace: "ঞর" },
{ find: "ngr", replace: "ংর" },
{ find: "nj", replace: "ঞ্জ" },
{ find: "Ng", replace: "ঙ" },
{ find: "NG", replace: "ঞ" },
{ find: "nk", replace: "ঙ্ক" },
{ find: "ng", replace: "ং" },
{ find: "nn", replace: "ন্ন" },
{ find: "NN", replace: "ণ্ণ" },
{ find: "Nn", replace: "ণ্ন" },
{ find: "nm", replace: "ন্ম" },
{ find: "Nm", replace: "ণ্ম" },
{ find: "nd", replace: "ন্দ" },
{ find: "nT", replace: "ন্ট" },
{ find: "NT", replace: "ণ্ট" },
{ find: "nD", replace: "ন্ড" },
{ find: "ND", replace: "ণ্ড" },
{ find: "nt", replace: "ন্ত" },
{ find: "ns", replace: "ন্স" },
{ find: "nc", replace: "ঞ্চ" },
{ find: "n", replace: "ন" },
{ find: "N", replace: "ণ" },
// O / OI / OU: the default replacement is the vowel sign; the rules switch to the
// independent vowel when the preceding position is not a consonant.
{ find: "OI`", replace: "ৈ" },
{ find: "OU`", replace: "ৌ" },
{ find: "O`", replace: "ো" },
{
find: "OI",
replace: "ৈ",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null }
],
replace: "ঐ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "ঐ"
}
]
},
{
find: "OU",
replace: "ৌ",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null }
],
replace: "ঔ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "ঔ"
}
]
},
{
find: "O",
replace: "ো",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null }
],
replace: "ও"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "ও"
}
]
},
// p / f
{ find: "phl", replace: "ফ্ল" },
{ find: "pT", replace: "প্ট" },
{ find: "pt", replace: "প্ত" },
{ find: "pn", replace: "প্ন" },
{ find: "pp", replace: "প্প" },
{ find: "pl", replace: "প্ল" },
{ find: "ps", replace: "প্স" },
{ find: "ph", replace: "ফ" },
{ find: "fl", replace: "ফ্ল" },
{ find: "f", replace: "ফ" },
{ find: "p", replace: "প" },
// r / R: the rules choose between the standalone letter and the conjunct forms
// depending on the neighbouring characters.
{ find: "rri`", replace: "ৃ" },
{
find: "rri",
replace: "ৃ",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null }
],
replace: "ঋ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "ঋ"
}
]
},
{ find: "rrZ", replace: "রর্য" },
{ find: "rry", replace: "রর্য" },
{
find: "rZ",
replace: "র্য",
rules: [
{
matches: [
{ type: "prefix", scope: "consonant", value: null },
{ type: "prefix", scope: "!exact", value: "r" },
{ type: "prefix", scope: "!exact", value: "y" },
{ type: "prefix", scope: "!exact", value: "w" },
{ type: "prefix", scope: "!exact", value: "x" }
],
replace: "্র্য"
}
]
},
{
find: "ry",
replace: "র্য",
rules: [
{
matches: [
{ type: "prefix", scope: "consonant", value: null },
{ type: "prefix", scope: "!exact", value: "r" },
{ type: "prefix", scope: "!exact", value: "y" },
{ type: "prefix", scope: "!exact", value: "w" },
{ type: "prefix", scope: "!exact", value: "x" }
],
replace: "্র্য"
}
]
},
{
find: "rr",
replace: "রর",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!vowel", value: null },
{ type: "suffix", scope: "!exact", value: "r" },
{ type: "suffix", scope: "!punctuation", value: null }
],
replace: "র্"
},
{
matches: [
{ type: "prefix", scope: "consonant", value: null },
{ type: "prefix", scope: "!exact", value: "r" }
],
replace: "্রর"
}
]
},
{ find: "Rg", replace: "ড়্গ" },
{ find: "Rh", replace: "ঢ়" },
{ find: "R", replace: "ড়" },
{
find: "r",
replace: "র",
rules: [
{
matches: [
{ type: "prefix", scope: "consonant", value: null },
{ type: "prefix", scope: "!exact", value: "r" },
{ type: "prefix", scope: "!exact", value: "y" },
{ type: "prefix", scope: "!exact", value: "w" },
{ type: "prefix", scope: "!exact", value: "x" },
{ type: "prefix", scope: "!exact", value: "Z" }
],
replace: "্র"
}
]
},
// s / S
{ find: "shch", replace: "শ্ছ" },
{ find: "ShTh", replace: "ষ্ঠ" },
{ find: "Shph", replace: "ষ্ফ" },
{ find: "Sch", replace: "শ্ছ" },
{ find: "skl", replace: "স্ক্ল" },
{ find: "skh", replace: "স্খ" },
{ find: "sth", replace: "স্থ" },
{ find: "sph", replace: "স্ফ" },
{ find: "shc", replace: "শ্চ" },
{ find: "sht", replace: "শ্ত" },
{ find: "shn", replace: "শ্ন" },
{ find: "shm", replace: "শ্ম" },
{ find: "shl", replace: "শ্ল" },
{ find: "Shk", replace: "ষ্ক" },
{ find: "ShT", replace: "ষ্ট" },
{ find: "ShN", replace: "ষ্ণ" },
{ find: "Shp", replace: "ষ্প" },
{ find: "Shf", replace: "ষ্ফ" },
{ find: "Shm", replace: "ষ্ম" },
{ find: "spl", replace: "স্প্ল" },
{ find: "sk", replace: "স্ক" },
{ find: "Sc", replace: "শ্চ" },
{ find: "sT", replace: "স্ট" },
{ find: "st", replace: "স্ত" },
{ find: "sn", replace: "স্ন" },
{ find: "sp", replace: "স্প" },
{ find: "sf", replace: "স্ফ" },
{ find: "sm", replace: "স্ম" },
{ find: "sl", replace: "স্ল" },
{ find: "sh", replace: "শ" },
// NOTE(review): the next entry repeats the "Sc" entry a few lines above; the earlier
// one always matches first, so this one is never reached by avro().
{ find: "Sc", replace: "শ্চ" },
{ find: "St", replace: "শ্ত" },
{ find: "Sn", replace: "শ্ন" },
{ find: "Sm", replace: "শ্ম" },
{ find: "Sl", replace: "শ্ল" },
{ find: "Sh", replace: "ষ" },
{ find: "s", replace: "স" },
{ find: "S", replace: "শ" },
// oo / o
{ find: "oo`", replace: "ু" },
{
find: "oo",
replace: "ু",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "উ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "উ"
}
]
},
{ find: "o`", replace: "" },
{ find: "oZ", replace: "অ্য" },
// A plain "o" emits nothing by default; the rules below emit a letter only after a
// vowel or at a punctuation boundary.
{
find: "o",
replace: "",
rules: [
{
matches: [
{ type: "prefix", scope: "vowel", value: null },
{ type: "prefix", scope: "!exact", value: "o" }
],
replace: "ও"
},
{
matches: [
{ type: "prefix", scope: "vowel", value: null },
{ type: "prefix", scope: "exact", value: "o" }
],
replace: "অ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "অ"
}
]
},
// t / T
{ find: "tth", replace: "ত্থ" },
{ find: "t``", replace: "ৎ" },
{ find: "TT", replace: "ট্ট" },
{ find: "Tm", replace: "ট্ম" },
{ find: "Th", replace: "ঠ" },
{ find: "tn", replace: "ত্ন" },
{ find: "tm", replace: "ত্ম" },
{ find: "th", replace: "থ" },
{ find: "tt", replace: "ত্ত" },
{ find: "T", replace: "ট" },
{ find: "t", replace: "ত" },
// a
// NOTE(review): phonetic.fixString lower-cases "A" ("a" is not listed in
// `casesensitive`), so the "AZ" and "A`" entries below look unreachable from avro() — confirm.
{ find: "aZ", replace: "অ্যা" },
{ find: "AZ", replace: "অ্যা" },
{ find: "a`", replace: "া" },
{ find: "A`", replace: "া" },
{
find: "a",
replace: "া",
rules: [
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "আ"
},
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "prefix", scope: "!exact", value: "a" },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "য়া"
},
{
matches: [
{ type: "prefix", scope: "exact", value: "a" },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "আ"
}
]
},
// i / I
{ find: "i`", replace: "ি" },
{
find: "i",
replace: "ি",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ই"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ই"
}
]
},
{ find: "I`", replace: "ী" },
{
find: "I",
replace: "ী",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ঈ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ঈ"
}
]
},
// u / U
{ find: "u`", replace: "ু" },
{
find: "u",
replace: "ু",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "উ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "উ"
}
]
},
{ find: "U`", replace: "ূ" },
{
find: "U",
replace: "ূ",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ঊ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ঊ"
}
]
},
// ee / e
{ find: "ee`", replace: "ী" },
{
find: "ee",
replace: "ী",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ঈ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "ঈ"
}
]
},
{ find: "e`", replace: "ে" },
{
find: "e",
replace: "ে",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "এ"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "!exact", value: "`" }
],
replace: "এ"
}
]
},
// z / Z / y / Y
{ find: "z", replace: "য" },
{ find: "Z", replace: "্য" },
{
find: "y",
replace: "্য",
rules: [
{
matches: [
{ type: "prefix", scope: "!consonant", value: null },
{ type: "prefix", scope: "!punctuation", value: null }
],
replace: "য়"
},
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "ইয়"
}
]
},
{ find: "Y", replace: "য়" },
// q / w / x
{ find: "q", replace: "ক" },
{
find: "w",
replace: "ও",
rules: [
{
matches: [
{ type: "prefix", scope: "punctuation", value: null },
{ type: "suffix", scope: "vowel", value: null }
],
replace: "ওয়"
},
{
matches: [
{ type: "prefix", scope: "consonant", value: null }
],
replace: "্ব"
}
]
},
{
find: "x",
replace: "ক্স",
rules: [
{
matches: [
{ type: "prefix", scope: "punctuation", value: null }
],
replace: "এক্স"
}
]
},
// Remaining punctuation and symbols; a trailing backtick keeps the Latin character,
// and a lone backtick is dropped.
{ find: ":`", replace: ":" },
{ find: ":", replace: "ঃ" },
{ find: "^`", replace: "^" },
{ find: "^", replace: "ঁ" },
{ find: ",,", replace: "্" },
{ find: ",", replace: "," },
{ find: "$", replace: "৳" },
{ find: "`", replace: "" }
];
// Lower-case Latin vowels; phonetic.isVowel() tests membership in this string.
var vowel = "aeiou";
// Lower-case Latin consonants; phonetic.isConsonant() tests membership in this string.
var consonant = "bcdfghjklmnpqrstvwxyz";
// Letters whose case is kept as typed by phonetic.fixString(); every other character
// is lower-cased there. phonetic.isCaseSensitive() tests membership in this string.
var casesensitive = "oiudgjnrstyz";
// packages/core/transliteration/src/phonetic.ts
// Character-classification helpers shared by the avro rule engine.
var phonetic = {
  /**
   * Normalises the case of the input before pattern matching.
   * Characters listed in `casesensitive` (in either case) are kept as typed; every
   * other UTF-16 code unit is lower-cased.
   * @param {string} input - raw Latin text.
   * @returns {string} the case-normalised text (same length as the input for ASCII).
   */
  fixString(input) {
    return input
      .split("")
      .map((unit) => (this.isCaseSensitive(unit) ? unit : unit.toLowerCase()))
      .join("");
  },
  /**
   * @param {string} c - a single character.
   * @returns {boolean} true when `c` is one of the letters in `vowel` (case-insensitive).
   */
  isVowel(c) {
    // The length guard matters: "".includes-style checks are true for the empty string,
    // which is what charAt() yields for an out-of-range index.
    return c.length === 1 && vowel.includes(c.toLowerCase());
  },
  /**
   * @param {string} c - a single character.
   * @returns {boolean} true when `c` is one of the letters in `consonant` (case-insensitive).
   */
  isConsonant(c) {
    return c.length === 1 && consonant.includes(c.toLowerCase());
  },
  /**
   * @param {string} c - a single character, or "" for "no character here".
   * @returns {boolean} true for anything that is neither a vowel nor a consonant,
   *   including the empty string.
   */
  isPunctuation(c) {
    return !(this.isVowel(c) || this.isConsonant(c));
  },
  /**
   * Tests whether `heystack.substring(start, end)` equals `needle`, optionally negated.
   * @param {string} needle - text to look for.
   * @param {string} heystack - text to look in.
   * @param {number} start - inclusive start index.
   * @param {number} end - exclusive end index.
   * @param {boolean} not - when true the result is inverted.
   * @returns {boolean}
   */
  isExact(needle, heystack, start, end, not) {
    // `end` must be strictly less than heystack.length, so a needle that ends exactly at
    // the end of the string is never reported as a match.
    // NOTE(review): presumably intentional and relied on by the rule table — confirm
    // before relaxing this to `<=`.
    const found = start >= 0 && end < heystack.length && heystack.substring(start, end) === needle;
    return found !== not;
  },
  /**
   * @param {string} c - a single character.
   * @returns {boolean} true when the case of `c` must be preserved, i.e. `c` is one of
   *   the letters in `casesensitive` (in either case).
   */
  isCaseSensitive(c) {
    return c.length === 1 && casesensitive.includes(c.toLowerCase());
  }
};
// packages/core/transliteration/src/transliterate.ts
/**
 * Transliterates `text` using the engine selected by `options.mode`.
 *
 * @param {string} text - the text to transliterate.
 * @param {{ mode: string }} [options] - `mode` is one of "avro", "orva", "banglish",
 *   "lishbang"; defaults to `{ mode: "avro" }` when the argument is omitted.
 * @returns {string} whatever the selected engine returns.
 * @throws {Error} "Invalid mode…" when `options.mode` is not a key of
 *   MODE_TRANSLITERATION_FUNCTIONS; the engines themselves may also throw.
 */
function transliterate(text, options = { mode: "avro" }) {
  const { mode } = options;
  // Only accept the table's own keys. A plain property read would also find inherited
  // members such as "toString" or "constructor" and call them as if they were engines.
  const isKnownMode = Object.prototype.hasOwnProperty.call(MODE_TRANSLITERATION_FUNCTIONS, mode);
  const fn = isKnownMode ? MODE_TRANSLITERATION_FUNCTIONS[mode] : undefined;
  if (typeof fn !== "function") {
    throw new Error("Invalid mode. Available modes are: 'avro', 'orva', 'banglish', 'lishbang'");
  }
  return fn(text);
}
// Dispatch table: mode name -> transliteration function.
var MODE_TRANSLITERATION_FUNCTIONS = {
  avro,
  orva,
  banglish,
  lishbang
};
// Memo for avro(): maps the next (up to) 8 characters of input to the pattern that
// matched there. Only the pattern is stored — never an index — because entries are
// reused at other positions and by later calls.
var patternMatchCache = new Map;
/**
 * Converts Latin (Avro phonetic) input to Bengali text using the `patterns2` rule table.
 *
 * The input is case-normalised with phonetic.fixString(), then scanned left to right.
 * At each position the first entry of `patterns2` whose `find` equals the upcoming text
 * is handed to processPattern(), which picks the replacement; characters that no entry
 * matches are copied through unchanged.
 *
 * @param {string} text - Latin input.
 * @returns {string} the transliterated text.
 */
function avro(text) {
  // The cache key is a window of this many characters; it must be at least as long as
  // the longest `find` in patterns2 for a window to determine its match.
  const WINDOW_SIZE = 8;
  // Upper bound on memoised windows so arbitrary input cannot grow the cache forever.
  const MAX_CACHED_WINDOWS = 10000;

  const fixed = phonetic.fixString(text);
  const len = fixed.length;
  const output = [];

  for (let currentIndex = 0; currentIndex < len; ++currentIndex) {
    const startIndex = currentIndex;
    const cacheKey = fixed.slice(startIndex, startIndex + WINDOW_SIZE);

    // Fast path: reuse the pattern remembered for this window. The end index is derived
    // from the current position; a remembered absolute index would be stale here and
    // could even move the scan backwards.
    const cachedPattern = patternMatchCache.get(cacheKey);
    if (cachedPattern && fixed.startsWith(cachedPattern.find, startIndex)) {
      const result = processPattern(
        cachedPattern,
        fixed,
        startIndex,
        startIndex + cachedPattern.find.length,
        currentIndex
      );
      if (result.isMatched) {
        output.push(result.output);
        currentIndex = result.newIndex;
        continue;
      }
    }

    // Slow path: first entry (in table order) whose `find` occurs at this position.
    let isMatched = false;
    for (const pattern of patterns2) {
      if (!fixed.startsWith(pattern.find, startIndex)) {
        continue;
      }
      if (patternMatchCache.size >= MAX_CACHED_WINDOWS) {
        patternMatchCache.clear();
      }
      patternMatchCache.set(cacheKey, pattern);
      const endIndex = startIndex + pattern.find.length;
      const result = processPattern(pattern, fixed, startIndex, endIndex, currentIndex);
      if (result.isMatched) {
        output.push(result.output);
        currentIndex = result.newIndex;
        isMatched = true;
        break;
      }
    }

    if (!isMatched) {
      output.push(fixed.charAt(currentIndex));
    }
  }

  return output.join("");
}
/**
 * Chooses the replacement for a pattern that matched `fixed` at [startIndex, endIndex).
 *
 * When the pattern has `rules`, the first rule whose `matches` conditions all hold
 * supplies the replacement; otherwise (or when no rule applies) `pattern.replace` is used.
 * A condition inspects the character just before the span (any `type` other than
 * "suffix") or just after it (`type: "suffix"`). Its `scope` is "punctuation", "vowel",
 * "consonant" or "exact", optionally prefixed with "!" to negate it; any other scope is
 * treated as satisfied. Positions outside the string count as punctuation.
 *
 * @param {{find: string, replace: string, rules?: Array}} pattern - matched table entry.
 * @param {string} fixed - the case-normalised input.
 * @param {number} startIndex - index of the first matched character.
 * @param {number} endIndex - index just past the last matched character.
 * @param {number} currentIndex - unused; kept for the existing call signature.
 * @returns {{isMatched: boolean, output: string, newIndex: number}} `isMatched` is always
 *   true and `newIndex` is `endIndex - 1`.
 */
function processPattern(pattern, fixed, startIndex, endIndex, currentIndex) {
  const resolved = (replacement) => ({
    isMatched: true,
    output: replacement,
    newIndex: endIndex - 1
  });

  if (!pattern.rules) {
    return resolved(pattern.replace);
  }

  const size = fixed.length;

  // Evaluates one condition of a rule against the text around the matched span.
  const conditionHolds = (match) => {
    const isSuffix = match.type === "suffix";
    const isPrefix = match.type === "prefix";
    const at = isSuffix ? endIndex : startIndex - 1;
    const negated = match.scope?.charAt(0) === "!";
    const kind = negated ? match.scope.substring(1) : match.scope;
    const inBounds = (at >= 0 && isPrefix) || (at < size && isSuffix);

    switch (kind) {
      case "punctuation": {
        const outside = (at < 0 && isPrefix) || (at >= size && isSuffix);
        return (outside || phonetic.isPunctuation(fixed.charAt(at))) !== negated;
      }
      case "vowel":
        return (inBounds && phonetic.isVowel(fixed.charAt(at))) !== negated;
      case "consonant":
        return (inBounds && phonetic.isConsonant(fixed.charAt(at))) !== negated;
      case "exact": {
        const width = match.value?.length || 0;
        const from = isSuffix ? endIndex : startIndex - width;
        const to = isSuffix ? endIndex + width : startIndex;
        // isExact applies the negation itself.
        return phonetic.isExact(match.value || "", fixed, from, to, negated);
      }
      default:
        return true;
    }
  };

  const winningRule = pattern.rules.find((rule) => rule.matches.every(conditionHolds));
  return resolved(winningRule ? winningRule.replace : pattern.replace);
}
/**
 * Converts Bengali text to Latin by running the avro rule table in reverse.
 *
 * Pass 1: every `patterns2` entry with a non-empty `find` and `replace` (except the
 * entry for "o") is inverted, and the text is scanned left to right, always taking the
 * longest inverted entry that occurs at the current position. The entries' context
 * `rules` are not consulted. Unmatched characters are copied through.
 * Pass 2: each entry of `patterns` is applied, in order, as a literal global replacement.
 *
 * @param {string} text - Bengali input.
 * @returns {string} the Latin transliteration.
 */
function orva(text) {
  const reversePatterns = patterns2
    .filter((pattern) => pattern.find && pattern.replace && pattern.find !== "o")
    .map((pattern) => ({ find: pattern.replace, replace: pattern.find }))
    // Longest first; the sort is stable, so ties keep their table order.
    .sort((a, b) => b.find.length - a.find.length);

  const pieces = [];
  for (let cur = 0; cur < text.length; ++cur) {
    const matched = reversePatterns.find((pattern) => text.startsWith(pattern.find, cur));
    if (matched) {
      pieces.push(matched.replace);
      // Every inverted `find` is non-empty, so this never moves the cursor backwards.
      cur += matched.find.length - 1;
    } else {
      pieces.push(text.charAt(cur));
    }
  }

  let output = pieces.join("");
  for (const rule of patterns) {
    if (rule.find === "") {
      continue;
    }
    // split/join replaces literally: `find` is not parsed as a regular expression and
    // "$" sequences in `replace` are not expanded.
    output = output.split(rule.find).join(rule.replace);
  }
  return output;
}
/**
 * Placeholder for the "banglish" mode.
 * @param {string} text - ignored.
 * @throws {Error} always; the mode has no implementation.
 */
function banglish(text) {
  const message = "Banglish transliteration is not implemented yet";
  throw new Error(message);
}
/**
 * Placeholder for the "lishbang" mode.
 * @param {string} text - ignored.
 * @throws {Error} always; the mode has no implementation.
 */
function lishbang(text) {
  const message = "Lishbang transliteration is not implemented yet";
  throw new Error(message);
}
export {
transliterate
};