anvim
Version:
ANVIM - A New Vietnamese Input Method (AVIM-compatible Vietnamese input engine in TypeScript)
999 lines (998 loc) • 36.8 kB
JavaScript
/**
* ANVIM - A New Vietnamese Input Method
* Direct TypeScript migration of AVIM.js (A Vietnamese Input Method)
*
* Based on AVIM JavaScript Vietnamese Input Method by Hieu Tran Dang
* Migrated to TypeScript while preserving exact logic and behavior
*/
export class AnvimEngine {
constructor(config) {
// Core AVIM properties and character maps
this.whit = false;
this.skey = [
97, 226, 259, 101, 234, 105, 111, 244, 417, 117, 432, 121, 65, 194, 258, 69,
202, 73, 79, 212, 416, 85, 431, 89,
];
this.db1 = [273, 272];
this.ds1 = ["d", "D"];
this.os1 = "o,O,ơ,Ơ,ó,Ó,ò,Ò,ọ,Ọ,ỏ,Ỏ,õ,Õ,ớ,Ớ,ờ,Ờ,ợ,Ợ,ở,Ở,ỡ,Ỡ".split(",");
this.ob1 = "ô,Ô,ô,Ô,ố,Ố,ồ,Ồ,ộ,Ộ,ổ,Ổ,ỗ,Ỗ,ố,Ố,ồ,Ồ,ộ,Ộ,ổ,Ổ,ỗ,Ỗ".split(",");
this.mocs1 = "o,O,ô,Ô,u,U,ó,Ó,ò,Ò,ọ,Ọ,ỏ,Ỏ,õ,Õ,ú,Ú,ù,Ù,ụ,Ụ,ủ,Ủ,ũ,Ũ,ố,Ố,ồ,Ồ,ộ,Ộ,ổ,Ổ,ỗ,Ỗ".split(",");
this.mocb1 = "ơ,Ơ,ơ,Ơ,ư,Ư,ớ,Ớ,ờ,Ờ,ợ,Ợ,ở,Ở,ỡ,Ỡ,ứ,Ứ,ừ,Ừ,ự,Ự,ử,Ử,ữ,Ữ,ớ,Ớ,ờ,Ờ,ợ,Ợ,ở,Ở,ỡ,Ỡ".split(",");
this.trangs1 = "a,A,â,Â,á,Á,à,À,ạ,Ạ,ả,Ả,ã,Ã,ấ,Ấ,ầ,Ầ,ậ,Ậ,ẩ,Ẩ,ẫ,Ẫ".split(",");
this.trangb1 = "ă,Ă,ă,Ă,ắ,Ắ,ằ,Ằ,ặ,Ặ,ẳ,Ẳ,ẵ,Ẵ,ắ,Ắ,ằ,Ằ,ặ,Ặ,ẳ,Ẳ,ẵ,Ẵ".split(",");
this.as1 = "a,A,ă,Ă,á,Á,à,À,ạ,Ạ,ả,Ả,ã,Ã,ắ,Ắ,ằ,Ằ,ặ,Ặ,ẳ,Ẳ,ẵ,Ẵ,ế,Ế,ề,Ề,ệ,Ệ,ể,Ể,ễ,Ễ".split(",");
this.ab1 = "â,Â,â,Â,ấ,Ấ,ầ,Ầ,ậ,Ậ,ẩ,Ẩ,ẫ,Ẫ,ấ,Ấ,ầ,Ầ,ậ,Ậ,ẩ,Ẩ,ẫ,Ẫ,é,É,è,È,ẹ,Ẹ,ẻ,Ẻ,ẽ,Ẽ".split(",");
this.es1 = "e,E,é,É,è,È,ẹ,Ẹ,ẻ,Ẻ,ẽ,Ẽ".split(",");
this.eb1 = "ê,Ê,ế,Ế,ề,Ề,ệ,Ệ,ể,Ể,ễ,Ễ".split(",");
this.english = "ĐÂĂƠƯÊÔ";
this.lowen = "đâăơưêô";
this.arA = "á,à,ả,ã,ạ,a,Á,À,Ả,Ã,Ạ,A".split(",");
this.mocrA = "ó,ò,ỏ,õ,ọ,o,ú,ù,ủ,ũ,ụ,u,Ó,Ò,Ỏ,Õ,Ọ,O,Ú,Ù,Ủ,Ũ,Ụ,U".split(",");
this.erA = "é,è,ẻ,ẽ,ẹ,e,É,È,Ẻ,Ẽ,Ẹ,E".split(",");
this.orA = "ó,ò,ỏ,õ,ọ,o,Ó,Ò,Ỏ,Õ,Ọ,O".split(",");
this.aA = "ấ,ầ,ẩ,ẫ,ậ,â,Ấ,Ầ,Ẩ,Ẫ,Ậ,Â".split(",");
this.oA = "ố,ồ,ổ,ỗ,ộ,ô,Ố,Ồ,Ổ,Ỗ,Ộ,Ô".split(",");
this.mocA = "ớ,ờ,ở,ỡ,ợ,ơ,ứ,ừ,ử,ữ,ự,ư,Ớ,Ờ,Ở,Ỡ,Ợ,Ơ,Ứ,Ừ,Ử,Ữ,Ự,Ư".split(",");
this.trangA = "ắ,ằ,ẳ,ẵ,ặ,ă,Ắ,Ằ,Ẳ,Ẵ,Ặ,Ă".split(",");
this.eA = "ế,ề,ể,ễ,ệ,ê,Ế,Ề,Ể,Ễ,Ệ,Ê".split(",");
this.skey2 = "a,a,a,e,e,i,o,o,o,u,u,y,A,A,A,E,E,I,O,O,O,U,U,Y".split(",");
// Method specific keys (configured per input method in setupForMethod)
this.DAWEO = "";
this.SFJRX = "";
this.S = "";
this.F = "";
this.J = "";
this.R = "";
this.X = "";
this.Z = "";
this.D = "";
this.moc = "";
this.trang = "";
this.A = "";
this.E = "";
this.O = "";
this.tw5 = "";
this.config = {
method: 1,
onOff: 1,
ckSpell: 1,
oldAccent: 0,
...config,
};
}
// =====================
// Low-level helpers
// =====================
fcc(x) {
return String.fromCharCode(x);
}
up(w) {
return w.toUpperCase();
}
nan(w) {
return isNaN(w) || w == "e";
}
getSF() {
const sf = [];
for (let x = 0; x < this.skey.length; x++)
sf[sf.length] = this.fcc(this.skey[x]);
return sf;
}
/**
* Return Unicode code points for tone-mark substitutions given a tone key.
* Mirrors AVIM tables for S/F/J/R/X.
*/
retKC(k) {
if (k == this.S)
return [
225, 7845, 7855, 233, 7871, 237, 243, 7889, 7899, 250, 7913, 253, 193,
7844, 7854, 201, 7870, 205, 211, 7888, 7898, 218, 7912, 221,
];
if (k == this.F)
return [
224, 7847, 7857, 232, 7873, 236, 242, 7891, 7901, 249, 7915, 7923, 192,
7846, 7856, 200, 7872, 204, 210, 7890, 7900, 217, 7914, 7922,
];
if (k == this.J)
return [
7841, 7853, 7863, 7865, 7879, 7883, 7885, 7897, 7907, 7909, 7921, 7925,
7840, 7852, 7862, 7864, 7878, 7882, 7884, 7896, 7906, 7908, 7920, 7924,
];
if (k == this.R)
return [
7843, 7849, 7859, 7867, 7875, 7881, 7887, 7893, 7903, 7911, 7917, 7927,
7842, 7848, 7858, 7866, 7874, 7880, 7886, 7892, 7902, 7910, 7916, 7926,
];
if (k == this.X)
return [
227, 7851, 7861, 7869, 7877, 297, 245, 7895, 7905, 361, 7919, 7929, 195,
7850, 7860, 7868, 7876, 296, 213, 7894, 7904, 360, 7918, 7928,
];
return [];
}
/**
* Build the set of all code points that represent any tone marks excluding
* the current one (when provided). Used for stripping tones.
*/
repSign(k) {
const u = [];
for (let a = 0; a < 5; a++) {
if (k == null || this.SFJRX.slice(a, a + 1) != this.up(k)) {
const temp = this.retKC(this.SFJRX.slice(a, a + 1));
for (let b = 0; b < temp.length; b++)
u[u.length] = temp[b];
}
}
return u;
}
/**
* Remove tone marks from a word, mapping marked characters back to base.
*/
unV(w) {
const u = this.repSign(null);
for (let a = 1; a <= w.length; a++) {
for (let b = 0; b < u.length; b++) {
if (u[b] == w.charCodeAt(w.length - a)) {
w =
w.slice(0, w.length - a) +
this.fcc(this.skey[b % 24]) +
w.slice(w.length - a + 1);
}
}
}
return w;
}
/**
* Convert Vietnamese base characters to ASCII-like placeholders per AVIM.
*/
unV2(w) {
for (let a = 1; a <= w.length; a++) {
for (let b = 0; b < this.skey.length; b++) {
if (this.skey[b] == w.charCodeAt(w.length - a)) {
w =
w.slice(0, w.length - a) +
this.skey2[b] +
w.slice(w.length - a + 1);
}
}
}
return w;
}
/**
* Map DAWEO (A/E/O + horn/mark) combinations for VIQR/VNI paths.
*/
DAWEOF(cc, k, g) {
const ret = [g];
const kA = [this.A, this.moc, this.trang, this.E, this.O];
const ccA = [this.aA, this.mocA, this.trangA, this.eA, this.oA];
const ccrA = [this.arA, this.mocrA, this.arA, this.erA, this.orA];
for (let a = 0; a < kA.length; a++) {
if (k == kA[a]) {
for (let z = 0; z < ccA[a].length; z++) {
if (cc == ccA[a][z])
ret[1] = ccrA[a][z];
}
}
}
if (ret[1])
return ret;
return false;
}
/**
* Spell checker hook (disabled, preserved for compatibility).
*/
ckspell(_w, _k) {
return false;
}
/**
* Core locator: determine position in word to apply transformation for key k
* given the method-specific vowel set sf.
*/
findC(w, k, sf) {
const method = this.config.method;
if ((method == 3 || method == 4) && w.slice(w.length - 1, w.length) == "\\")
return [1, k.charCodeAt(0)];
// Debug logging for problematic cases
const isDebug = (w === "ye" || w === "tuye") && (k === "j" || k === "s");
if (isDebug) {
console.log(`\n=== DEBUG findC for "${w}" + "${k}" ===`);
}
let str = "";
let res;
let cc = "";
let pc = "";
let tE = "";
const vowA = [];
const s = "ÂĂÊÔƠƯêâăơôư";
let c = 0;
let dn = false;
const uw = this.up(w);
let tv;
let g;
const DAWEOFA = this.up(this.aA.join() +
this.eA.join() +
this.mocA.join() +
this.trangA.join() +
this.oA.join() +
this.english);
let h;
let uc;
for (g = 0; g < sf.length; g++) {
if (this.nan(sf[g]))
str += sf[g];
else
str += this.fcc(sf[g]);
}
const uk = this.up(k);
const w2 = this.up(this.unV2(this.unV(w)));
const dont = "ƯA,ƯU".split(",");
if (this.DAWEO.indexOf(uk) >= 0) {
if (uk == this.moc) {
if (w2.indexOf("UU") >= 0 && this.tw5 != dont[1]) {
if (w2.indexOf("UU") == w.length - 2)
res = 2;
else
return false;
}
else if (w2.indexOf("UOU") >= 0) {
if (w2.indexOf("UOU") == w.length - 3)
res = 2;
else
return false;
}
}
if (!res) {
for (g = 1; g <= w.length; g++) {
cc = w.slice(w.length - g, w.length - g + 1);
pc = this.up(w.slice(w.length - g - 1, w.length - g));
uc = this.up(cc);
for (h = 0; h < dont.length; h++) {
if (this.tw5 == dont[h] && this.tw5 == this.unV(pc + uc))
dn = true;
}
if (dn) {
dn = false;
continue;
}
if (str.indexOf(uc) >= 0) {
if ((uk == this.moc &&
this.unV(uc) == "U" &&
this.up(this.unV(w.slice(w.length - g + 1, w.length - g + 2))) == "A") ||
(uk == this.trang && this.unV(uc) == "A" && this.unV(pc) == "U")) {
if (this.unV(uc) == "U")
tv = 1;
else
tv = 2;
const ccc = this.up(w.slice(w.length - g - tv, w.length - g - tv + 1));
if (ccc != "Q")
res = g + tv - 1;
else if (uk == this.trang)
res = g;
else if (this.moc != this.trang)
return false;
}
else {
res = g;
}
if (!this.whit || uw.indexOf("Ư") < 0 || uw.indexOf("W") < 0)
break;
}
else if (DAWEOFA.indexOf(uc) >= 0) {
if (uk == this.D) {
if (cc == "đ")
res = [g, "d"];
else if (cc == "Đ")
res = [g, "D"];
}
else {
res = this.DAWEOF(cc, uk, g);
}
if (res)
break;
}
}
}
}
if (uk != this.Z && this.DAWEO.indexOf(uk) < 0) {
const tEC = this.retKC(uk);
for (g = 0; g < tEC.length; g++)
tE += this.fcc(tEC[g]);
}
for (g = 1; g <= w.length; g++) {
if (this.DAWEO.indexOf(uk) < 0) {
cc = this.up(w.slice(w.length - g, w.length - g + 1));
pc = this.up(w.slice(w.length - g - 1, w.length - g));
if (str.indexOf(cc) >= 0) {
if (cc == "U") {
if (pc != "Q") {
c++;
vowA[vowA.length] = g;
}
}
else if (cc == "I") {
if (pc != "G" || c <= 0) {
c++;
vowA[vowA.length] = g;
}
}
else {
c++;
vowA[vowA.length] = g;
}
}
else if (uk != this.Z) {
const signs = this.repSign(k);
for (h = 0; h < signs.length; h++) {
if (signs[h] == w.charCodeAt(w.length - g)) {
if (this.ckspell(w, k))
return false;
return [g, this.retKC(uk)[h % 24]];
}
}
for (h = 0; h < this.retKC(uk).length; h++) {
if (this.retKC(uk)[h] == w.charCodeAt(w.length - g))
return [g, this.fcc(this.skey[h])];
}
}
}
}
if (uk != this.Z && typeof res != "object") {
if (this.ckspell(w, k))
return false;
}
if (this.DAWEO.indexOf(uk) < 0) {
for (g = 1; g <= w.length; g++) {
if (uk != this.Z &&
s.indexOf(w.slice(w.length - g, w.length - g + 1)) >= 0)
return g;
else if (tE.indexOf(w.slice(w.length - g, w.length - g + 1)) >= 0) {
for (h = 0; h < this.retKC(uk).length; h++) {
if (w.slice(w.length - g, w.length - g + 1).charCodeAt(0) ==
this.retKC(uk)[h])
return [g, this.fcc(this.skey[h])];
}
}
}
}
if (res)
return res;
if (isDebug) {
console.log(`Vowel count c = ${c}, vowA = [${vowA}]`);
console.log(`uk = "${uk}", this.Z = "${this.Z}"`);
}
if (c == 1 || uk == this.Z) {
if (isDebug)
console.log('Taking path: c == 1 || uk == this.Z, returning vowA[0] =', vowA[0]);
return vowA[0];
}
else if (c == 2) {
let v = 2;
if (w.slice(w.length - 1) == " ")
v = 3;
const ttt = this.up(w.slice(w.length - v, w.length));
if (isDebug) {
console.log(`Taking path: c == 2`);
console.log(`ttt = "${ttt}" (last ${v} chars of "${w}")`);
console.log(`oldAccent = ${this.config.oldAccent}`);
}
if (this.config.oldAccent == 0 &&
(ttt == "UY" || ttt == "OA" || ttt == "OE")) {
if (isDebug)
console.log('Taking UY/OA/OE rule, returning vowA[0] =', vowA[0]);
return vowA[0];
}
// For YE combinations, tone goes on E (second vowel)
if (this.config.oldAccent == 0 &&
ttt == "YE") {
if (isDebug)
console.log('Taking YE rule, returning vowA[1] =', vowA[1]);
return vowA[1];
}
let c2 = 0;
let fdconsonant;
const sc = "BCD" + this.fcc(272) + "GHKLMNPQRSTVX";
const dc = "CH,GI,KH,NGH,GH,NG,NH,PH,QU,TH,TR".split(",");
for (h = 1; h <= w.length; h++) {
fdconsonant = false;
for (g = 0; g < dc.length; g++) {
if (this.up(w.slice(w.length - h - dc[g].length + 1, w.length - h + 1)).indexOf(dc[g]) >= 0) {
c2++;
fdconsonant = true;
if (dc[g] != "NGH")
h++;
else
h += 2;
}
}
if (!fdconsonant) {
if (sc.indexOf(this.up(w.slice(w.length - h, w.length - h + 1))) >= 0)
c2++;
else
break;
}
}
if (isDebug) {
console.log(`Consonant count c2 = ${c2}`);
}
if (c2 == 1 || c2 == 2) {
if (isDebug)
console.log('Taking c2 == 1 || c2 == 2, returning vowA[0] =', vowA[0]);
return vowA[0];
}
else {
if (isDebug)
console.log('Taking else branch, returning vowA[1] =', vowA[1]);
return vowA[1];
}
}
else if (c == 3) {
if (isDebug)
console.log('Taking path: c == 3, returning vowA[1] =', vowA[1]);
return vowA[1];
}
else {
if (isDebug)
console.log('Taking path: else, returning false');
return false;
}
}
/**
* Transform character at located position according to mapping tables.
*/
tr(k, w, by, sf) {
const pos = this.findC(w, k, sf);
if (pos) {
if (Array.isArray(pos) && pos[1]) {
const p0 = pos[0];
const repl = typeof pos[1] === "number"
? this.fcc(pos[1])
: pos[1];
return w.slice(0, w.length - p0) + repl + w.slice(w.length - p0 + 1);
}
else {
const pC = w.slice(w.length - pos, w.length - pos + 1);
const r = sf;
for (let g = 0; g < r.length; g++) {
let cmp;
if (this.nan(r[g]) || r[g] == "e")
cmp = pC;
else
cmp = pC.charCodeAt(0);
if (cmp == r[g]) {
let c;
if (!this.nan(by[g]))
c = by[g];
else
c = by[g].charCodeAt(0);
return (w.slice(0, w.length - pos) +
this.fcc(c) +
w.slice(w.length - pos + 1));
}
}
}
}
return w;
}
/**
* Return Unicode code point to replace character at pos with tone k.
*/
retUni(w, k, pos) {
const u = this.retKC(this.up(k));
let uC = 0, lC = 0;
const c = w.charCodeAt(w.length - pos);
const t = this.fcc(c);
for (let a = 0; a < this.skey.length; a++) {
if (this.skey[a] == c) {
if (a < 12) {
lC = a;
uC = a + 12;
}
else {
lC = a - 12;
uC = a;
}
if (t != this.up(t))
return u[lC];
return u[uC];
}
}
return c;
}
/**
* Single replacement: apply tone or diacritic for one letter.
*/
sr(w, k) {
const sf = this.getSF();
const pos = this.findC(w, k, sf);
if (pos) {
if (Array.isArray(pos) && pos[1]) {
const p0 = pos[0];
const repl = typeof pos[1] === "number"
? this.fcc(pos[1])
: pos[1];
return w.slice(0, w.length - p0) + repl + w.slice(w.length - p0 + 1);
}
else {
const c = this.retUni(w, k, pos);
return (w.slice(0, w.length - pos) +
this.fcc(c) +
w.slice(w.length - pos + 1));
}
}
return w;
}
/** Determine which tone key (S/F/R/X/J) is currently applied in word w */
detectToneKey(w) {
const keys = [this.S, this.F, this.R, this.X, this.J];
for (const key of keys) {
const codes = this.retKC(key);
for (let i = 0; i < w.length; i++) {
const code = w.charCodeAt(i);
for (let j = 0; j < codes.length; j++) {
if (codes[j] === code)
return key;
}
}
}
return null;
}
/** Normalize tone placement within the last word by reapplying the detected tone
* after removing existing tone marks. This leverages the original findC logic,
* avoiding any hard-coded triphthong lists.
*/
normalizeTonePlacement(input) {
// Determine the last word boundaries (simple whitespace split)
let idx = input.length - 1;
while (idx >= 0 && input[idx] !== ' ' && input[idx] !== '\n' && input[idx] !== '\t')
idx--;
const start = idx + 1;
const prefix = input.slice(0, start);
const word = input.slice(start);
if (!word)
return input;
const toneKey = this.detectToneKey(word);
if (!toneKey)
return input;
// Remove tone marks and reapply using sr so findC can choose correct vowel
const withoutTone = this.unV(word);
const reapplied = this.sr(withoutTone, toneKey);
return prefix + reapplied;
}
/**
* Configure method-specific keys and markers for TELEX/VNI/VIQR variants.
*/
setupForMethod(a) {
const method = this.config.method;
if (method == 2 || (method == 0 && a[0] == "9")) {
this.DAWEO = "6789";
this.SFJRX = "12534";
this.S = "1";
this.F = "2";
this.J = "5";
this.R = "3";
this.X = "4";
this.Z = "0";
this.D = "9";
this.moc = "7";
this.trang = "8";
this.A = "^";
this.E = "^";
this.O = "^";
}
else if (method == 3 || (method == 0 && a[4] == "+")) {
this.DAWEO = "^+(D";
this.SFJRX = "'`.?~";
this.S = "'";
this.F = "`";
this.J = ".";
this.R = "?";
this.X = "~";
this.Z = "-";
this.D = "D";
this.moc = "+";
this.trang = "(";
this.A = "^";
this.E = "^";
this.O = "^";
}
else if (method == 4 || (method == 0 && a[4] == "*")) {
this.DAWEO = "^*(D";
this.SFJRX = "'`.?~";
this.S = "'";
this.F = "`";
this.J = ".";
this.R = "?";
this.X = "~";
this.Z = "-";
this.D = "D";
this.moc = "*";
this.trang = "(";
this.A = "^";
this.E = "^";
this.O = "^";
}
else {
this.SFJRX = "SFJRX";
this.DAWEO = "DAWEO";
this.D = "D";
this.S = "S";
this.F = "F";
this.J = "J";
this.R = "R";
this.X = "X";
this.Z = "Z";
this.trang = "W";
this.moc = "W";
this.A = "A";
this.E = "E";
this.O = "O";
}
}
/**
* Main AVIM transformation for a prefix w and typed key k under mapping a.
*/
main(w, k, a) {
const uk = this.up(k);
const bya = [
this.db1,
this.ab1,
this.eb1,
this.ob1,
this.mocb1,
this.trangb1,
];
const t = "d,D,a,A,a,A,o,O,u,U,e,E,o,O".split(",");
const sfa = [
this.ds1,
this.as1,
this.es1,
this.os1,
this.mocs1,
this.trangs1,
];
let by = [];
let sf = [];
this.setupForMethod(a);
let got = false;
if (this.SFJRX.indexOf(uk) >= 0) {
const ret = this.sr(w, k);
got = true;
if (ret !== w)
return ret;
}
else if (uk == this.Z) {
sf = this.repSign(null);
for (let h = 0; h < this.english.length; h++) {
sf[sf.length] = this.lowen.charCodeAt(h);
sf[sf.length] = this.english.charCodeAt(h);
}
for (let h = 0; h < 5; h++) {
for (let g = 0; g < this.skey.length; g++) {
by[by.length] = this.skey[g];
}
}
for (let h = 0; h < t.length; h++)
by[by.length] = t[h];
got = true;
}
else {
for (let h = 0; h < a.length; h++) {
if (a[h] == uk) {
got = true;
by = by.concat(bya[h]);
sf = sf.concat(sfa[h]);
}
}
}
if (uk == this.moc)
this.whit = true;
if (!got)
return w;
if (this.DAWEO.indexOf(uk) >= 0 || this.Z.indexOf(uk) >= 0)
return this.tr(k, w, by, sf);
return w;
}
/** Utility: does word contain any character from set? */
hasCharFromSet(word, set) {
for (const ch of set) {
if (word.indexOf(ch) >= 0)
return true;
}
return false;
}
/** Utility: does word contain any tone mark? */
hasTone(word) {
const all = [
...this.retKC("S"),
...this.retKC("F"),
...this.retKC("R"),
...this.retKC("X"),
...this.retKC("J"),
];
for (const code of all) {
if (word.indexOf(this.fcc(code)) >= 0)
return true;
}
return false;
}
/** Map an A/E/O/W/D key to its diacritic character set for toggle detection. */
diacriticSetForKey(keyUpper) {
if (keyUpper === "E")
return this.eb1;
if (keyUpper === "A")
return this.ab1;
if (keyUpper === "O")
return this.ob1;
if (keyUpper === "W")
return this.mocb1.concat(this.trangb1);
if (keyUpper === "D")
return ["đ", "Đ"];
return null;
}
// =====================
// Public API - word/keystroke processing
// =====================
/** Flush a collected word using a fresh engine (avoid cross-word state). */
flushWord(word) {
if (!word)
return "";
const engine = new AnvimEngine({ ...this.config });
return engine.processWord(word);
}
/** Process full text by splitting on spaces and flushing per word. */
processText(input) {
if (input.length === 0)
return input;
let out = "";
let word = "";
const flush = () => {
if (!word)
return;
out += this.flushWord(word);
word = "";
};
for (let i = 0; i < input.length; i++) {
const ch = input[i];
if (ch === " ") {
flush();
out += " ";
}
else {
word += ch;
}
}
flush();
return out;
}
/** Process a full word by simulating keystrokes for each character. */
processWord(word) {
if (!word || word.length === 0)
return word;
if (this.config.onOff === 0)
return word;
// Use the same character-by-character approach as anvim function
// but using processWithKey which has the correct logic
let result = '';
for (let i = 0; i < word.length; i++) {
result = this.processWithKey(result, word[i]);
}
return result;
}
/**
* Process a single keystroke applied to the current prefix (closer to AVIM's
* real-time behavior). This method also implements two ergonomic
* improvements:
* - Incremental uo + w => ươ composition preserving case.
* - Long-distance horn composition when uo is followed by consonants.
*
* These improvements are designed to be backward compatible with AVIM.
*/
processWithKey(prefix, key) {
if (this.config.onOff === 0)
return prefix + key;
const telex = "D,A,E,O,W,W".split(",");
const vni = "9,6,6,6,7,8".split(",");
const viqr = "D,^,^,^,+,(".split(",");
const viqr2 = "D,^,^,^,*,(".split(",");
let uni = [];
let uni2 = [];
let uni3 = [];
let uni4 = [];
if (this.config.method == 0) {
// AUTO
const arr = [];
const value1 = [telex, vni, viqr, viqr2];
for (let a = 0; a < value1.length; a++)
arr[arr.length] = value1[a];
for (let a = 0; a < arr.length; a++) {
if (a === 0)
uni = arr[a];
if (a === 1)
uni2 = arr[a];
if (a === 2)
uni3 = arr[a];
if (a === 3)
uni4 = arr[a];
}
}
else if (this.config.method == 1) {
uni = telex;
}
else if (this.config.method == 2) {
uni = vni;
}
else if (this.config.method == 3) {
uni = viqr;
}
else if (this.config.method == 4) {
uni = viqr2;
}
// Special incremental composition: uo + w/uow -> ươ (preserve case)
if (this.config.method === 1 || this.config.method === 0) {
const last2 = prefix.slice(-2);
if (/uo/i.test(last2) && key.toLowerCase() === "w") {
const u = last2[0];
const o = last2[1];
const isUpperU = u === u.toUpperCase();
const isUpperO = o === o.toUpperCase();
const composed = (isUpperU ? "Ư" : "ư") + (isUpperO ? "Ơ" : "ơ");
return prefix.slice(0, -2) + composed;
}
}
// Long-distance horn composition: if key is 'w' and word ends with consonants after 'uo', map to 'ươ'
if ((this.config.method === 1 || this.config.method === 0) &&
key.toLowerCase() === "w") {
// Define a simple Vietnamese vowel class
const vowelClass = /[aeiouyâăêôơưAEIOUYÂĂÊÔƠƯ]/;
// Find last 'uo' before trailing consonants
for (let i = prefix.length - 2; i >= 1; i--) {
if (prefix[i - 1].toLowerCase() === "u" &&
prefix[i].toLowerCase() === "o") {
// Ensure from i+1 to end there are no vowels (only consonants), so 'uo' is the last vowel cluster
let hasVowelAfter = false;
for (let j = i + 1; j < prefix.length; j++) {
if (vowelClass.test(prefix[j])) {
hasVowelAfter = true;
break;
}
}
if (!hasVowelAfter) {
const U = prefix[i - 1];
const O = prefix[i];
const isUUpper = U === U.toUpperCase();
const isOUpper = O === O.toUpperCase();
const composed = (isUUpper ? "Ư" : "ư") + (isOUpper ? "Ơ" : "ơ");
return (prefix.substring(0, i - 1) + composed + prefix.substring(i + 1));
}
}
}
}
// Call main with prefix (text before key), as in AVIM
const before = prefix;
let out = this.main(prefix, key, uni);
if (out !== before) {
const keyUpper = key.toUpperCase();
// Tone toggle-off: previously had tone, now removed -> append key (preserve case)
if ("SFJRX".indexOf(keyUpper) >= 0) {
if (this.hasTone(before) && !this.hasTone(out))
return out + key;
return out;
}
// Diacritic toggle-off: previously had respective diacritic, now removed -> append key (preserve case)
const set = this.diacriticSetForKey(keyUpper);
if (set) {
if (this.hasCharFromSet(before, set) && !this.hasCharFromSet(out, set))
return out + key;
return out;
}
return out;
}
if (this.config.method === 0) {
if (uni2.length) {
out = this.main(prefix, key, uni2);
if (out !== before) {
const keyUpper = key.toUpperCase();
if ("SFJRX".indexOf(keyUpper) >= 0) {
if (this.hasTone(before) && !this.hasTone(out))
return out + key;
return out;
}
const set = this.diacriticSetForKey(keyUpper);
if (set) {
if (this.hasCharFromSet(before, set) &&
!this.hasCharFromSet(out, set))
return out + key;
return out;
}
return out;
}
}
if (uni3.length) {
out = this.main(prefix, key, uni3);
if (out !== before) {
const keyUpper = key.toUpperCase();
if ("SFJRX".indexOf(keyUpper) >= 0) {
if (this.hasTone(before) && !this.hasTone(out))
return out + key;
return out;
}
const set = this.diacriticSetForKey(keyUpper);
if (set) {
if (this.hasCharFromSet(before, set) &&
!this.hasCharFromSet(out, set))
return out + key;
return out;
}
return out;
}
}
if (uni4.length) {
out = this.main(prefix, key, uni4);
if (out !== before) {
const keyUpper = key.toUpperCase();
if ("SFJRX".indexOf(keyUpper) >= 0) {
if (this.hasTone(before) && !this.hasTone(out))
return out + key;
return out;
}
const set = this.diacriticSetForKey(keyUpper);
if (set) {
if (this.hasCharFromSet(before, set) &&
!this.hasCharFromSet(out, set))
return out + key;
return out;
}
return out;
}
}
}
// If no change, decide whether to append key literally
const keyUpper = key.toUpperCase();
const diacriticSet = this.diacriticSetForKey(keyUpper);
if (diacriticSet) {
// appending diacritic when no transformation should just add the literal letter (preserve case)
return prefix + key;
}
// tone markers that didn't transform should append literally (preserve case)
if ("SFJRX".indexOf(keyUpper) >= 0)
return prefix + key;
return prefix + key;
}
// API
setMethod(method) {
this.config.method = method;
this.config.onOff = method === -1 ? 0 : 1;
}
setMethodByString(method) {
const methodMap = {
OFF: -1,
AUTO: 0,
TELEX: 1,
VNI: 2,
VIQR: 3,
"VIQR*": 4,
};
this.setMethod(methodMap[method] ?? 1);
}
setEnabled(enabled) {
this.config.onOff = enabled ? 1 : 0;
}
getMethod() {
return this.config.method;
}
getMethodString() {
const map = {
[-1]: "OFF",
[0]: "AUTO",
[1]: "TELEX",
[2]: "VNI",
[3]: "VIQR",
[4]: "VIQR*",
};
return map[this.config.method] ?? "TELEX";
}
isEnabled() {
return this.config.onOff === 1;
}
}