@ahmed5938/sorani-helper
Version:
TypeScript library for processing Central Kurdish (Sorani) text: Arabic-to-Kurdish conversion, keyboard layout mapping, validation, and input handling.
177 lines (176 loc) • 5.83 kB
JavaScript
;
// Tag the CommonJS exports object with `__esModule: true`
// (presumably the ES-module interop marker emitted by the TypeScript compiler — confirm against the build setup).
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise every public export to `undefined` (`void 0`) up front;
// each name is assigned its real value further down in this file.
exports.EMOJI_REGEX = exports.NON_KURDISH_SCRIPTS = exports.ARABIC_VARIANTS_TO_REJECT = exports.ENGLISH_TO_KURDISH_LAYOUT = exports.ARABIC_TO_KURDISH_MAP = exports.ARABIC_TO_KURDISH_REPLACEMENTS = exports.KURDISH_SORANI_33_LETTERS = void 0;
// ==================== Central Kurdish Alphabet ====================
// Standard 33-letter Central Kurdish (Sorani) alphabet, approved by KRG
exports.KURDISH_SORANI_33_LETTERS = "ئابپتجچحخدرڕزژسشعغفڤقکگلڵمنوۆهەیێ";
// ==================== Arabic → Kurdish Conversion Mappings ====================
// The order of replacements is important:
// - Multi-character patterns (diacritics or ligatures) must be processed first.
// - Then single-character substitutions are applied.
exports.ARABIC_TO_KURDISH_REPLACEMENTS = [
// ---------- MULTI-CHARACTER COMBINATIONS (process first) ----------
[/لآ/g, "ڵا"], // "La" + alif madda → Kurdish "ڵا" (lla)
[/لاَ/g, "ڵا"], // "La" + alif with fatha → Kurdish "ڵا" (lla)
[/لَ/g, "ڵ"], // Lam with fatha → Kurdish retroflex "ڵ"
[/وَ/g, "ۆ"], // Waw with fatha → Kurdish "ۆ" (o)
[/یَ/g, "ێ"], // Yeh with fatha → Kurdish "ێ" (ê)
[/ىَ/g, "ێ"], // Alif maqṣūra with fatha → Kurdish "ێ"
[/رِ/g, "ڕ"], // Ra with kasra → Kurdish "ڕ" (voiced retroflex r)
// ---------- SINGLE CHARACTER MAPPINGS ----------
// Arabic letters and their standardized Kurdish equivalents
[/ك/g, "ک"], // Arabic Kaf → Kurdish Kaf
[/ي/g, "ی"], // Arabic Yeh → Kurdish Yeh
[/ى/g, "ی"], // Alif maqṣūra → Kurdish Yeh
[/ة/g, "ه"], // Tā’ marbūṭa → Heh + ZWNJ (KRG standard form)
[/أ/g, "ا"], // Alif with Hamza above → Alif
[/إ/g, "ا"], // Alif with Hamza below → Alif
[/آ/g, "ێ"], // Alif with madda → Kurdish Ê
[/ؤ/g, "ۆ"], // Waw with Hamza → Kurdish O
[/ھ/g, "ه"], // Heh Doachashmee → Standard Heh (U+0647)
[/ذ/g, "ژ"], // Thal → Kurdish Zhe
[/ث/g, "پ"], // Tha → Kurdish Pe (approximation)
[/ط/g, "گ"], // Ta → Kurdish Gaf (approximation for Kurdish orthography)
[/ض/g, "چ"], // Dad → Kurdish Che (approximation)
[/ظ/g, "ڤ"], // Za → Kurdish Ve
[/ء/g, "و"], // Hamza → ZWNJ + Waw
[/'/g, ""], // ASCII apostrophe → Zero Width Non-Joiner (ZWNJ)
// ---------- CHARACTERS THAT STAY UNCHANGED ----------
[/ص/g, "ص"], // Arabic Ṣād remains identical in Kurdish
];
// ==================== Legacy Simple Mapping (Deprecated) ====================
// Maintained for backward compatibility.
// Use ARABIC_TO_KURDISH_REPLACEMENTS for accurate transformations.
exports.ARABIC_TO_KURDISH_MAP = {
ك: "ک",
ي: "ی",
ى: "ی",
ة: "ه", // Simplified: uses plain Heh (without ZWNJ)
أ: "ا",
إ: "ا",
آ: "ا", // Simplified to plain Alif (no Ê)
ؤ: "و", // Simplified to Waw
ھ: "ه",
};
// ==================== English → Kurdish Keyboard Layout ====================
// Complete Central Kurdish (Sorani) keyboard layout for Windows/Linux
// Based on the Unicode CLDR and official Windows KBDKURD layout
// Reference: https://www.unicode.org/cldr/charts/40/keyboards/layouts/ckb.html
exports.ENGLISH_TO_KURDISH_LAYOUT = {
// ---------- NUMBER ROW ----------
// Unshifted
"`": "",
"1": "1",
"2": "2",
"3": "3",
"4": "4",
"5": "5",
"6": "6",
"7": "7",
"8": "8",
"9": "9",
"0": "0",
"-": "-",
"=": "=",
// Shifted
"~": "~",
"!": "!",
"@": "@",
"#": "#",
"$": "$",
"%": "٪", // Arabic percent sign
"^": "^",
"&": "&",
"*": "*",
"(": ")", // Reversed order on keyboard
")": "(", // Reversed order on keyboard
"_": "_",
"+": "+",
// ---------- TOP LETTER ROW ----------
// Unshifted
q: "ق",
w: "و",
e: "ە",
r: "ر",
t: "ت",
y: "ی",
u: "ئ", // Hamza key
i: "ح",
o: "ۆ",
p: "پ",
"[": "]", // Reversed brackets
"]": "[",
"\\": "\\",
// Shifted
Q: "ٌ",
W: "وو",
E: "ێ",
R: "ڕ",
T: "ث",
Y: "ی",
U: "وو",
I: "ع",
O: "ۆ",
P: "پ",
"{": "}",
"}": "{",
"|": "|",
// ---------- HOME ROW ----------
// Unshifted
a: "ا",
s: "س",
d: "د",
f: "ف",
g: "گ",
h: "ه",
j: "ژ",
k: "ک",
l: "ل",
";": "؛",
"'": "ع",
// Shifted
A: "ئ",
S: "ش",
D: "ذ",
F: "ف",
G: "غ",
H: "ه",
J: "ژ",
K: "ک",
L: "ڵ",
":": ":",
'"': '"',
// ---------- BOTTOM ROW ----------
// Unshifted
z: "ز",
x: "خ",
c: "ج",
v: "ڤ",
b: "ب",
n: "ن",
m: "م",
",": "،",
".": ".",
"/": "\\",
// Shifted
Z: "ض",
X: "غ",
C: "چ",
V: "ڤ",
B: "ب",
N: "ن",
M: "م",
"<": ">",
">": "<",
"?": "؟",
// ---------- SPACEBAR ----------
" ": " ", // Space remains unchanged
};
// ==================== Validation & Rejection Patterns ====================
// Reject Arabic variants that differ from KRG Kurdish orthography
exports.ARABIC_VARIANTS_TO_REJECT = /[كيىةؤأإآھ]/u;
// Reject non-Kurdish scripts (Latin, Chinese, Hebrew, etc.)
exports.NON_KURDISH_SCRIPTS = /[A-Za-z\u4E00-\u9FFF\u0590-\u05FF\u0750-\u077F]/u;
// ==================== Emoji Pattern ====================
// Matches most common emojis and variation selectors
exports.EMOJI_REGEX = /[\u{1F300}-\u{1F9FF}]|[\u{2600}-\u{26FF}]|[\u{2700}-\u{27BF}]|[\u{FE00}-\u{FE0F}]|[\u{1F000}-\u{1F02F}]|[\u{1F0A0}-\u{1F0FF}]|[\u{1F100}-\u{1F64F}]|[\u{1F680}-\u{1F6FF}]|[\u{1F910}-\u{1F96B}]|[\u{1F980}-\u{1F9E0}]/gu;