arabicfmt

Version:

Arabic-first formatting for numbers, currency, dates and bidirectional text across all 22 Arab League countries — with correct handling of the 2025–2026 Unicode currency-symbol transition (Saudi Riyal U+20C1, UAE Dirham U+20C3, Omani Rial U+20C4).

github.com/cc1a2b/arabicfmt

cc1a2b/arabicfmt

182 lines (174 loc) • 7.55 kB

text/typescript

import { N as NumeralSystem } from '../types-CK7PVYeU.cjs';

/**
 * Remove Arabic diacritics (tashkeel / harakat).
 *
 * @param text - Input text.
 * @returns The text with diacritics removed.
 */
declare function stripTashkeel(text: string): string;

/**
 * Remove the tatweel / kashida elongation character (ـ).
 *
 * @param text - Input text.
 * @returns The text with tatweel characters removed.
 */
declare function removeTatweel(text: string): string;

/**
 * Unify alef variants (آ أ إ ٱ) to a bare alef (ا).
 *
 * @param text - Input text.
 * @returns The text with alef variants replaced by ا.
 */
declare function normalizeAlef(text: string): string;

/**
 * Convert alef maksura (ى) to yaa (ي).
 *
 * @param text - Input text.
 * @returns The text with ى replaced by ي.
 */
declare function normalizeAlefMaksura(text: string): string;

/**
 * Convert taa marbuta (ة) to haa (ه).
 *
 * This fold changes the meaning of words — use it for search keys only, not
 * for text that will be displayed.
 *
 * @param text - Input text.
 * @returns The text with ة replaced by ه.
 */
declare function normalizeTaaMarbuta(text: string): string;

/**
 * Per-fold switches for {@link normalizeArabic}. Every field is optional; the
 * default for each fold is stated on the field.
 */
interface NormalizeOptions {
    /** Strip diacritics. Default `true`. */
    tashkeel?: boolean;
    /** Remove tatweel / kashida. Default `true`. */
    tatweel?: boolean;
    /** Unify alef variants to ا. Default `true`. */
    alef?: boolean;
    /** Convert alef maksura ى to ي. Default `true`. */
    alefMaksura?: boolean;
    /** Convert taa marbuta ة to ه. Default `false`. */
    taaMarbuta?: boolean;
    /** Fold hamza carriers (ؤ→و, ئ→ي) and drop standalone ء. Default `false`. */
    hamza?: boolean;
    /** Convert Arabic-Indic digits to Western. Default `false`. */
    digits?: boolean;
}

/**
 * Normalize Arabic text for search and comparison.
 *
 * By default it strips diacritics and tatweel and unifies alef and
 * alef-maksura — the safe set that doesn't change a word's identity. Enable
 * {@link NormalizeOptions.taaMarbuta} and {@link NormalizeOptions.hamza} for
 * more aggressive folding.
 *
 * @param text - Input text.
 * @param options - Which folds to apply; see {@link NormalizeOptions} for the
 *   default of each switch.
 * @returns The normalized text.
 */
declare function normalizeArabic(text: string, options?: NormalizeOptions): string;

/**
 * Aggressive normalization preset for building search keys: applies every fold
 * (including taa marbuta and hamza), converts digits, lower-cases embedded
 * Latin text and collapses whitespace.
 *
 * @param text - Input text.
 * @returns A search key derived from `text`; not intended for display.
 */
declare function normalizeForSearch(text: string): string;

/**
 * Arabic plural forms use all six CLDR plural categories, which only a handful
 * of locales do. Standard libraries (i18next, formatjs) handle this via locale
 * data, but getting it right for Arabic requires care.
 *
 * CLDR plural rule for Arabic (integer n):
 *   zero:  n = 0
 *   one:   n = 1
 *   two:   n = 2
 *   few:   n mod 100 in 3..10
 *   many:  n mod 100 in 11..99
 *   other: everything else (100, 200, 1000, fractions, ...)
 */
type ArabicPluralForm = "zero" | "one" | "two" | "few" | "many" | "other";

/**
 * Return the CLDR plural form for an Arabic count.
 *
 * NOTE(review): how negative or non-finite counts are classified is not
 * visible from this declaration — confirm against the implementation.
 *
 * @param n - The count to classify.
 * @returns The plural category for `n`, following the rule table documented on
 *   {@link ArabicPluralForm}.
 *
 * @example arabicPluralForm(0)   // "zero"
 * @example arabicPluralForm(1)   // "one"
 * @example arabicPluralForm(2)   // "two"
 * @example arabicPluralForm(5)   // "few"   (3–10)
 * @example arabicPluralForm(15)  // "many"  (11–99)
 * @example arabicPluralForm(100) // "other"
 */
declare function arabicPluralForm(n: number): ArabicPluralForm;

/**
 * A table with one entry per plural form. Use as a key type for plural tables.
 *
 * `zero` and `two` are optional; `one`, `few`, `many` and `other` are
 * required. `T` is the entry type and defaults to `string`.
 */
interface ArabicPluralForms<T = string> {
    zero?: T;
    one: T;
    two?: T;
    few: T;
    many: T;
    other: T;
}

/**
 * Select the correct Arabic plural form from a set of strings.
 *
 * NOTE(review): which entry is used when `n` calls for `zero` or `two` and
 * that entry is omitted is not visible from this declaration — confirm against
 * the implementation.
 *
 * @param n - The count that decides the plural form.
 * @param forms - The candidate strings, keyed by plural form.
 * @returns The string chosen for `n`.
 *
 * @example
 * arabicPlural(5, { one: "كتاب", two: "كتابان", few: "كتب", many: "كتاباً", other: "كتاب" })
 * // "كتب"
 */
declare function arabicPlural(n: number, forms: ArabicPluralForms): string;

/**
 * Arabic-locale-aware string comparison and collation.
 *
 * Arabic sorting follows different rules from default Unicode code-point order:
 * - Hamza and alef variants should sort together.
 * - Tashkeel (diacritics) should be ignored when sorting.
 * - `Intl.Collator` with `"ar"` handles all of this natively.
 */
/**
 * Compare two strings using Arabic locale collation. Drop-in replacement for
 * the `compareFunction` argument to `Array.prototype.sort`.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns A number usable as a sort comparator result: negative when `a`
 *   sorts before `b`, positive when after, zero when they collate equal.
 *
 * @example ["ب", "أ", "ت"].sort(compareArabic) // ["أ", "ب", "ت"]
 */
declare function compareArabic(a: string, b: string): number;

/**
 * Create an `Intl.Collator` configured for Arabic with sensible defaults.
 * Override any option via `options`.
 *
 * NOTE(review): the concrete default option values are not visible from this
 * declaration — confirm against the implementation before relying on them.
 *
 * @param options - Collator options that take precedence over the defaults.
 * @returns A collator for Arabic text.
 *
 * @example
 * const col = createArabicCollator({ sensitivity: "variant" });
 * names.sort((a, b) => col.compare(a, b));
 */
declare function createArabicCollator(options?: Intl.CollatorOptions): Intl.Collator;

/**
 * Sort an array of strings using Arabic collation. Returns a new sorted array.
 *
 * @param strings - The strings to sort; accepted as `readonly`, and the result
 *   is a separate array.
 * @returns A new array holding the same strings in Arabic collation order.
 *
 * @example sortArabic(["ياسر", "أحمد", "بسام"]) // ["أحمد", "بسام", "ياسر"]
 */
declare function sortArabic(strings: readonly string[]): string[];

/**
 * Arabic list formatting — join items into a grammatical Arabic list
 * ("أحمد ومحمد وعلي", "تفاح أو موز أو برتقال").
 *
 * Wraps `Intl.ListFormat` where available and degrades to a hand-rolled join
 * (using the Arabic conjunction و / disjunction أو) on runtimes without it.
 */
interface FormatListOptions {
    /** BCP-47 locale. Default `"ar"`. */
    locale?: string;
    /**
     * - `"conjunction"` — "و" (and). *(default)*
     * - `"disjunction"` — "أو" (or).
     * - `"unit"` — list of measurements, no conjunction.
     */
    type?: "conjunction" | "disjunction" | "unit";
    /** Width of the connector. Default `"long"`. */
    style?: "long" | "short" | "narrow";
    /** Shape any digits in the result. Default leaves them as-is. */
    numerals?: NumeralSystem;
}

/**
 * Format an iterable of values as an Arabic list.
 *
 * @param items - The values to join; strings and numbers may be mixed.
 * @param options - Locale, list type, connector width and digit shaping; see
 *   {@link FormatListOptions} for the defaults.
 * @returns The joined list as a single string.
 *
 * @example formatList(["أحمد", "محمد", "علي"]) // "أحمد ومحمد وعلي"
 * @example formatList(["تفاح", "موز"], { type: "disjunction" }) // "تفاح أو موز"
 * @example formatList([1, 2, 3], { numerals: "arab" }) // "١ و٢ و٣"
 */
declare function formatList(items: Iterable<string | number>, options?: FormatListOptions): string;

/**
 * Romanize Arabic script into readable Latin text, and turn it into URL slugs.
 *
 * This is a deterministic, pragmatic scheme (not a strict academic
 * transliteration): consonants and short vowels map to their common Latin
 * equivalents, shadda doubles the preceding consonant, and Arabic-Indic digits
 * become Western digits. Short vowels only appear when the input is vowelled
 * (carries tashkeel) — bare text romanizes consonant-only, e.g. "محمد" → "mhmd",
 * while "مُحَمَّد" → "muhammad".
 */
/**
 * Romanize Arabic script to Latin text.
 *
 * @param text - Arabic or mixed-script input.
 * @returns The romanized text.
 *
 * @example transliterate("مُحَمَّد") // "muhammad"
 * @example transliterate("القاهرة") // "alqahrh"
 * @example transliterate("غرفة ٢٠١") // "ghrfh 201"
 */
declare function transliterate(text: string): string;

/** Options for {@link slugify}. */
interface SlugifyOptions {
    /** Word separator. Default `"-"`. */
    separator?: string;
    /** Lower-case the result. Default `true`. */
    lowercase?: boolean;
}

/**
 * Turn Arabic (or mixed) text into a URL-safe slug.
 *
 * @param text - Arabic or mixed-script input.
 * @param options - Separator and casing; see {@link SlugifyOptions} for the
 *   defaults.
 * @returns The slug.
 *
 * @example slugify("مدينة نصر") // "mdynh-nsr"
 * @example slugify("القاهرة 2026") // "alqahrh-2026"
 * @example slugify("Hello العالم", { separator: "_" }) // "hello_alaalm"
 */
declare function slugify(text: string, options?: SlugifyOptions): string;

export { type ArabicPluralForm, type ArabicPluralForms, type FormatListOptions, type NormalizeOptions, type SlugifyOptions, arabicPlural, arabicPluralForm, compareArabic, createArabicCollator, formatList, normalizeAlef, normalizeAlefMaksura, normalizeArabic, normalizeForSearch, normalizeTaaMarbuta, removeTatweel, slugify, sortArabic, stripTashkeel, transliterate };