arabicfmt
Version:
Arabic-first formatting for numbers, currency, dates and bidirectional text across all 22 Arab League countries — with correct handling of the 2025–2026 Unicode currency-symbol transition (Saudi Riyal U+20C1, UAE Dirham U+20C3, Omani Rial U+20C4).
182 lines (174 loc) • 7.55 kB
text/typescript
import { N as NumeralSystem } from '../types-CK7PVYeU.cjs';
/**
* Remove Arabic diacritics (tashkeel / harakat) from a string.
*
* NOTE(review): the exact set of code points treated as diacritics is decided
* by the implementation, which is not visible in this declaration file.
*
* @param text - Text to process.
* @returns The text with diacritics removed.
*/
declare function stripTashkeel(text: string): string;
/**
* Remove the tatweel / kashida elongation character (ـ) from a string.
*
* @param text - Text to process.
* @returns The text with every tatweel removed.
*/
declare function removeTatweel(text: string): string;
/**
* Unify the alef variants (آ أ إ ٱ) to a bare alef (ا).
*
* @param text - Text to process.
* @returns The text with the listed alef variants replaced by ا.
*/
declare function normalizeAlef(text: string): string;
/**
* Convert alef maksura (ى) to yaa (ي).
*
* @param text - Text to process.
* @returns The text with every ى replaced by ي.
*/
declare function normalizeAlefMaksura(text: string): string;
/**
* Convert taa marbuta (ة) to haa (ه).
*
* This fold changes the meaning of words, so it is intended for building
* search / comparison keys only — do not apply it to text that will be
* displayed or stored as the canonical form.
*
* @param text - Text to process.
* @returns The text with every ة replaced by ه.
*/
declare function normalizeTaaMarbuta(text: string): string;
/**
* Switches that select which folds {@link normalizeArabic} applies.
*
* Every field is optional; an omitted field takes the default stated on it.
* The four folds that default to `true` are the ones described as safe (they
* do not change a word's identity); the remaining ones are opt-in.
*/
interface NormalizeOptions {
/** Strip diacritics (tashkeel). Default `true`. */
tashkeel?: boolean;
/** Remove the tatweel / kashida elongation character. Default `true`. */
tatweel?: boolean;
/** Unify alef variants to a bare ا. Default `true`. */
alef?: boolean;
/** Convert alef maksura ى to yaa ي. Default `true`. */
alefMaksura?: boolean;
/** Convert taa marbuta ة to haa ه. Changes word meaning. Default `false`. */
taaMarbuta?: boolean;
/** Fold hamza carriers (ؤ→و, ئ→ي) and drop standalone ء. Default `false`. */
hamza?: boolean;
/** Convert Arabic-Indic digits to Western digits. Default `false`. */
digits?: boolean;
}
/**
* Normalize Arabic text for search and comparison.
*
* With no options it strips diacritics and tatweel and unifies alef and
* alef-maksura — the safe set that doesn't change a word's identity. Enable
* {@link NormalizeOptions.taaMarbuta} and {@link NormalizeOptions.hamza} for
* more aggressive folding, and {@link NormalizeOptions.digits} to convert
* Arabic-Indic digits to Western ones.
*
* @param text - Text to normalize.
* @param options - Which folds to apply; omitted fields use the defaults
* documented on {@link NormalizeOptions}.
* @returns The normalized text.
*/
declare function normalizeArabic(text: string, options?: NormalizeOptions): string;
/**
* Aggressive normalization preset for building search keys.
*
* Applies every fold (including the meaning-changing taa marbuta and hamza
* folds), converts digits, lower-cases embedded Latin text and collapses
* whitespace. The result is meant to be compared or indexed, not displayed.
*
* @param text - Text to turn into a search key.
* @returns The folded search key.
*/
declare function normalizeForSearch(text: string): string;
/**
* Name of a CLDR plural category for Arabic.
*
* Arabic is one of the few languages that uses all six CLDR plural
* categories. Standard libraries (i18next, formatjs) handle this via locale
* data, but getting it right for Arabic requires care.
*
* CLDR plural rule for Arabic (integer n):
* zero: n = 0
* one: n = 1
* two: n = 2
* few: n mod 100 in 3..10
* many: n mod 100 in 11..99
* other: everything else (100, 101, 102, 200, 1000, fractions, ...)
*
* Note that "one" and "two" match only the exact values 1 and 2: by the rule
* above, 101 and 102 fall through to "other".
*/
type ArabicPluralForm = "zero" | "one" | "two" | "few" | "many" | "other";
/**
* Return the CLDR plural form for an Arabic count.
*
* NOTE(review): how negative or non-finite values are classified is not
* visible in this declaration file — confirm against the implementation.
*
* @param n - The count to classify.
* @returns The plural category that applies to `n`.
*
* @example arabicPluralForm(0) // "zero"
* @example arabicPluralForm(1) // "one"
* @example arabicPluralForm(2) // "two"
* @example arabicPluralForm(5) // "few" (3–10)
* @example arabicPluralForm(15) // "many" (11–99)
* @example arabicPluralForm(100) // "other"
*/
declare function arabicPluralForm(n: number): ArabicPluralForm;
/**
* A plural table: one value of type `T` per Arabic plural form, keyed by the
* {@link ArabicPluralForm} names.
*
* `one`, `few`, `many` and `other` are required; `zero` and `two` may be
* omitted. NOTE(review): which entry is used when `zero` or `two` is omitted
* is not visible in this declaration file — presumably a fallback such as
* `other`; confirm against the implementation.
*
* @typeParam T - Type of each table entry. Defaults to `string`.
*/
interface ArabicPluralForms<T = string> {
zero?: T;
one: T;
two?: T;
few: T;
many: T;
other: T;
}
/**
* Select the correct Arabic plural form from a set of strings.
*
* @param n - The count that decides which form is chosen.
* @param forms - The candidate strings, one per plural form. `zero` and `two`
* are optional. NOTE(review): the fallback used when they are omitted is not
* visible in this declaration file.
* @returns The string from `forms` that matches the plural form of `n`.
*
* @example
* arabicPlural(5, { one: "كتاب", two: "كتابان", few: "كتب", many: "كتاباً", other: "كتاب" })
* // "كتب"
*/
declare function arabicPlural(n: number, forms: ArabicPluralForms): string;
/**
* Arabic-locale-aware string comparison and collation.
*
* Arabic sorting follows different rules from default Unicode code-point order:
* - Hamza and alef variants should sort together.
* - Tashkeel (diacritics) should be ignored when sorting.
* - `Intl.Collator` with `"ar"` handles all of this natively.
*/
/**
* Compare two strings using Arabic locale collation. Drop-in replacement for
* the `compareFunction` argument to `Array.prototype.sort`.
*
* @param a - First string.
* @param b - Second string.
* @returns A number whose sign tells how `a` orders relative to `b`, in the
* form `Array.prototype.sort` expects from a comparator.
*
* @example ["ب", "أ", "ت"].sort(compareArabic) // ["أ", "ب", "ت"]
*/
declare function compareArabic(a: string, b: string): number;
/**
* Create an `Intl.Collator` configured for Arabic with sensible defaults.
* Override any option via `options`.
*
* NOTE(review): the concrete default option values are not visible in this
* declaration file — check the implementation before relying on a specific
* sensitivity or numeric setting.
*
* @param options - Standard `Intl.CollatorOptions` that override the defaults.
* @returns The configured collator.
*
* @example
* const col = createArabicCollator({ sensitivity: "variant" });
* names.sort((a, b) => col.compare(a, b));
*/
declare function createArabicCollator(options?: Intl.CollatorOptions): Intl.Collator;
/**
* Sort an array of strings using Arabic collation. Returns a new sorted array;
* the input is typed `readonly` and is not modified.
*
* @param strings - The strings to sort.
* @returns A new array holding the same strings in Arabic collation order.
*
* @example sortArabic(["ياسر", "أحمد", "بسام"]) // ["أحمد", "بسام", "ياسر"]
*/
declare function sortArabic(strings: readonly string[]): string[];
/**
* Arabic list formatting — join items into a grammatical Arabic list
* ("أحمد ومحمد وعلي", "تفاح أو موز أو برتقال").
*
* Wraps `Intl.ListFormat` where available and degrades to a hand-rolled join
* (using the Arabic conjunction و / disjunction أو) on runtimes without it.
*/
/**
* Options accepted by {@link formatList}. Every field is optional; an omitted
* field takes the default stated on it.
*/
interface FormatListOptions {
/** BCP-47 locale used to format the list. Default `"ar"`. */
locale?: string;
/**
* Kind of list to produce:
* - `"conjunction"` — "و" (and). *(default)*
* - `"disjunction"` — "أو" (or).
* - `"unit"` — list of measurements, no conjunction.
*/
type?: "conjunction" | "disjunction" | "unit";
/** Width of the connector. Default `"long"`. */
style?: "long" | "short" | "narrow";
/** Shape any digits in the result. When omitted, digits are left as-is. */
numerals?: NumeralSystem;
}
/**
* Format an iterable of values as an Arabic list.
*
* @param items - The values to join; strings and numbers may be mixed.
* @param options - Locale, list type, connector width and digit shaping; see
* {@link FormatListOptions} for the defaults.
* @returns The items joined into a single list string.
*
* @example formatList(["أحمد", "محمد", "علي"]) // "أحمد ومحمد وعلي"
* @example formatList(["تفاح", "موز"], { type: "disjunction" }) // "تفاح أو موز"
* @example formatList([1, 2, 3], { numerals: "arab" }) // "١ و٢ و٣"
*/
declare function formatList(items: Iterable<string | number>, options?: FormatListOptions): string;
/**
* Romanize Arabic script into readable Latin text, and turn it into URL slugs.
*
* This is a deterministic, pragmatic scheme (not a strict academic
* transliteration): consonants and short vowels map to their common Latin
* equivalents, shadda doubles the preceding consonant, and Arabic-Indic digits
* become Western digits. Short vowels only appear when the input is vowelled
* (carries tashkeel) — bare text romanizes consonant-only, e.g. "محمد" → "mhmd",
* while "مُحَمَّد" → "muhammad".
*/
/**
* Romanize Arabic script to Latin text.
*
* Short vowels appear in the output only when the input carries tashkeel;
* unvowelled text romanizes consonant-only. Arabic-Indic digits become
* Western digits.
*
* @param text - Text to romanize.
* @returns The Latin-script rendering of `text`.
*
* @example transliterate("مُحَمَّد") // "muhammad"
* @example transliterate("القاهرة") // "alqahrh"
* @example transliterate("غرفة ٢٠١") // "ghrfh 201"
*/
declare function transliterate(text: string): string;
/**
* Options accepted by {@link slugify}. Every field is optional; an omitted
* field takes the default stated on it.
*/
interface SlugifyOptions {
/** String placed between words in the slug. Default `"-"`. */
separator?: string;
/** Lower-case the result. Default `true`. */
lowercase?: boolean;
}
/**
* Turn Arabic (or mixed) text into a URL-safe slug.
*
* @param text - Text to convert; may mix Arabic and Latin script.
* @param options - Word separator and lower-casing; see
* {@link SlugifyOptions} for the defaults.
* @returns The slug.
*
* @example slugify("مدينة نصر") // "mdynh-nsr"
* @example slugify("القاهرة 2026") // "alqahrh-2026"
* @example slugify("Hello العالم", { separator: "_" }) // "hello_alaalm"
*/
declare function slugify(text: string, options?: SlugifyOptions): string;
export { type ArabicPluralForm, type ArabicPluralForms, type FormatListOptions, type NormalizeOptions, type SlugifyOptions, arabicPlural, arabicPluralForm, compareArabic, createArabicCollator, formatList, normalizeAlef, normalizeAlefMaksura, normalizeArabic, normalizeForSearch, normalizeTaaMarbuta, removeTatweel, slugify, sortArabic, stripTashkeel, transliterate };