UNPKG

cldr

Version:

Library for extracting data from CLDR (the Unicode Common Locale Data Repository)

137 lines (121 loc) 2.94 kB
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2017 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision$"/>
  <transforms>
    <!--
      Forward-only transform from Persian text (source "fa") to the
      "fa_FONIPA" target.

      The rule text lives in a CDATA section, so every character in it,
      including line breaks, is data. In the transform rule syntax a "#"
      starts a comment that runs to the end of the line; each rule and each
      comment must therefore stay on its own line. Collapsing the rules onto
      one line turns everything after the first "#" into a comment.

      Rules are tried in the order written, so do not reorder them.
    -->
    <transform source="fa" target="fa_FONIPA" direction="forward" alias="fa-fonipa-t-fa">
      <tRule><![CDATA[
[\u200c \u200d] → ;  # Strip off ZWJ and ZWNJ.

# Decompose first; several rules below are written against the mark order
# that NFD produces (see the "Needed because NFD ..." notes).
::NFD;

# Rewrite similarly-looking Arabic letters to Persian.
ي → ی;
ى → ی;
ك → ک;
ە → ه;

# Pass separator: the rules below operate on the output of the rules above.
::NULL;

# Character classes used as context in the rules below.
# $BOUNDARY matches anything that is not a letter, mark, or number.
# $IPA_CONSONANT lists consonants as they appear in the IPA output.
$VOWEL = [ َ ِ ُ ٓ ا و ی];
$BOUNDARY = [^[:L:][:M:][:N:]];
$IPA_CONSONANT = [ m n p b t d k ɡ ʔ f v s z ʃ ʒ ʁ ɢ h χ {t͡ʃ} {d͡ʒ} l ɾ ];

# Vowels

یّ → jj;
وّ → vv;

([ َ ِ ُ])ّ → ّ | $1;

َیْ → æj;
ِی → ej;
یوْ → iːv;
{یو} ه $BOUNDARY → iːv;
{یو} هٔ $BOUNDARY → iːv;
یو → juː;
َوْ → av;

# Hamza forms
ء → ʔ;
ا َ ٔ → ʔæ;  # Needed because NFD reorders fatha before hamza
ا ٔ → ʔ;
و ٔ → ʔ;
ی ْ ٔ → ʔ;  # Needed because NFD reorders sukun before hamza
ی ِ ٔ → ʔe;  # Needed because NFD reorders kasra before hamza
ی ٔ → ʔ;

{ َ ه} $BOUNDARY → æ;
[^ːeoæ] {هٔ} $BOUNDARY → eje;
[e] {هٔ} $BOUNDARY → je;
[^ːeoæ] {ه} $BOUNDARY → e;
[e] {ه} $BOUNDARY → ;

اَ → æ;
اً $BOUNDARY → æn;
َ → æ;

یه → je;
یٰ → ɒː;
$IPA_CONSONANT {وی} $VOWEL → uːj;

# If yeh is preceded by a consonant and followed by a vowel,
# it's pronounced /iːj/, but a sukun breaks that and makes it
# be pronounced just as /j/.
$IPA_CONSONANT {\u0652 یو} → juː;
$IPA_CONSONANT {\u0652 ی} $VOWEL → j;
$IPA_CONSONANT {ی} $VOWEL → iːj;

{ی} $VOWEL → j;
ی ْ → j;
ی → iː;

$BOUNDARY {ای} → iː;
ا\u0653 → ɒː;
آ → ɒː;
اِ → e;
$BOUNDARY {اُو} → o;
اُ → o;
$BOUNDARY {او} → uː;
او → ɒːv;
ا → ɒː;  # Probably [^$BOUNDARY]

ِ → e;
ه ِ ّ → hhe;  # Needed because NFD moves kasra before shadda
هِ → he;

خوا → χɒː;
خوی → χiː;

{و} $VOWEL → v;
{و} ه $BOUNDARY → v;
{و} هٔ $BOUNDARY → v;
$IPA_CONSONANT {و} → uː;
$IPA_CONSONANT \u0651 {و} → uː;  # shadda after a consonant
ُ{و} $IPA_CONSONANT → uː;
$BOUNDARY {و} $BOUNDARY → va;

{ ُو} $VOWEL → ov;
ُ و ٔ → oʔ;
ُو → o;
ُ → o;

# Consonants
پ → p;
ب → b;
[ت ط] → t;
د → d;
ک → k;
گ → ɡ;
ع → ʔ;
چ → t͡ʃ;
ج → d͡ʒ;
ف → f;
[س ص ث] → s;
[ز ذ ض ظ] → z;
ش → ʃ;
ژ → ʒ;
خ → χ;
غ → ʁ;
ق → ɢ;
ح → h;
م → m;
ن → n;
ه → h;
ل → l;
ر → ɾ;

ْ → ;

# Pass separator: the cleanup rules below see the fully converted text.
::NULL;

# TODO: How to handle these?
([$IPA_CONSONANT|$VOWEL]){ّ} → $1;
[ ّ ٔ ً ٰ ] → ;

# Recompose the result.
::NFC;
]]></tRule>
    </transform>
  </transforms>
</supplementalData>