UNPKG

larvitgeodata

Version:

Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.

241 lines (229 loc) 12.6 kB
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <transform source="Arabic" target="Latin" direction="forward" variant="BGN" draft="provisional">
      <comment>
      ########################################################################
      # BGN/PCGN 1956 System
      #
      # This system was adopted by the BGN in 1946 and by the PCGN
      # in 1956 and has been applied in the systematic romanization
      # of geographic names in Bahrain, Egypt, Iraq, Jordan,
      # Kuwait, Lebanon, Libya, Oman, Qatar, Saudi Arabia, Sudan,
      # Syria, Tunisia, the United Arab Emirates, and Yemen, all
      # of which have been covered by published BGN gazetteers.
      #
      # Originally prepared by Michael Everson &lt;everson@evertype.com&gt;
      ########################################################################
      #
      # MINIMAL FILTER: Arabic-Latin
      #
      </comment>
      <tRule>:: [[:arabic:][:block=ARABIC:][ءآابةتثجحخدذرزسشصضطظعغفقكلمنهوىيًٌٍَُِّْ٠١٢٣٤٥٦٧٨٩ٱ]] ;</tRule>
      <tRule>:: NFKD (NFC) ;</tRule>
      <comment>
      #
      ########################################################################
      </comment>
      <comment>
      ########################################################################
      #
      # Define All Transformation Variables
      #
      # $alef     - mark emitted by the HAMZA, ALEF WASLA and ALEF WITH MADDA rules
      # $ayin     - mark emitted by the AIN rules
      # $disambig - appended on the Latin side of rules whose plain romanization
      #             is also produced by another rule (e.g. both ۰ and ٠ give 0)
      #
      ########################################################################
      #
      </comment>
      <tRule>$alef = ’;</tRule>
      <tRule>$ayin = ‘;</tRule>
      <tRule>$disambig = ̱ ;</tRule>
      <comment>
      #
      # Use this $wordBoundary until bug 2034 is fixed in ICU:
      # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
      #
      </comment>
      <tRule>$wordBoundary = [^[:L:][:M:][:N:]] ;</tRule>
      <comment>
      #
      ########################################################################
      </comment>
      <comment># non-letters</comment>
      <tRule>[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR</tRule>
      <tRule>[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR</tRule>
      <tRule>٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR</tRule>
      <tRule>٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR</tRule>
      <comment># ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate</comment>
      <tRule>، ↔ ',' ; # ARABIC COMMA</tRule>
      <tRule>؛ ↔ ';' ; # ARABIC SEMICOLON</tRule>
      <tRule>؟ ↔ '?' ; # ARABIC QUESTION MARK</tRule>
      <tRule>٪ ↔ '%' ; # ARABIC PERCENT SIGN</tRule>
      <tRule>۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO</tRule>
      <tRule>۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE</tRule>
      <tRule>۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO</tRule>
      <tRule>۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE</tRule>
      <tRule>۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR</tRule>
      <tRule>۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE</tRule>
      <tRule>۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX</tRule>
      <tRule>۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN</tRule>
      <tRule>۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT</tRule>
      <tRule>۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE</tRule>
      <tRule>٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO</tRule>
      <tRule>١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE</tRule>
      <tRule>٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO</tRule>
      <tRule>٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE</tRule>
      <tRule>٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR</tRule>
      <tRule>٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE</tRule>
      <tRule>٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX</tRule>
      <tRule>٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN</tRule>
      <tRule>٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT</tRule>
      <tRule>٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE</tRule>
      <comment>
      ########################################################################
      #
      # Rules moved to front to avoid masking
      #
      ########################################################################
      </comment>
      <comment>
      ########################################################################
      #
      # BGN Page 8 Rule 5
      #
      # The character sequences ته , كه , ده , and سه may be romanized t·h, k·h,
      # d·h, and s·h in order to differentiate those romanizations from the
      # digraphs th, kh, dh, and sh.
      #
      ########################################################################
      #
      </comment>
      <tRule>ته → t·h ; # ARABIC LETTER TEH + HEH</tRule>
      <tRule>كه → k·h ; # ARABIC LETTER KAF + HEH</tRule>
      <tRule>ده → d·h ; # ARABIC LETTER DAL + HEH</tRule>
      <tRule>سه → s·h ; # ARABIC LETTER SEEN + HEH</tRule>
      <comment>
      #
      ########################################################################
      #
      # End Rule 5
      #
      ########################################################################
      </comment>
      <comment>########################################################################</comment>
      <comment>
      #
      # BGN Page 8 Rule 9
      #
      # Doubled consonant sounds are represented in Arabic script by placing
      # a shaddah ( ّ ) over a consonant character. In romanization the letter
      # should be doubled. [The remainder of this rule deals with the definite
      # article and is lexical.]
      #
      # Each doubled form below repeats exactly the romanization that the
      # corresponding single-letter rule produces (e.g. ح → ḩ, so حّ → ḩḩ).
      #
      ########################################################################
      #
      </comment>
      <tRule>بّ → bb ; # ARABIC LETTER BEH + SHADDA</tRule>
      <tRule>تّ → tt ; # ARABIC LETTER TEH + SHADDA</tRule>
      <tRule>ثّ → thth ; # ARABIC LETTER THEH + SHADDA</tRule>
      <tRule>جّ → jj ; # ARABIC LETTER JEEM + SHADDA</tRule>
      <tRule>حّ → ḩḩ ; # ARABIC LETTER HAH + SHADDA</tRule>
      <tRule>خّ → khkh ; # ARABIC LETTER KHAH + SHADDA</tRule>
      <tRule>دّ → dd ; # ARABIC LETTER DAL + SHADDA</tRule>
      <tRule>ذّ → dhdh ; # ARABIC LETTER THAL + SHADDA</tRule>
      <tRule>رّ → rr ; # ARABIC LETTER REH + SHADDA</tRule>
      <tRule>زّ → zz ; # ARABIC LETTER ZAIN + SHADDA</tRule>
      <tRule>سّ → ss ; # ARABIC LETTER SEEN + SHADDA</tRule>
      <tRule>شّ → shsh ; # ARABIC LETTER SHEEN + SHADDA</tRule>
      <tRule>صّ → şş ; # ARABIC LETTER SAD + SHADDA</tRule>
      <tRule>ضّ → ḑḑ ; # ARABIC LETTER DAD + SHADDA</tRule>
      <tRule>طّ → ţţ ; # ARABIC LETTER TAH + SHADDA</tRule>
      <tRule>ظّ → z̧z̧ ; # ARABIC LETTER ZAH + SHADDA</tRule>
      <tRule>عّ → $ayin $ayin ; # ARABIC LETTER AIN + SHADDA</tRule>
      <tRule>غّ → ghgh ; # ARABIC LETTER GHAIN + SHADDA</tRule>
      <tRule>فّ → ff ; # ARABIC LETTER FEH + SHADDA</tRule>
      <tRule>قّ → qq ; # ARABIC LETTER QAF + SHADDA</tRule>
      <tRule>كّ → kk ; # ARABIC LETTER KAF + SHADDA</tRule>
      <tRule>لّ → ll ; # ARABIC LETTER LAM + SHADDA</tRule>
      <tRule>مّ → mm ; # ARABIC LETTER MEEM + SHADDA</tRule>
      <tRule>نّ → nn ; # ARABIC LETTER NOON + SHADDA</tRule>
      <tRule>هّ → hh ; # ARABIC LETTER HEH + SHADDA</tRule>
      <tRule>وّ → ww ; # ARABIC LETTER WAW + SHADDA</tRule>
      <tRule>يّ → yy ; # ARABIC LETTER YEH + SHADDA</tRule>
      <tRule>ىّ → yy ; # ARABIC LETTER ALEF MAKSURA + SHADDA</tRule>
      <comment>
      #
      ########################################################################
      #
      # End Rule 9
      #
      ########################################################################
      </comment>
      <comment>
      ########################################################################
      #
      # Start of Transformations
      #
      ########################################################################
      #
      </comment>
      <tRule>$wordBoundary{ء → ; # ARABIC LETTER HAMZA</tRule>
      <tRule>ء → $alef ; # ARABIC LETTER HAMZA</tRule>
      <tRule>$wordBoundary{ا → ; # ARABIC LETTER ALEF</tRule>
      <tRule>ٱ → $alef ; # ARABIC LETTER ALEF WASLA</tRule>
      <tRule>$wordBoundary{آ → ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE</tRule>
      <tRule>آ → $alef ā ; # ARABIC LETTER ALEF WITH MADDA ABOVE</tRule>
      <tRule>ب → b ; # ARABIC LETTER BEH</tRule>
      <tRule>ت → t ; # ARABIC LETTER TEH</tRule>
      <tRule>ة → h ; # ARABIC LETTER TEH MARBUTA</tRule>
      <tRule>ث → th ; # ARABIC LETTER THEH</tRule>
      <tRule>ج → j ; # ARABIC LETTER JEEM</tRule>
      <tRule>ح → ḩ ; # ARABIC LETTER HAH</tRule>
      <tRule>خ → kh ; # ARABIC LETTER KHAH</tRule>
      <tRule>د → d ; # ARABIC LETTER DAL</tRule>
      <tRule>ذ → dh ; # ARABIC LETTER THAL</tRule>
      <tRule>ر → r ; # ARABIC LETTER REH</tRule>
      <tRule>ز → z ; # ARABIC LETTER ZAIN</tRule>
      <tRule>س → s ; # ARABIC LETTER SEEN</tRule>
      <tRule>ش → sh ; # ARABIC LETTER SHEEN</tRule>
      <tRule>ص → ş ; # ARABIC LETTER SAD</tRule>
      <tRule>ض → ḑ ; # ARABIC LETTER DAD</tRule>
      <tRule>ط → ţ ; # ARABIC LETTER TAH</tRule>
      <tRule>ظ → z̧ ; # ARABIC LETTER ZAH</tRule>
      <tRule>ع → $ayin ; # ARABIC LETTER AIN</tRule>
      <tRule>غ → gh ; # ARABIC LETTER GHAIN</tRule>
      <tRule>ف → f ; # ARABIC LETTER FEH</tRule>
      <tRule>ق → q ; # ARABIC LETTER QAF</tRule>
      <tRule>ک ↔ k $disambig ; # ARABIC LETTER KEHEH</tRule>
      <tRule>ك ↔ k ; # ARABIC LETTER KAF</tRule>
      <tRule>ل → l ; # ARABIC LETTER LAM</tRule>
      <tRule>م → m ; # ARABIC LETTER MEEM</tRule>
      <tRule>ن → n ; # ARABIC LETTER NOON</tRule>
      <tRule>ه → h ; # ARABIC LETTER HEH</tRule>
      <tRule>و → w ; # ARABIC LETTER WAW</tRule>
      <comment>
      # YEH (U+064A) and ALEF MAKSURA (U+0649) are distinct code points and each
      # needs its own rule; a bare ALEF MAKSURA with no preceding FATHA keeps
      # its mapping to y.
      </comment>
      <tRule>ي → y ; # ARABIC LETTER YEH</tRule>
      <tRule>ى → y ; # ARABIC LETTER ALEF MAKSURA</tRule>
      <tRule>َا → ā ; # ARABIC FATHA + ALEF</tRule>
      <tRule>َى → á ; # ARABIC FATHA + ALEF MAKSURA</tRule>
      <tRule>َيْ → ay ; # ARABIC FATHA + YEH + SUKUN</tRule>
      <tRule>َوْ → aw ; # ARABIC FATHA + WAW + SUKUN</tRule>
      <tRule>َ → a ; # ARABIC FATHA</tRule>
      <tRule>ِي → ī ; # ARABIC KASRA + YEH</tRule>
      <tRule>ِ → i ; # ARABIC KASRA</tRule>
      <tRule>ُو → ū ; # ARABIC DAMMA + WAW</tRule>
      <tRule>ُ → u ; # ARABIC DAMMA</tRule>
      <tRule>ْ → ; # ARABIC SUKUN</tRule>
      <tRule>ً → aⁿ ; # ARABIC FATHATAN</tRule>
      <tRule>ٍ → iⁿ ; # ARABIC KASRATAN</tRule>
      <tRule>ٌ → uⁿ ; # ARABIC DAMMATAN</tRule>
      <tRule>::NFC (NFD) ;</tRule>
      <comment>
      #
      ########################################################################
      </comment>
    </transform>
  </transforms>
</supplementalData>