UNPKG

larvitgeodata

Version:

Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.

160 lines (158 loc) 9.53 kB
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <!--
      Transliteration rules between the internal ThaiLogical form and Latin
      (direction="both", visibility="internal").
      Layout note: keep every <comment> and <tRule> on ONE physical line.
      Rule text uses '#' for trailing remarks, and such a remark runs to the
      end of the line; a line break inside an element would therefore turn
      the rest of the remark into rule text.
    -->
    <transform source="ThaiLogical" target="Latin" direction="both" visibility="internal">
      <comment># Thai-Latin</comment>
      <comment># This set of rules follows ISO 11940</comment>
      <comment>#\u0009see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf</comment>
      <comment># except that that does not mention an implicit vowel, so we use ọ</comment>
      <comment>#</comment>
      <comment># The transcription is fairly ugly, so we ought to also do the UNGEGN version</comment>
      <comment>#\u0009see: http://www.eki.ee/wgrs/rom1_th.pdf</comment>
      <comment># and probably make that the main variant.</comment>
      <comment># Note: this is an internal file. The NFD/NFC is handled externally, in the index</comment>
      <comment># The insertion of spaces between words, the reversal of the vowels</comment>
      <comment># and the conversion of space to semicolon are done *outside* of these rules.</comment>
      <comment># So as far as these rules are concerned, the vowels are in logical order!</comment>
      <comment># insert implicit vowel (and remove it going the other way)</comment>
      <comment># COMMENTED out: the implicit vowel positions cannot be predicted algorithmically</comment>
      <comment>#$consonant = [ก-ฮ];</comment>
      <comment>#$vowel = [ะ-ฺเ-ไ็];</comment>
      <comment>#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;</comment>
      <comment>#\uE000 → ọ ;</comment>
      <comment># ← ọ ;</comment>
      <tRule>$notAbove = [^\p{ccc=0}\p{ccc=above}] ;</tRule>
      <tRule>$notBelow = [^\p{ccc=0}\p{ccc=below}] ;</tRule>
      <comment># Consonants</comment>
      <comment># Warning: the 'h's need to be handled carefully!</comment>
      <comment># What we really want to say is the following, but we can't</comment>
      <comment># $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;</comment>
      <comment># Since the only accents we care about that could cause problems are free-standing accents below, we use instead:</comment>
      <tRule>$freeStandingBelow = [̥ ];</tRule>
      <tRule>$hAccent = [ ̄ ̣];</tRule>
      <tRule>$notHAccent0 = [^$freeStandingBelow$hAccent];</tRule>
      <tRule>$notHAccent1 = $freeStandingBelow [^$hAccent];</tRule>
      <tRule>ห → h̄ ; # THAI CHARACTER HO HIP</tRule>
      <tRule>ห | $1 ← h ($notAbove*) ̄; # backward case, account for reordering</tRule>
      <tRule>ฮ ↔ ḥ ; # THAI CHARACTER HO NOKHUK</tRule>
      <tRule>ข ↔ k̄h ; # THAI CHARACTER KHO KHAI</tRule>
      <tRule>ฃ ↔ ḳ̄h ; # THAI CHARACTER KHO KHUAT</tRule>
      <tRule>ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON</tRule>
      <tRule>ฆ ↔ ḳh ; # THAI CHARACTER KHO RAKHANG</tRule>
      <tRule>ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI</tRule>
      <tRule>ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI</tRule>
      <tRule>ก ↔ k ; # THAI CHARACTER KO KAI</tRule>
      <tRule>ภ ↔ p̣h ; # THAI CHARACTER PHO SAMPHAO</tRule>
      <tRule>ผ ↔ p̄h ; # THAI CHARACTER PHO PHUNG</tRule>
      <tRule>พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN</tRule>
      <tRule>พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN</tRule>
      <tRule>ป ↔ p ; # THAI CHARACTER PO PLA</tRule>
      <tRule>ฉ ↔ c̄h ; # THAI CHARACTER CHO CHING</tRule>
      <tRule>ฌ ↔ c̣h ; # THAI CHARACTER CHO CHOE</tRule>
      <tRule>ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG</tRule>
      <tRule>ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG</tRule>
      <tRule>จ ↔ c ; # THAI CHARACTER CHO CHAN</tRule>
      <tRule>ฐ ↔ ṭ̄h ; # THAI CHARACTER THO THAN</tRule>
      <tRule>ฑ ↔ ṯh ; # THAI CHARACTER THO NANGMONTHO</tRule>
      <tRule>ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO</tRule>
      <tRule>ถ ↔ t̄h ; # THAI CHARACTER THO THUNG</tRule>
      <tRule>ธ ↔ ṭh ; # THAI CHARACTER THO THONG</tRule>
      <tRule>ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN</tRule>
      <tRule>ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN</tRule>
      <comment>#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.</comment>
      <tRule>ฏ ↔ t̩ ; # THAI CHARACTER TO PATAK</tRule>
      <tRule>ต ↔ t ; # THAI CHARACTER TO TAO</tRule>
      <comment># since there is no singleton g (generated), don't worry about that.</comment>
      <tRule>ง ↔ ng ; # THAI CHARACTER NGO NGU</tRule>
      <tRule>ณ ↔ ṇ ; # THAI CHARACTER NO NEN</tRule>
      <tRule>น ↔ n ; # THAI CHARACTER NO NU</tRule>
      <tRule>ญ ↔ ỵ ; # THAI CHARACTER YO YING</tRule>
      <tRule>ฎ ↔ ḍ ; # THAI CHARACTER DO CHADA</tRule>
      <tRule>ด ↔ d ; # THAI CHARACTER DO DEK</tRule>
      <tRule>บ ↔ b ; # THAI CHARACTER BO BAIMAI</tRule>
      <tRule>ฝ ↔ f̄ ; # THAI CHARACTER FO FA</tRule>
      <tRule>ฝ | $1 ← f ($notAbove*) ̄; # backward case, account for reordering</tRule>
      <tRule>ม ↔ m ; # THAI CHARACTER MO MA</tRule>
      <tRule>ย ↔ y ; # THAI CHARACTER YO YAK</tRule>
      <tRule>ร ↔ r ; # THAI CHARACTER RO RUA</tRule>
      <tRule>ฤ ↔ v ; # THAI CHARACTER RU</tRule>
      <tRule>ฦ ↔ ł ; # THAI CHARACTER LU</tRule>
      <tRule>ว ↔ w ; # THAI CHARACTER WO WAEN</tRule>
      <tRule>ศ ↔ ṣ̄ ; # THAI CHARACTER SO SALA***</tRule>
      <tRule>ศ | $1 ← s ̣ ($notAbove*) ̄; # backward case, account for reordering</tRule>
      <tRule>ษ ↔ s̄ʹ ; # THAI CHARACTER SO RUSI</tRule>
      <tRule>ส → s̄ ; # THAI CHARACTER SO SUA***</tRule>
      <tRule>ส | $1 ← s ($notAbove*) ̄; # backward case, account for reordering</tRule>
      <tRule>ฬ ↔ ḷ ; # THAI CHARACTER LO CHULA</tRule>
      <tRule>ล ↔ l ; # THAI CHARACTER LO LING</tRule>
      <tRule>ฟ ↔ f ; # THAI CHARACTER FO FAN</tRule>
      <tRule>อ ↔ x ; # THAI CHARACTER O ANG</tRule>
      <tRule>ซ ↔ s ; # THAI CHARACTER SO SO</tRule>
      <comment># vowels</comment>
      <tRule>ั ↔ ạ ; # THAI CHARACTER MAI HAN-AKAT</tRule>
      <tRule>า → ā ; # THAI CHARACTER SARA AA</tRule>
      <tRule>า | $1 ← a ($notAbove*) ̄; # backward case, account for reordering</tRule>
      <comment># We deviate from ISO for SARA AM for disambiguation</comment>
      <tRule>ำ → a ̉; # THAI CHARACTER SARA AM</tRule>
      <tRule>ำ | $1 ← a ($notAbove*) ̉ ; # backward case, account for reordering</tRule>
      <tRule>ะ ↔ a ; # THAI CHARACTER SARA A</tRule>
      <tRule>ี ↔ ī ; # THAI CHARACTER SARA II</tRule>
      <tRule>ี | $1 ← i ($notAbove*) ̄ ; # backward case, account for reordering</tRule>
      <tRule>ื ↔ ụ̄ ; # THAI CHARACTER SARA UEE</tRule>
      <tRule>ื | $1 ← u ̣ ($notAbove*) ̄ ; # backward case, account for reordering</tRule>
      <tRule>ึ ↔ ụ ; # THAI CHARACTER SARA UE</tRule>
      <tRule>ู ↔ ū ; # THAI CHARACTER SARA UU</tRule>
      <tRule>ู | $1 ← u ($notAbove*) ̄ ; # backward case, account for reordering</tRule>
      <tRule>ุ ↔ u ; # THAI CHARACTER SARA U</tRule>
      <tRule>ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI</tRule>
      <comment># ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT</comment>
      <tRule>เ ↔ e ; # THAI CHARACTER SARA E</tRule>
      <tRule>แ ↔ æ ; # THAI CHARACTER SARA AE</tRule>
      <tRule>โ ↔ o ; # THAI CHARACTER SARA O</tRule>
      <tRule>ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN</tRule>
      <tRule>ไ ↔ ị ; # THAI CHARACTER SARA AI MAIMALAI</tRule>
      <tRule>ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO</tRule>
      <tRule>็ ↔ ̆ ; # THAI CHARACTER MAITAIKHU</tRule>
      <tRule>่ ↔ ̀ ; # THAI CHARACTER MAI EK</tRule>
      <tRule>้ ↔ ̂ ; # THAI CHARACTER MAI THO</tRule>
      <tRule>๊ ↔ ́ ; # THAI CHARACTER MAI TRI</tRule>
      <tRule>๋ ↔ ̌ ; # THAI CHARACTER MAI CHATTAWA</tRule>
      <tRule>์ ↔ ̒ ; # THAI CHARACTER THANTHAKHAT</tRule>
      <tRule>๎ ↔ '~' ; # THAI CHARACTER YAMAKKAN</tRule>
      <comment># We deviate from ISO for disambiguation</comment>
      <tRule>ํ ↔ ̊ ; # THAI CHARACTER NIKHAHIT</tRule>
      <tRule>๏ ↔ '§' ; # THAI CHARACTER FONGMAN</tRule>
      <tRule>๐ ↔ 0 ; # THAI DIGIT ZERO</tRule>
      <tRule>๑ ↔ 1 ; # THAI DIGIT ONE</tRule>
      <tRule>๒ ↔ 2 ; # THAI DIGIT TWO</tRule>
      <tRule>๓ ↔ 3 ; # THAI DIGIT THREE</tRule>
      <tRule>๔ ↔ 4 ; # THAI DIGIT FOUR</tRule>
      <tRule>๕ ↔ 5 ; # THAI DIGIT FIVE</tRule>
      <tRule>๖ ↔ 6 ; # THAI DIGIT SIX</tRule>
      <tRule>๗ ↔ 7 ; # THAI DIGIT SEVEN</tRule>
      <tRule>๘ ↔ 8 ; # THAI DIGIT EIGHT</tRule>
      <tRule>๙ ↔ 9 ; # THAI DIGIT NINE</tRule>
      <tRule>๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU</tRule>
      <tRule>๛ ↔ » ; # THAI CHARACTER KHOMUT</tRule>
      <tRule>ๆ ↔ « ; # THAI CHARACTER MAIYAMOK</tRule>
      <comment># moved down to make shorter first</comment>
      <comment>#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.</comment>
      <tRule>ฺ ↔ ˌ ; # THAI CHARACTER PHINTHU</tRule>
      <tRule>ิ ↔ i ; # THAI CHARACTER SARA I</tRule>
      <comment># fallbacks</comment>
      <tRule>| k ← g ;</tRule>
      <tRule>| k ← h ;</tRule>
      <tRule>| c ← j ;</tRule>
      <tRule>| k ← q ;</tRule>
      <tRule>| s ← z ;</tRule>
      <tRule>:: (lower);</tRule>
    </transform>
  </transforms>
</supplementalData>