UNPKG

larvitgeodata

Version:

Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.

278 lines (265 loc) 11.3 kB
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<!--
  Bulgarian to Latin transliteration, BGN/PCGN 1952 system (forward direction only).

  Reading guide for the rules below:
    * "<source> → <target> ;" converts the source text to the target text.
    * "X } $lower → ..." applies only when X is followed by a lowercase
      Bulgarian letter, so digraph outputs become title case (Zh) instead of
      all caps (ZH).
    * "A { X } B > ;" matches X only between contexts A and B; an empty
      right-hand side deletes X.
    * Text after '#' inside a tRule is a rule comment.
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <transform source="Bulgarian" target="Latin" direction="forward" variant="BGN" draft="provisional">
      <comment>
      ########################################################################
      # BGN/PCGN 1952 System
      #
      # This system was adopted by the BGN in 1949 and by the PCGN in 1952.
      # It reflects the much simplified Bulgarian orthography as officially
      # revised in February 1945. The Bulgarian alphabet contains all of
      # the characters present in the Russian alphabet with the exception
      # of Ёё, Ыы, and Ээ. Two obsolete letters Ѫѫ and Ѣѣ are also given.
      #
      # The Bulgarian Alphabet as defined by the BGN (Page 15):
      #
      # АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢ
      # абвгдежзийклмнопрстуфхцчшщъьюяѫѣ
      #
      # Originally prepared by Michael Everson &lt;everson@evertype.com&gt;
      ########################################################################
      #
      # MINIMAL FILTER: Bulgarian-Latin
      #
      </comment>
      <tRule>:: [АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯѪѢабвгдежзийклмнопрстуфхцчшщъьюяѫѣ] ;</tRule>
      <tRule>:: NFD (NFC) ;</tRule>
      <comment>
      #
      ########################################################################
      </comment>
      <comment>
      ########################################################################
      #
      # Define All Transformation Variables
      #
      ########################################################################
      #
      </comment>
      <tRule>$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ] ;</tRule>
      <tRule>$lowerConsonants = [бвгджзйклмнпрстфхцчшщь] ;</tRule>
      <tRule>$consonants = [$upperConsonants $lowerConsonants] ;</tRule>
      <tRule>$upperVowels = [АЕИОУЪЮЯѪѢ] ;</tRule>
      <tRule>$lowerVowels = [аеиоуъюяѫѣ] ;</tRule>
      <tRule>$vowels = [$upperVowels $lowerVowels] ;</tRule>
      <tRule>$lower = [$lowerConsonants $lowerVowels] ;</tRule>
      <tRule>$bulgarian = [ $lower $upperConsonants $upperVowels ] ;</tRule>
      <comment>
      #
      # Use this $wordBoundary until bug 2034 is fixed in ICU:
      # http://bugs.icu-project.org/cgi-bin/icu-bugs/transliterate?id=2034;expression=boundary;user=guest
      #
      </comment>
      <tRule>$wordBoundary = [^[:L:][:M:][:N:]] ;</tRule>
      <comment>
      #
      ########################################################################
      </comment>
      <comment>
      ########################################################################
      #
      # Start of Alphabetic Transformations
      #
      ########################################################################
      #
      </comment>
      <tRule>А → A ; # CYRILLIC CAPITAL LETTER A</tRule>
      <tRule>а → a ; # CYRILLIC SMALL LETTER A</tRule>
      <tRule>Б → B ; # CYRILLIC CAPITAL LETTER BE</tRule>
      <tRule>б → b ; # CYRILLIC SMALL LETTER BE</tRule>
      <tRule>В → V ; # CYRILLIC CAPITAL LETTER VE</tRule>
      <tRule>в → v ; # CYRILLIC SMALL LETTER VE</tRule>
      <tRule>Г → G ; # CYRILLIC CAPITAL LETTER GHE</tRule>
      <tRule>г → g ; # CYRILLIC SMALL LETTER GHE</tRule>
      <tRule>Д → D ; # CYRILLIC CAPITAL LETTER DE</tRule>
      <tRule>д → d ; # CYRILLIC SMALL LETTER DE</tRule>
      <tRule>Е → E ; # CYRILLIC CAPITAL LETTER IE</tRule>
      <tRule>е → e ; # CYRILLIC SMALL LETTER IE</tRule>
      <tRule>Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE</tRule>
      <tRule>Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE</tRule>
      <tRule>ж → zh ; # CYRILLIC SMALL LETTER ZHE</tRule>
      <tRule>З → Z ; # CYRILLIC CAPITAL LETTER ZE</tRule>
      <tRule>з → z ; # CYRILLIC SMALL LETTER ZE</tRule>
      <tRule>И → I ; # CYRILLIC CAPITAL LETTER I</tRule>
      <tRule>и → i ; # CYRILLIC SMALL LETTER I</tRule>
      <tRule>Й → Y ; # CYRILLIC CAPITAL LETTER SHORT I</tRule>
      <tRule>й → y ; # CYRILLIC SMALL LETTER SHORT I</tRule>
      <tRule>К → K ; # CYRILLIC CAPITAL LETTER KA</tRule>
      <tRule>к → k ; # CYRILLIC SMALL LETTER KA</tRule>
      <tRule>Л → L ; # CYRILLIC CAPITAL LETTER EL</tRule>
      <tRule>л → l ; # CYRILLIC SMALL LETTER EL</tRule>
      <tRule>М → M ; # CYRILLIC CAPITAL LETTER EM</tRule>
      <tRule>м → m ; # CYRILLIC SMALL LETTER EM</tRule>
      <tRule>Н → N ; # CYRILLIC CAPITAL LETTER EN</tRule>
      <tRule>н → n ; # CYRILLIC SMALL LETTER EN</tRule>
      <tRule>О → O ; # CYRILLIC CAPITAL LETTER O</tRule>
      <tRule>о → o ; # CYRILLIC SMALL LETTER O</tRule>
      <tRule>П → P ; # CYRILLIC CAPITAL LETTER PE</tRule>
      <tRule>п → p ; # CYRILLIC SMALL LETTER PE</tRule>
      <tRule>Р → R ; # CYRILLIC CAPITAL LETTER ER</tRule>
      <tRule>р → r ; # CYRILLIC SMALL LETTER ER</tRule>
      <tRule>С → S ; # CYRILLIC CAPITAL LETTER ES</tRule>
      <tRule>с → s ; # CYRILLIC SMALL LETTER ES</tRule>
      <comment>
      #
      ########################################################################
      #
      # BGN Page 16 Note 4
      #
      # тс becomes t·s
      #
      ########################################################################
      #
      </comment>
      <tRule>ТС → T·S ; # CYRILLIC CAPITAL LETTER TE</tRule>
      <tRule>Тс → T·s ; # CYRILLIC CAPITAL LETTER TE</tRule>
      <tRule>тс → t·s ; # CYRILLIC SMALL LETTER TE</tRule>
      <tRule>Т → T ; # CYRILLIC CAPITAL LETTER TE</tRule>
      <tRule>т → t ; # CYRILLIC SMALL LETTER TE</tRule>
      <comment>
      #
      ########################################################################
      #
      # End Note 4
      #
      ########################################################################
      </comment>
      <tRule>У → U ; # CYRILLIC CAPITAL LETTER U</tRule>
      <tRule>у → u ; # CYRILLIC SMALL LETTER U</tRule>
      <tRule>Ф → F ; # CYRILLIC CAPITAL LETTER EF</tRule>
      <tRule>ф → f ; # CYRILLIC SMALL LETTER EF</tRule>
      <tRule>Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA</tRule>
      <tRule>Х → KH ; # CYRILLIC CAPITAL LETTER HA</tRule>
      <tRule>х → kh ; # CYRILLIC SMALL LETTER HA</tRule>
      <tRule>Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE</tRule>
      <tRule>Ц → TS ; # CYRILLIC CAPITAL LETTER TSE</tRule>
      <tRule>ц → ts ; # CYRILLIC SMALL LETTER TSE</tRule>
      <tRule>Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE</tRule>
      <tRule>Ч → CH ; # CYRILLIC CAPITAL LETTER CHE</tRule>
      <tRule>ч → ch ; # CYRILLIC SMALL LETTER CHE</tRule>
      <comment>
      ########################################################################
      #
      # Implied rule from BGN Russian-Latin transliteration (Page 94 Note 3.6).
      #
      # шт becomes sh·t
      #
      ########################################################################
      #
      </comment>
      <tRule>ШТ → SH·T ; # CYRILLIC CAPITAL LETTER SHA</tRule>
      <tRule>Шт → Sh·t ; # CYRILLIC CAPITAL LETTER SHA</tRule>
      <tRule>шт → sh·t ; # CYRILLIC SMALL LETTER SHA</tRule>
      <tRule>Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA</tRule>
      <tRule>Ш → SH ; # CYRILLIC CAPITAL LETTER SHA</tRule>
      <tRule>ш → sh ; # CYRILLIC SMALL LETTER SHA</tRule>
      <tRule>Щ} $lower → Sht ; # CYRILLIC CAPITAL LETTER SHCHA</tRule>
      <tRule>Щ → SHT ; # CYRILLIC CAPITAL LETTER SHCHA</tRule>
      <tRule>щ → sht ; # CYRILLIC SMALL LETTER SHCHA</tRule>
      <comment>
      #
      ########################################################################
      #
      # End Implied rule
      #
      ########################################################################
      </comment>
      <tRule>Ъ → Ŭ ; # CYRILLIC CAPITAL LETTER HARD SIGN</tRule>
      <tRule>ъ → ŭ ; # CYRILLIC SMALL LETTER HARD SIGN</tRule>
      <comment>
      ########################################################################
      #
      # BGN Page 16 Note 1
      #
      # In modern Bulgarian orthography, the character ъ does not occur in
      # word-final position. It should be omitted in romanization when found
      # in older sources.
      #
      # The following rule removes all Ъъ at the end of a word. It is assumed
      # that when the condition is met, the text must be from an older source.
      # Comment out with a '#' at the start of a line to disable.
      #
      #
      ########################################################################
      #
      </comment>
      <!--
        NOTE(review): this removal rule is listed after the unconditional
        Ъ/ъ rules above. If the rule engine applies the first matching rule
        in document order, the unconditional rules would consume every Ъ/ъ
        and this rule would never fire. Order left unchanged here; confirm
        against the ICU transform rule-ordering documentation.
      -->
      <tRule>$bulgarian { [Ъъ] } $wordBoundary > ;</tRule>
      <comment>
      #
      ########################################################################
      #
      # End BGN Page 16 Note 1
      #
      ########################################################################
      </comment>
      <tRule>Ь → ; # CYRILLIC CAPITAL LETTER SOFT SIGN</tRule>
      <tRule>ь → ; # CYRILLIC SMALL LETTER SOFT SIGN</tRule>
      <tRule>Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU</tRule>
      <tRule>Ю → YU ; # CYRILLIC CAPITAL LETTER YU</tRule>
      <tRule>ю → yu ; # CYRILLIC SMALL LETTER YU</tRule>
      <tRule>Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA</tRule>
      <tRule>Я → YA ; # CYRILLIC CAPITAL LETTER YA</tRule>
      <tRule>я → ya ; # CYRILLIC SMALL LETTER YA</tRule>
      <comment>
      ########################################################################
      #
      # BGN Page 16 Note 2
      #
      # The obsolete character Ѫ, which was replaced by Ъ in 1945, should be
      # romanized Ŭ.
      #
      ########################################################################
      #
      </comment>
      <tRule>Ѫ → Ŭ ; # CYRILLIC CAPITAL LETTER BIG YUS</tRule>
      <tRule>ѫ → ŭ ; # CYRILLIC SMALL LETTER BIG YUS</tRule>
      <comment>
      #
      ########################################################################
      #
      # End BGN Page 16 Note 2
      #
      ########################################################################
      </comment>
      <comment>
      ########################################################################
      #
      # BGN Page 16 Note 3
      #
      # The obsolete character Ѣ, replaced in 1945 by Я or Е according to local
      # pronunciation, should be romanized as e or ya, accordingly, if the
      # pronunciation is known; otherwise as ye.
      #
      ########################################################################
      #
      </comment>
      <tRule>Ѣ} $lower → Ye ; # CYRILLIC CAPITAL LETTER YAT</tRule>
      <tRule>Ѣ → YE ; # CYRILLIC CAPITAL LETTER YAT</tRule>
      <tRule>ѣ → ye ; # CYRILLIC SMALL LETTER YAT</tRule>
      <comment>
      #
      # Alternative rule where appropriate for local pronunciation. To apply
      # uncomment the following by removing the '#' mark at the start of the
      # line and insert before the three rule lines above.
      #
      # Ѣ} $lower → e ; # CYRILLIC CAPITAL LETTER YAT
      # Ѣ → E ; # CYRILLIC CAPITAL LETTER YAT
      # ѣ → e ; # CYRILLIC SMALL LETTER YAT
      #
      ########################################################################
      #
      # End BGN Page 16 Note 3
      #
      ########################################################################
      </comment>
    </transform>
  </transforms>
</supplementalData>