larvitgeodata
Version:
Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.
420 lines (416 loc) • 19.2 kB
text/xml
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision: 11914 $"/>
<transforms>
<transform source="Russian" target="Latin" direction="forward" variant="BGN" draft="provisional">
<comment>
########################################################################
# BGN/PCGN 1947 System
#
# The BGN/PCGN system for Russian was adopted by the BGN in 1944 and by
# the PCGN in 1947 for use in romanizing names written in the Russian
# Cyrillic alphabet.
#
# The Russian Alphabet as defined by the BGN (Page 93):
#
# АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ
# абвгдеёжзийклмнопрстуфхцчшщъыьэюя
#
# Originally prepared by Michael Everson everson@evertype.com
# Fixed by Frank Yung-Fong Tang ftang@google.com
#
# Test Data from http://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian
# Since the use of · is optional, we do NOT implement it here.
# Азов Azov
# Тамбов Tambov
# Барнаул Barnaul
# Кубань Kubanʹ
# Владимир Vladimir
# Ульяновск Ulʹyanovsk
# Грозный Groznyy
# Волгодонск Volgodonsk
# Дзержинский Dzerzhinskiy
# Нелидово Nelidovo
# Елизово Yelizovo
# Чапаевск Chapayevsk
# Майер Mayyer
# Юрьев Yurʹyev
# Съезд Sʺyezd
# Белкин Belkin
# Ёлкин Yëlkin
# Остриё Ostriyë
## Йёнчёпинг Y·ënchëping
# Йёнчёпинг Yënchëping
# Громадьё Gromadʹyë
# Подъёмный Podʺyëmnyy
# Жуков Zhukov
# Лужники Luzhniki
# Звенигород Zvenigorod
# Вязьма Vyazʹma
# Иркутск Irkutsk
# Апатиты Apatity
## Кайафа Kay·afa
# Кайафа Kayafa
# Йошкар-Ола Yoshkar-Ola
# Бийск Biysk
# Киров Kirov
# Енисейск Yeniseysk
# Ломоносов Lomonosov
# Нелидово Nelidovo
# Менделеев Mendeleyev
# Каменка Kamenka
# Новосибирск Novosibirsk
# Кандалакша Kandalaksha
# Омск Omsk
# Красноярск Krasnoyarsk
# Петрозаводск Petrozavodsk
# Серпухов Serpukhov
# Ростов Rostov
# Северобайкальск Severobaykalʹsk
# Сковородино Skovorodino
# Чайковский Chaykovskiy
# Тамбов Tambov
# Мытищи Mytishchi
# Углич Uglich
# Дудинка Dudinka
# Фурманов Furmanov
# Уфа Ufa
# Хабаровск Khabarovsk
# Прохладный Prokhladnyy
# Цимлянск Tsimlyansk
# Ельцин Yelʹtsin
# Чебоксары Cheboksary
# Печора Pechora
# Шахтёрск Shakhtërsk
# Мышкин Myshkin
# Щёлково Shchëlkovo
# Ртищево Rtishchevo
# Подъездной Podʺyezdnoy
## Выудить Vy·uditʹ
# Выудить Vyuditʹ
## Суык-Су Su·yk-Su
# Суык-Су Suyk-Su
# Ыттык-Кёль Yttyk-Këlʹ
# Тында Tynda
# Тюмень Tyumenʹ
## Двухэлементный Dvukh·elementnyy
# Двухэлементный Dvukhelementnyy
# Электрогорск Elektrogorsk
# Радиоэлектроника Radioelektronika
# Юбилейный Yubileynyy
# Ключевская Klyuchevskaya
# Якутск Yakutsk
# Брянск Bryansk
## Соответствие Sootvet·stviye
# Соответствие Sootvetstviye
## Веснушчатый Vesnush·chatyy
# Веснушчатый Vesnushchatyy
#
########################################################################
# # MINIMAL FILTER: Russian-Latin #
</comment>
<!-- Global filter: a "::[set];" line listing every upper- and lowercase letter of the
     Russian alphabet. Presumably only these characters are rewritten by the rules
     that follow; other characters can still be seen as context. -->
<tRule>::[АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдеёжзийклмнопрстуфхцчшщъыьэюя];</tRule>
<comment>
# BUG(ftang) remove the following line. Otherwise the rule for
# Й й Ё ё will break since the rule is written in NFC but
# the line decomposes the text.
# :: NFD (NFC) ;
#
########################################################################
</comment>
<comment>
########################################################################
# Define All Transformation Variables #
########################################################################
#
</comment>
<!-- Variables shared by the rules below.
     $prime and $doublePrime are the romanizations of the soft and hard signs.
     $wordBoundary matches any character that is not a letter, mark or number.
     The consonant and vowel sets are split by case so that $lower and $upper can
     serve as look-ahead context when choosing the case of multi-letter
     romanizations (for example Zh versus ZH, Ya versus YA).
     The consonant sets must contain З/з and must not contain the vowel Э/э
     (which is already part of the vowel sets). If З/з is missing, $lower and
     $upper do not contain it and a capital before з, as in Язык, falls through to the
     all-caps rule.
     NOTE(review): the signs Ъ/ъ and Ь/ь are in neither $lower nor $upper, so a
     capital digraph letter directly before a sign takes the all-caps form;
     confirm whether that is intended. -->
<tRule>$prime = ʹ ;</tRule>
<tRule>$doublePrime = ʺ ;</tRule>
<tRule>$wordBoundary = [^[:L:][:M:][:N:]] ;</tRule>
<tRule>$upperConsonants = [БВГДЖЗЙКЛМНПРСТФХЦЧШЩ] ;</tRule>
<tRule>$lowerConsonants = [бвгджзйклмнпрстфхцчшщ] ;</tRule>
<tRule>$consonants = [$upperConsonants $lowerConsonants] ;</tRule>
<tRule>$upperVowels = [АЕЁЭИОУЫЮЯ] ;</tRule>
<tRule>$lowerVowels = [аеёэиоуыюя] ;</tRule>
<tRule>$vowels = [$upperVowels $lowerVowels] ;</tRule>
<tRule>$lower = [$lowerConsonants $lowerVowels] ;</tRule>
<tRule>$upper = [$upperConsonants $upperVowels] ;</tRule>
<comment>
#
########################################################################
</comment>
<comment>
########################################################################
# Rules moved to front to avoid masking #
########################################################################
#
# REMOVEOPTIONAL(ftang) The following two rules for the · option are commented out.
#$lowerVowels { ы → ·y ;
#$upperVowels { [Ыы] → ·Y ;
########################################################################
#
</comment>
<comment>
# REMOVEOPTIONAL(ftang) The following two rules for · option is commented out.
#[$consonants - [Йй]]{Э → ·E ;
#[$consonants - [Йй]]{э → ·e ;
</comment>
<!-- First pass: Е/е and Ё/ё that follow a vowel or one of Й, Ъ, Ь take the
     y-prefixed romanization. The set before "{" is the preceding context; text
     after "}" is the following context.
     Capital rules: only uppercase preceding context is listed; the result is
     all-caps (YE, YË) when an $upper letter follows, otherwise title-case (Ye, Yë).
     Small-letter rules: preceding context of either case is accepted. -->
<tRule>[$upperVowels [ЙЪЬ]] { Е } $upper → YE ; # CYRILLIC CAPITAL LETTER IE</tRule>
<tRule>[$upperVowels [ЙЪЬ]] { Е → Ye ; # CYRILLIC CAPITAL LETTER IE</tRule>
<tRule>[$upperVowels $lowerVowels [ЙйЪъЬь]] { е → ye ; # CYRILLIC SMALL LETTER IE</tRule>
<tRule>[$upperVowels [ЙЪЬ]] { Ё } $upper → YË ; # CYRILLIC CAPITAL LETTER IO</tRule>
<tRule>[$upperVowels [ЙЪЬ]] { Ё → Yë ; # CYRILLIC CAPITAL LETTER IO</tRule>
<tRule>[$upperVowels $lowerVowels [ЙйЪъЬь]] { ё → yë ; # CYRILLIC SMALL LETTER IO</tRule>
<comment>
# Since the rules above use the context before the characters,
# we have to perform them in a separate pass before we change the vowels;
# the ::Null forces a separate pass.
</comment>
<!-- Pass separator: the contextual Е/Ё rules before this line run as their own
     pass, ahead of the letter-by-letter rules after it. -->
<tRule>::Null;</tRule>
<comment>
#
########################################################################
# Start of Alphabetic Transformations #
########################################################################
#
</comment>
<!-- Context-free one-to-one mappings for А, Б, В, Г, Д in both cases. -->
<tRule>А → A ; # CYRILLIC CAPITAL LETTER A</tRule>
<tRule>а → a ; # CYRILLIC SMALL LETTER A</tRule>
<tRule>Б → B ; # CYRILLIC CAPITAL LETTER BE</tRule>
<tRule>б → b ; # CYRILLIC SMALL LETTER BE</tRule>
<tRule>В → V ; # CYRILLIC CAPITAL LETTER VE</tRule>
<tRule>в → v ; # CYRILLIC SMALL LETTER VE</tRule>
<tRule>Г → G ; # CYRILLIC CAPITAL LETTER GHE</tRule>
<tRule>г → g ; # CYRILLIC SMALL LETTER GHE</tRule>
<tRule>Д → D ; # CYRILLIC CAPITAL LETTER DE</tRule>
<tRule>д → d ; # CYRILLIC SMALL LETTER DE</tRule>
<comment>
#
########################################################################
# BGN Page 94 Rule 1:
# The character е should be romanized ye initially, after the vowel
# characters а, е, ё, и, о, у, ы, э, ю, and я, and after й, ъ, and ь.
# In all other instances, it should be romanized e.
########################################################################
#
# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER
# Е}[$upperVowels [ЙЪЬ]] → YE ; # CYRILLIC CAPITAL LETTER IE
# Е}[$lowerVowels [йъь]] → Ye ; # CYRILLIC CAPITAL LETTER IE
</comment>
<!-- Capital Е, word-initial case of Rule 1: after $wordBoundary it becomes YE when
     an $upper letter follows and Ye otherwise. Any Е left over becomes plain E.
     The contextual rules are listed before the plain fallback. -->
<tRule>$wordBoundary{Е} $upper → YE ; # CYRILLIC CAPITAL LETTER IE</tRule>
<tRule>$wordBoundary{Е → Ye ; # CYRILLIC CAPITAL LETTER IE</tRule>
<tRule>Е → E ; # CYRILLIC CAPITAL LETTER IE</tRule>
<comment>
# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER
# е}[$upperVowels $lowerVowels [ЙйЪъЬь]] → ye ; # CYRILLIC SMALL LETTER IE
</comment>
<!-- Small е: ye when preceded by $wordBoundary, plain e otherwise. -->
<tRule>$wordBoundary{е → ye ; # CYRILLIC SMALL LETTER IE</tRule>
<tRule>е → e ; # CYRILLIC SMALL LETTER IE</tRule>
<comment>
#
########################################################################
# End of Rule 1
########################################################################
########################################################################
# BGN Page 94 Rule 2:
# The character ё is not considered a separate character of the Russian
# alphabet and the dieresis is generally not shown. When the dieresis is
# shown, the character should be romanized yë initially, after the vowel
# characters а, е, ё, и, о, у, ы, э, ю, and я, and after й, ъ, and ь. In
# all other instances, it should be romanized ë. When the dieresis is not
# shown, the character may still be romanized in the preceding manner or,
# alternatively, in accordance with note 1.
########################################################################
# BUG(ftang)- the following two lines said BEFORE the vowels, instead of AFTER
# Ё}[$upperVowels [ЙЪЬ]] → YË ; # CYRILLIC CAPITAL LETTER IO
# Ё}[$lowerVowels [йъь]] → Yë ; # CYRILLIC CAPITAL LETTER IO
</comment>
<!-- Capital Ё, word-initial case of Rule 2: after $wordBoundary it becomes YË when
     an $upper letter follows and Yë in every other word-initial position, which
     mirrors the word-initial Е rules. Any Ё left over becomes plain Ë.
     The title-case rule deliberately has no following-context requirement, so a
     word-initial Ё followed by neither an $upper nor a $lower letter (end of
     word, punctuation, a sign letter) still receives its initial y. -->
<tRule>$wordBoundary {Ё} $upper → YË ; # CYRILLIC CAPITAL LETTER IO</tRule>
<tRule>$wordBoundary {Ё → Yë ; # CYRILLIC CAPITAL LETTER IO</tRule>
<tRule>Ё → Ë ; # CYRILLIC CAPITAL LETTER IO</tRule>
<comment>
# BUG(ftang)- the following line said BEFORE the vowels, instead of AFTER
# ё}[$upperVowels $lowerVowels [ЙйЪъЬь]] → yë ; # CYRILLIC SMALL LETTER IO
</comment>
<!-- Small ё: yë when preceded by $wordBoundary, plain ë otherwise. -->
<tRule>$wordBoundary{ё → yë ; # CYRILLIC SMALL LETTER IO</tRule>
<tRule>ё → ë ; # CYRILLIC SMALL LETTER IO</tRule>
<comment>
#
########################################################################
# End of Rule 2 #
########################################################################
#
</comment>
<!-- Ж maps to a digraph, so its case depends on what follows: Zh before a letter
     in $lower, ZH otherwise. Small ж is always zh. -->
<tRule>Ж} $lower → Zh ; # CYRILLIC CAPITAL LETTER ZHE</tRule>
<tRule>Ж → ZH ; # CYRILLIC CAPITAL LETTER ZHE</tRule>
<tRule>ж → zh ; # CYRILLIC SMALL LETTER ZHE</tRule>
<comment>
#
########################################################################
# BGN Page 94 Rule 3.4
# э after any consonant character except
# й becomes ·е
########################################################################
#
</comment>
<!-- Context-free mapping for З in both cases. -->
<tRule>З → Z ; # CYRILLIC CAPITAL LETTER ZE</tRule>
<tRule>з → z ; # CYRILLIC SMALL LETTER ZE</tRule>
<comment>
# BUG(ftang) The following two lines said those consonant becomes ·е
# [$consonants - [Йй]]}Э → ·Е ;
# [$consonants - [Йй]]}э → ·е ;
#
########################################################################
# End of Rule 3.4 #
########################################################################
#
</comment>
<!-- Context-free mapping for И in both cases. -->
<tRule>И → I ; # CYRILLIC CAPITAL LETTER I</tRule>
<tRule>и → i ; # CYRILLIC SMALL LETTER I</tRule>
<comment>
#
########################################################################
# BGN Page 94 Rule 3:
# Unusual Russian character sequences
# occurring primarily in non-Russian-language names may be
# romanized as shown below in order to provide differentiation from
# regularly-occurring digraphs and # character sequences.
# BGN Page 94 Rule 3.1
# й before а, у, ы, or э becomes у· #
########################################################################
#
# REMOVEOPTIONAL(ftang) The following two rules for · option is commented out.
# Й}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER I
# й}[АаУуЫыЭэ] → y· ; # CYRILLIC SMALL LETTER I
</comment>
<!-- Й always maps to Y/y; the optional middle-dot variants of Rule 3.1 are kept
     only as commented-out text in the preceding comment element. -->
<tRule>Й → Y ; # CYRILLIC CAPITAL LETTER I</tRule>
<tRule>й → y ; # CYRILLIC SMALL LETTER I</tRule>
<comment>
#
########################################################################
# End Rule 3.1 #
########################################################################
#
</comment>
<!-- Context-free one-to-one mappings for К, Л, М, Н, О, П, Р, С in both cases. -->
<tRule>К → K ; # CYRILLIC CAPITAL LETTER KA</tRule>
<tRule>к → k ; # CYRILLIC SMALL LETTER KA</tRule>
<tRule>Л → L ; # CYRILLIC CAPITAL LETTER EL</tRule>
<tRule>л → l ; # CYRILLIC SMALL LETTER EL</tRule>
<tRule>М → M ; # CYRILLIC CAPITAL LETTER EM</tRule>
<tRule>м → m ; # CYRILLIC SMALL LETTER EM</tRule>
<tRule>Н → N ; # CYRILLIC CAPITAL LETTER EN</tRule>
<tRule>н → n ; # CYRILLIC SMALL LETTER EN</tRule>
<tRule>О → O ; # CYRILLIC CAPITAL LETTER O</tRule>
<tRule>о → o ; # CYRILLIC SMALL LETTER O</tRule>
<tRule>П → P ; # CYRILLIC CAPITAL LETTER PE</tRule>
<tRule>п → p ; # CYRILLIC SMALL LETTER PE</tRule>
<tRule>Р → R ; # CYRILLIC CAPITAL LETTER ER</tRule>
<tRule>р → r ; # CYRILLIC SMALL LETTER ER</tRule>
<tRule>С → S ; # CYRILLIC CAPITAL LETTER ES</tRule>
<tRule>с → s ; # CYRILLIC SMALL LETTER ES</tRule>
<comment>
#
########################################################################
# BGN Page 94 Rule 3.5 # # тс becomes t·s #
########################################################################
#
# REMOVEOPTIONAL(ftang) The following three rules for · option is commented out
# and change to the version without ·
# ТС → T·S ; # CYRILLIC CAPITAL LETTER TE
# Тс → T·s ; # CYRILLIC CAPITAL LETTER TE
# тс → t·s ; # CYRILLIC SMALL LETTER TE
</comment>
<!-- Т and the two-letter sequence ТС. The sequence rules are the dot-less
     replacements for the Rule 3.5 variants (T·S and so on) and are listed before
     the single-letter Т rules. -->
<tRule>ТС → TS ; # CYRILLIC CAPITAL LETTER TE</tRule>
<tRule>Тс → Ts ; # CYRILLIC CAPITAL LETTER TE</tRule>
<tRule>тс → ts ; # CYRILLIC SMALL LETTER TE</tRule>
<tRule>Т → T ; # CYRILLIC CAPITAL LETTER TE</tRule>
<tRule>т → t ; # CYRILLIC SMALL LETTER TE</tRule>
<comment>
#
########################################################################
# End Rule 3.5 #
########################################################################
#
</comment>
<!-- У and Ф map one-to-one. Х, Ц and Ч map to digraphs: the capital becomes
     title-case (Kh, Ts, Ch) before a letter in $lower and all-caps (KH, TS, CH)
     otherwise; the small letters always map to kh, ts, ch. -->
<tRule>У → U ; # CYRILLIC CAPITAL LETTER U</tRule>
<tRule>у → u ; # CYRILLIC SMALL LETTER U</tRule>
<tRule>Ф → F ; # CYRILLIC CAPITAL LETTER EF</tRule>
<tRule>ф → f ; # CYRILLIC SMALL LETTER EF</tRule>
<tRule>Х} $lower → Kh ; # CYRILLIC CAPITAL LETTER HA</tRule>
<tRule>Х → KH ; # CYRILLIC CAPITAL LETTER HA</tRule>
<tRule>х → kh ; # CYRILLIC SMALL LETTER HA</tRule>
<tRule>Ц} $lower → Ts ; # CYRILLIC CAPITAL LETTER TSE</tRule>
<tRule>Ц → TS ; # CYRILLIC CAPITAL LETTER TSE</tRule>
<tRule>ц → ts ; # CYRILLIC SMALL LETTER TSE</tRule>
<tRule>Ч} $lower → Ch ; # CYRILLIC CAPITAL LETTER CHE</tRule>
<tRule>Ч → CH ; # CYRILLIC CAPITAL LETTER CHE</tRule>
<tRule>ч → ch ; # CYRILLIC SMALL LETTER CHE</tRule>
<comment>
#
########################################################################
# BGN Page 94 Rule 3.6 # # шч becomes sh·ch #
########################################################################
#
# REMOVEOPTIONAL(ftang) The following three rules for · option is commented out
# and change to the version without ·
# ШЧ → SH·CH ; # CYRILLIC CAPITAL LETTER SHA
# Шч → Sh·ch ; # CYRILLIC CAPITAL LETTER SHA
# шч → sh·ch ; # CYRILLIC SMALL LETTER SHA
</comment>
<!-- Ш, Щ and the two-letter sequence ШЧ. The sequence rules are the dot-less
     replacements for the Rule 3.6 variants (SH·CH and so on) and come first.
     Capital Ш and Щ become title-case (Sh, Shch) before a letter in $lower and
     all-caps otherwise; the small letters always map to sh and shch. -->
<tRule>ШЧ → SHCH ; # CYRILLIC CAPITAL LETTER SHA</tRule>
<tRule>Шч → Shch ; # CYRILLIC CAPITAL LETTER SHA</tRule>
<tRule>шч → shch ; # CYRILLIC SMALL LETTER SHA</tRule>
<tRule>Ш} $lower → Sh ; # CYRILLIC CAPITAL LETTER SHA</tRule>
<tRule>Ш → SH ; # CYRILLIC CAPITAL LETTER SHA</tRule>
<tRule>ш → sh ; # CYRILLIC SMALL LETTER SHA</tRule>
<tRule>Щ} $lower → Shch ; # CYRILLIC CAPITAL LETTER SHCHA</tRule>
<tRule>Щ → SHCH ; # CYRILLIC CAPITAL LETTER SHCHA</tRule>
<tRule>щ → shch ; # CYRILLIC SMALL LETTER SHCHA</tRule>
<comment>
#
########################################################################
# End Rule 3.6 #
########################################################################
#
</comment>
<!-- The hard sign maps to $doublePrime in both cases. -->
<tRule>Ъ → $doublePrime ; # CYRILLIC CAPITAL LETTER HARD SIGN</tRule>
<tRule>ъ → $doublePrime ; # CYRILLIC SMALL LETTER HARD SIGN</tRule>
<comment>
#
########################################################################
# BGN Page 94 Rule 3.2 # # ы before а, у, ы, or э becomes у·
# BGN Page 94 Rule 3.3 # # ы after any vowel character becomes ·у
########################################################################
#
# BUG(ftang) the following line said the vowels will change
# $vowels}Ы → ·Y ; # CYRILLIC CAPITAL LETTER I
# $vowels}ы → ·y ; # CYRILLIC CAPITAL LETTER I
# REMOVEOPTIONAL(ftang) The following two rules for · option is commented out.
#Ы}[АаУуЫыЭэ] → Y· ; # CYRILLIC CAPITAL LETTER YERU
#ы}[ауыэ] → y· ; # CYRILLIC SMALL LETTER YERU
</comment>
<!-- Ы always maps to Y/y; the optional middle-dot variants of Rules 3.2 and 3.3
     are kept only as commented-out text in the preceding comment element. -->
<tRule>Ы → Y ; # CYRILLIC CAPITAL LETTER YERU</tRule>
<tRule>ы → y ; # CYRILLIC SMALL LETTER YERU</tRule>
<comment>
#
########################################################################
# End Rule 3.2 and 3.3 #
########################################################################
#
</comment>
<!-- The soft sign maps to $prime in both cases and Э maps to E/e. Ю and Я map to
     digraphs: the capital becomes title-case (Yu, Ya) before a letter in $lower
     and all-caps (YU, YA) otherwise; the small letters always map to yu and ya. -->
<tRule>Ь → $prime ; # CYRILLIC CAPITAL LETTER SOFT SIGN</tRule>
<tRule>ь → $prime ; # CYRILLIC SMALL LETTER SOFT SIGN</tRule>
<tRule>Э → E ; # CYRILLIC CAPITAL LETTER E</tRule>
<tRule>э → e ; # CYRILLIC SMALL LETTER E</tRule>
<tRule>Ю} $lower → Yu ; # CYRILLIC CAPITAL LETTER YU</tRule>
<tRule>Ю → YU ; # CYRILLIC CAPITAL LETTER YU</tRule>
<tRule>ю → yu ; # CYRILLIC SMALL LETTER YU</tRule>
<tRule>Я} $lower → Ya ; # CYRILLIC CAPITAL LETTER YA</tRule>
<tRule>Я → YA ; # CYRILLIC CAPITAL LETTER YA</tRule>
<tRule>я → ya ; # CYRILLIC SMALL LETTER YA</tRule>
</transform>
</transforms>
</supplementalData>