UNPKG

larvitgeodata

Version:

Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.

193 lines (191 loc) 8.37 kB
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <!--
      Greek/Latin transliteration, UNGEGN variant, usable in both directions.

      Reading the rules:
        "↔"  rule applies in both directions
        "→"  rule applies only Greek to Latin
        "←"  rule applies only Latin to Greek
        "{" and "}" separate the text being replaced from its surrounding context
        "|"  positions the cursor so that the replacement text is processed again

      $under is appended to a Latin letter when more than one Greek letter maps
      to it (for example η ↔ i $under versus ι ↔ i), so that the reverse
      direction can tell the two apart.

      Rule order matters: earlier rules take precedence over later ones.
    -->
    <transform source="Greek" target="Latin" direction="both" variant="UNGEGN">
      <comment># For modern Greek, based on UNGEGN rules.</comment>
      <comment># Rules are predicated on running NFD first, and NFC afterwards</comment>
      <comment># MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN</comment>
      <comment># WARNING: need to add accents to both filters ###</comment>
      <comment># :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;</comment>

      <!-- Forward filter, then normalization (NFD going forward, NFC going backward). -->
      <tRule>:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;</tRule>
      <tRule>::NFD (NFC) ;</tRule>

      <!--
        Variable definitions.
        NOTE(review): $upper, $macron, $ddot, $lcgvowelC and $caron are defined
        here but are not referenced by any rule in this transform.
      -->
      <comment># Useful variables</comment>
      <tRule>$lower = [[:latin:][:greek:] &amp; [:Ll:]] ;</tRule>
      <tRule>$upper = [[:latin:][:greek:] &amp; [:Lu:]] ;</tRule>
      <tRule>$accent = [[:Mn:][:Me:]] ;</tRule>
      <tRule>$macron = ̄ ;</tRule>
      <tRule>$ddot = ̈ ;</tRule>
      <tRule>$lcgvowel = [αεηιουω] ;</tRule>
      <tRule>$ucgvowel = [ΑΕΗΙΟΥΩ] ;</tRule>
      <tRule>$gvowel = [$lcgvowel $ucgvowel] ;</tRule>
      <tRule>$lcgvowelC = [$lcgvowel $accent] ;</tRule>
      <tRule>$evowel = [aeiouyAEIOUY];</tRule>
      <tRule>$vowel = [ $evowel $gvowel] ;</tRule>
      <tRule>$beforeLower = $accent * $lower ;</tRule>
      <tRule>$gammaLike = [ΓΚΞΧγκξχϰ] ;</tRule>
      <tRule>$egammaLike = [GKXCgkxc] ;</tRule>
      <tRule>$smooth = ̓ ;</tRule>
      <tRule>$rough = ̔ ;</tRule>
      <tRule>$iotasub = ͅ ;</tRule>
      <tRule>$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;</tRule>
      <tRule>$under = ̱;</tRule>
      <tRule>$caron = ̌;</tRule>
      <tRule>$afterLetter = [:L:] [\'$accent]* ;</tRule>
      <tRule>$beforeLetter = [\'$accent]* [:L:] ;</tRule>

      <comment># Fix punctuation</comment>
      <comment># preserve original</comment>
      <tRule>\: ↔ \: $under ;</tRule>
      <tRule>\? ↔ \? $under ;</tRule>
      <tRule>\; ↔ \? ;</tRule>
      <tRule>· ↔ \: ;</tRule>

      <!-- Forward only: other accents become the acute; breathings and iota subscript are dropped. -->
      <comment># Fix any ancient characters that creep in</comment>
      <tRule>͂ → ́ ;</tRule>
      <tRule>̂ → ́ ;</tRule>
      <tRule>̀ → ́ ;</tRule>
      <tRule>$smooth → ;</tRule>
      <tRule>$rough → ;</tRule>
      <tRule>$iotasub → ;</tRule>
      <tRule>ͺ → ;</tRule>

      <comment># need to have these up here so the rules don't mask</comment>
      <tRule>η ↔ i $under ;</tRule>
      <tRule>Η ↔ I $under ;</tRule>
      <tRule>Ψ } $beforeLower ↔ Ps ;</tRule>
      <tRule>Ψ ↔ PS ;</tRule>
      <tRule>ψ ↔ ps ;</tRule>
      <tRule>ω ↔ o $under ;</tRule>
      <tRule>Ω ↔ O $under;</tRule>

      <comment># at beginning or end of word, convert mp to b</comment>
      <tRule>[^[:L:]$accent] { μπ → b ;</tRule>
      <tRule>μπ } [^[:L:]$accent] → b ;</tRule>
      <tRule>[^[:L:]$accent] { [Μμ][Ππ] → B ;</tRule>
      <tRule>[Μμ][Ππ] } [^[:L:]$accent] → B ;</tRule>
      <tRule>μπ ← b ;</tRule>
      <tRule>Μπ ← B } $beforeLower ;</tRule>
      <tRule>ΜΠ ← B ;</tRule>

      <comment># handle diphthongs ending with upsilon</comment>
      <tRule>ου ↔ ou ;</tRule>
      <tRule>ΟΥ ↔ OU ;</tRule>
      <tRule>Ου ↔ Ou ;</tRule>
      <tRule>οΥ ↔ oU ;</tRule>
      <!-- After a letter matched by $fmaker, upsilon becomes v before a $softener and f otherwise. -->
      <tRule>$fmaker = [aeiAEI] $under ? ;</tRule>
      <tRule>$shiftForwardVowels = [[:Mn:]-[̈]]; # note: a diaeresis keeps the items separate</tRule>
      <tRule>$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;</tRule>
      <tRule>υ $1 ← ( $shiftForwardVowels )* v $under ;</tRule>
      <tRule>$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;</tRule>
      <tRule>υ $1 ← ( $shiftForwardVowels )* f $under ;</tRule>
      <tRule>$fmaker { Υ } $softener ↔ V $under ;</tRule>
      <tRule>$fmaker { Υ ↔ U $under ;</tRule>
      <tRule>υ ↔ y ;</tRule>
      <tRule>Υ ↔ Y ;</tRule>

      <comment># NORMAL</comment>
      <tRule>α ↔ a ;</tRule>
      <tRule>Α ↔ A ;</tRule>
      <tRule>β ↔ v ;</tRule>
      <tRule>Β ↔ V ;</tRule>
      <tRule>γ } $gammaLike ↔ n } $egammaLike ;</tRule>
      <tRule>γ ↔ g ;</tRule>
      <tRule>Γ } $gammaLike ↔ N } $egammaLike ;</tRule>
      <tRule>Γ ↔ G ;</tRule>
      <tRule>δ ↔ d ;</tRule>
      <tRule>Δ ↔ D ;</tRule>
      <tRule>ε ↔ e ;</tRule>
      <tRule>Ε ↔ E ;</tRule>
      <tRule>ζ ↔ z ;</tRule>
      <tRule>Ζ ↔ Z ;</tRule>
      <tRule>θ ↔ th ;</tRule>
      <tRule>Θ } $beforeLower ↔ Th ;</tRule>
      <tRule>Θ ↔ TH ;</tRule>
      <tRule>ι ↔ i ;</tRule>
      <tRule>Ι ↔ I ;</tRule>
      <tRule>κ ↔ k ;</tRule>
      <tRule>Κ ↔ K ;</tRule>
      <tRule>λ ↔ l ;</tRule>
      <tRule>Λ ↔ L ;</tRule>
      <tRule>μ ↔ m ;</tRule>
      <tRule>Μ ↔ M ;</tRule>
      <tRule>ν } $gammaLike → n\' ;</tRule>
      <tRule>ν ↔ n ;</tRule>
      <tRule>Ν } $gammaLike ↔ N\' ;</tRule>
      <tRule>Ν ↔ N ;</tRule>
      <tRule>ξ ↔ x ;</tRule>
      <tRule>Ξ ↔ X ;</tRule>
      <tRule>ο ↔ o ;</tRule>
      <tRule>Ο ↔ O ;</tRule>
      <tRule>π ↔ p ;</tRule>
      <tRule>Π ↔ P ;</tRule>
      <tRule>ρ ↔ r ;</tRule>
      <tRule>Ρ ↔ R ;</tRule>

      <comment># insert separator before things that turn into s</comment>
      <tRule>[Pp] { } [ςσΣϷϸϺϻ] → \' ;</tRule>

      <comment># special S variants</comment>
      <tRule>Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L</tRule>
      <tRule>ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L</tRule>
      <tRule>Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L</tRule>
      <tRule>ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L</tRule>

      <comment># Caron means exception</comment>
      <comment># before a letter, initial</comment>
      <tRule>ς } $beforeLetter ↔ s $under } $beforeLetter;</tRule>
      <tRule>σ } $beforeLetter ↔ s } $beforeLetter;</tRule>
      <comment># otherwise, after a letter = final</comment>
      <tRule>$afterLetter { σ ↔ $afterLetter { s $under;</tRule>
      <tRule>$afterLetter { ς ↔ $afterLetter { s ;</tRule>
      <comment># otherwise (isolated) = initial</comment>
      <tRule>ς ↔ s $under;</tRule>
      <tRule>σ ↔ s ;</tRule>
      <comment># [Pp] { Σ ↔ \'S ;</comment>
      <tRule>Σ ↔ S ;</tRule>
      <tRule>τ ↔ t ;</tRule>
      <tRule>Τ ↔ T ;</tRule>
      <tRule>φ ↔ f ;</tRule>
      <tRule>Φ ↔ F ;</tRule>
      <tRule>χ ↔ ch ;</tRule>
      <tRule>Χ } $beforeLower ↔ Ch ;</tRule>
      <tRule>Χ ↔ CH ;</tRule>

      <!-- Reverse only: Latin letters with no rule above are rewritten to ones that have, then reprocessed. -->
      <comment># Completeness for ASCII</comment>
      <comment># $ignore = [[:Mark:]''] * ;</comment>
      <tRule>| ch ← h ;</tRule>
      <tRule>| k ← c ;</tRule>
      <tRule>| i ← j ;</tRule>
      <tRule>| k ← q ;</tRule>
      <tRule>| b ← u } $vowel ;</tRule>
      <tRule>| b ← w } $vowel ;</tRule>
      <tRule>| y ← u ;</tRule>
      <tRule>| y ← w ;</tRule>
      <tRule>| Ch ← H ;</tRule>
      <tRule>| K ← C ;</tRule>
      <tRule>| I ← J ;</tRule>
      <tRule>| K ← Q ;</tRule>
      <tRule>| B ← W } $vowel ;</tRule>
      <tRule>| B ← U } $vowel ;</tRule>
      <tRule>| Y ← W ;</tRule>
      <tRule>| Y ← U ;</tRule>

      <!-- Forward only: Greek symbol variants are rewritten to the basic letter, then reprocessed. -->
      <comment># Completeness for Greek</comment>
      <tRule>ϐ → | β ;</tRule>
      <tRule>ϑ → | θ ;</tRule>
      <tRule>ϒ → | Υ ;</tRule>
      <tRule>ϕ → | φ ;</tRule>
      <tRule>ϖ → | π ;</tRule>
      <tRule>ϰ → | κ ;</tRule>
      <tRule>ϱ → | ρ ;</tRule>
      <tRule>ϲ → | σ ;</tRule>
      <tRule>Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL</tRule>
      <tRule>ϳ → j ;</tRule>
      <tRule>ϴ → | Θ ;</tRule>
      <tRule>ϵ → | ε ;</tRule>
      <tRule>µ → | μ ;</tRule>

      <comment># delete any trailing ' marks used for roundtripping</comment>
      <tRule>← [Ππ] { \' } [Ss] ;</tRule>
      <tRule>← [Νν] { \' } $egammaLike ;</tRule>

      <tRule>::NFC (NFD) ;</tRule>
      <comment># MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD</comment>
      <tRule>:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;</tRule>
    </transform>
  </transforms>
</supplementalData>