larvitgeodata
Version:
Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.
193 lines (191 loc) • 8.37 kB
text/xml
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision: 11914 $"/>
<transforms>
<transform source="Greek" target="Latin" direction="both" variant="UNGEGN">
<comment># For modern Greek, based on UNGEGN rules.</comment>
<comment># Rules are predicated on running NFD first, and NFC afterwards</comment>
<comment># MINIMAL FILTER GENERATED FOR: Greek-Latin/UNGEGN</comment>
<comment># WARNING: need to add accents to both filters ###</comment>
<comment># :: [́̄̆̈;µ·ÀÂÈÊÌÎÒÔÙÛàâèêìîòôùûĈ-ĉĜ-ĝĤ-ĥĴ-ĵŜ-ŝŴ-ŷǛ-ǜǸ-ǹ̀̂̓-̔̀͂-̓ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϖϰ-ϵЀЍѐѝḔ-ḕṐ-ṑẀ-ẁẐ-ẑẤ-ậẰ-ằẾ-ệỐ-ộỜ-ờỪ-ừỲ-ỳἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-῍῏-ΐῖ-Ί῝῟-῭ῲ-ῴῶ-ῼΩϷ-\u07FBϹ] ;</comment>
<!-- Filter for the Greek to Latin direction: only Greek-script characters, marks in categories Mn/Me and the punctuation listed in the second set are offered to the rules below. -->
<tRule>:: [[[:Greek:][:Mn:][:Me:]] [\:-;?·;·]] ;</tRule>
<!-- Normalise to NFD before the conversion rules run; the parenthesised NFC is the counterpart used when the rule set is run in the reverse (Latin to Greek) direction. -->
<tRule>::NFD (NFC) ;</tRule>
<comment># Useful variables</comment>
<!-- $lower / $upper: Latin or Greek characters intersected with general category Ll (lowercase) / Lu (uppercase). The set-intersection operator is an ampersand, which has to be written as an entity reference inside XML element content; a bare ampersand makes the document ill-formed. -->
<tRule>$lower = [[:latin:][:greek:] &amp; [:Ll:]] ;</tRule>
<tRule>$upper = [[:latin:][:greek:] &amp; [:Lu:]] ;</tRule>
<!-- Shared variable definitions used by the conversion rules further down. -->
<!-- $accent: any nonspacing (Mn) or enclosing (Me) mark. -->
<tRule>$accent = [[:Mn:][:Me:]] ;</tRule>
<!-- NOTE(review): $macron and $ddot are defined here but not referenced by any rule inside this transform element. -->
<tRule>$macron = ̄ ;</tRule>
<tRule>$ddot = ̈ ;</tRule>
<!-- Greek vowel sets (lower case, upper case, both). -->
<tRule>$lcgvowel = [αεηιουω] ;</tRule>
<tRule>$ucgvowel = [ΑΕΗΙΟΥΩ] ;</tRule>
<tRule>$gvowel = [$lcgvowel $ucgvowel] ;</tRule>
<!-- NOTE(review): $lcgvowelC is not referenced by any rule inside this transform element. -->
<tRule>$lcgvowelC = [$lcgvowel $accent] ;</tRule>
<!-- Latin vowels, and the union of Latin and Greek vowels (used by the u/w fallback rules). -->
<tRule>$evowel = [aeiouyAEIOUY];</tRule>
<tRule>$vowel = [ $evowel $gvowel] ;</tRule>
<!-- $beforeLower: look-ahead pattern, any number of accents followed by a lowercase letter; used to choose title case (Ps, Th, Ch) over full upper case. -->
<tRule>$beforeLower = $accent * $lower ;</tRule>
<!-- Letters before which gamma is rendered as n: the Greek set and its Latin counterpart for the reverse direction. -->
<tRule>$gammaLike = [ΓΚΞΧγκξχϰ] ;</tRule>
<tRule>$egammaLike = [GKXCgkxc] ;</tRule>
<!-- Diacritics that the forward rules delete (see the rules that map $smooth, $rough and $iotasub to nothing). -->
<tRule>$smooth = ̓ ;</tRule>
<tRule>$rough = ̔ ;</tRule>
<tRule>$iotasub = ͅ ;</tRule>
<!-- $softener: following letters that make upsilon after a/e/i come out as v instead of f. -->
<tRule>$softener = [βΒγΓδΔζΖλΛμΜνΝρΡ$gvowel] ;</tRule>
<!-- $under: combining mark appended to a Latin letter to keep the round trip unambiguous (for example eta maps to i plus $under while iota maps to plain i). -->
<tRule>$under = ̱;</tRule>
<!-- NOTE(review): $caron is defined but not referenced by any rule inside this transform element. -->
<tRule>$caron = ̌;</tRule>
<!-- Context patterns for "preceded by a letter" and "followed by a letter"; apostrophes and accents may sit between. -->
<tRule>$afterLetter = [:L:] [\'$accent]* ;</tRule>
<tRule>$beforeLetter = [\'$accent]* [:L:] ;</tRule>
<comment># Fix punctuation</comment>
<comment># preserve original</comment>
<!-- Punctuation. A colon or question mark already present in the Greek text is tagged with $under so it can be told apart from the colon and question mark that the next two rules produce. -->
<tRule>\: ↔ \: $under ;</tRule>
<tRule>\? ↔ \? $under ;</tRule>
<tRule>\; ↔ \? ;</tRule>
<tRule>· ↔ \: ;</tRule>
<comment># Fix any ancient characters that creep in</comment>
<!-- Forward only: three other accent marks are replaced by the single accent on the right-hand side, and breathings / iota subscript are removed (empty right-hand side). -->
<tRule>͂ → ́ ;</tRule>
<tRule>̂ → ́ ;</tRule>
<tRule>̀ → ́ ;</tRule>
<tRule>$smooth → ;</tRule>
<tRule>$rough → ;</tRule>
<tRule>$iotasub → ;</tRule>
<tRule>ͺ → ;</tRule>
<comment># need to have these up here so the rules don't mask</comment>
<!-- These multi-character Latin forms (letter plus $under, or Ps/PS/ps) must be tried before the single-letter rules further down, which would otherwise match their first character. -->
<tRule>η ↔ i $under ;</tRule>
<tRule>Η ↔ I $under ;</tRule>
<!-- Capital psi becomes title-case Ps when a lowercase letter follows, otherwise PS. -->
<tRule>Ψ } $beforeLower ↔ Ps ;</tRule>
<tRule>Ψ ↔ PS ;</tRule>
<tRule>ψ ↔ ps ;</tRule>
<tRule>ω ↔ o $under ;</tRule>
<tRule>Ω ↔ O $under;</tRule>
<comment># at beginning or end of word, convert mp to b</comment>
<!-- Forward: mu+pi becomes b / B only when the character before it, or the character after it, is neither a letter nor an accent (word edge). The mixed-case sets catch every capitalisation not matched by the lowercase rule. -->
<tRule>[^[:L:]$accent] { μπ → b ;</tRule>
<tRule>μπ } [^[:L:]$accent] → b ;</tRule>
<tRule>[^[:L:]$accent] { [Μμ][Ππ] → B ;</tRule>
<tRule>[Μμ][Ππ] } [^[:L:]$accent] → B ;</tRule>
<!-- Backward: b / B always expands to mu+pi; a capital B followed by a lowercase letter yields title case. -->
<tRule>μπ ← b ;</tRule>
<tRule>Μπ ← B } $beforeLower ;</tRule>
<tRule>ΜΠ ← B ;</tRule>
<comment># handle diphthongs ending with upsilon</comment>
<!-- omicron+upsilon maps to ou in all four case combinations. -->
<tRule>ου ↔ ou ;</tRule>
<tRule>ΟΥ ↔ OU ;</tRule>
<tRule>Ου ↔ Ou ;</tRule>
<tRule>οΥ ↔ oU ;</tRule>
<!-- $fmaker: a Latin a, e or i (either case) optionally followed by $under; presumably the already-converted output of a preceding alpha, epsilon or eta. -->
<tRule>$fmaker = [aeiAEI] $under ? ;</tRule>
<tRule>$shiftForwardVowels = [[:Mn:]-[̈]]; # note: a diaeresis keeps the items separate</tRule>
<!-- After $fmaker, lowercase upsilon becomes v+$under before a $softener letter and f+$under otherwise. Marks captured after the upsilon (segment $1) are emitted in front of the consonant, and the backward rules put them back after the upsilon. -->
<tRule>$fmaker { υ ( $shiftForwardVowels )* } $softener → $1 v $under ;</tRule>
<tRule>υ $1 ← ( $shiftForwardVowels )* v $under ;</tRule>
<tRule>$fmaker { υ ( $shiftForwardVowels )* } → $1 f $under;</tRule>
<tRule>υ $1 ← ( $shiftForwardVowels )* f $under ;</tRule>
<!-- Capital upsilon after $fmaker: V+$under before a $softener letter, otherwise U+$under. -->
<tRule>$fmaker { Υ } $softener ↔ V $under ;</tRule>
<tRule>$fmaker { Υ ↔ U $under ;</tRule>
<!-- Any remaining upsilon is y / Y. -->
<tRule>υ ↔ y ;</tRule>
<tRule>Υ ↔ Y ;</tRule>
<comment># NORMAL</comment>
<!-- One-to-one letter mappings, alpha through rho. Context-dependent rules are listed before the plain rule for the same letter so that they are tried first. -->
<tRule>α ↔ a ;</tRule>
<tRule>Α ↔ A ;</tRule>
<tRule>β ↔ v ;</tRule>
<tRule>Β ↔ V ;</tRule>
<!-- Gamma before a $gammaLike letter is written n (backward: n before a $egammaLike Latin letter becomes gamma). -->
<tRule>γ } $gammaLike ↔ n } $egammaLike ;</tRule>
<tRule>γ ↔ g ;</tRule>
<tRule>Γ } $gammaLike ↔ N } $egammaLike ;</tRule>
<tRule>Γ ↔ G ;</tRule>
<tRule>δ ↔ d ;</tRule>
<tRule>Δ ↔ D ;</tRule>
<tRule>ε ↔ e ;</tRule>
<tRule>Ε ↔ E ;</tRule>
<tRule>ζ ↔ z ;</tRule>
<tRule>Ζ ↔ Z ;</tRule>
<tRule>θ ↔ th ;</tRule>
<!-- Capital theta: Th when a lowercase letter follows, otherwise TH. -->
<tRule>Θ } $beforeLower ↔ Th ;</tRule>
<tRule>Θ ↔ TH ;</tRule>
<tRule>ι ↔ i ;</tRule>
<tRule>Ι ↔ I ;</tRule>
<tRule>κ ↔ k ;</tRule>
<tRule>Κ ↔ K ;</tRule>
<tRule>λ ↔ l ;</tRule>
<tRule>Λ ↔ L ;</tRule>
<tRule>μ ↔ m ;</tRule>
<tRule>Μ ↔ M ;</tRule>
<!-- A real nu before a $gammaLike letter gets a trailing apostrophe so it is not confused with the n produced from gamma. -->
<!-- NOTE(review): the lowercase rule is forward-only while the uppercase rule is bidirectional; confirm whether this asymmetry is intended. -->
<tRule>ν } $gammaLike → n\' ;</tRule>
<tRule>ν ↔ n ;</tRule>
<tRule>Ν } $gammaLike ↔ N\' ;</tRule>
<tRule>Ν ↔ N ;</tRule>
<tRule>ξ ↔ x ;</tRule>
<tRule>Ξ ↔ X ;</tRule>
<tRule>ο ↔ o ;</tRule>
<tRule>Ο ↔ O ;</tRule>
<tRule>π ↔ p ;</tRule>
<tRule>Π ↔ P ;</tRule>
<tRule>ρ ↔ r ;</tRule>
<tRule>Ρ ↔ R ;</tRule>
<comment># insert separator before things that turn into s</comment>
<!-- Forward only: insert an apostrophe between an already-produced Latin P/p and a following sigma-like Greek letter, so that pi+sigma does not collide with the ps / Ps / PS output of psi. -->
<tRule>[Pp] { } [ςσΣϷϸϺϻ] → \' ;</tRule>
<comment># special S variants</comment>
<tRule>Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L</tRule>
<tRule>ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L</tRule>
<tRule>Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L</tRule>
<tRule>ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L</tRule>
<comment># Caron means exception</comment>
<!-- NOTE(review): the comment above mentions a caron, but the rules below mark the exceptional sigma form with $under, not $caron. -->
<comment># before a letter, initial</comment>
<!-- Followed by a letter: the ordinary form is σ (plain s); a final-form ς in this position is the exception and is tagged with $under. -->
<tRule>ς } $beforeLetter ↔ s $under } $beforeLetter;</tRule>
<tRule>σ } $beforeLetter ↔ s } $beforeLetter;</tRule>
<comment># otherwise, after a letter = final</comment>
<!-- Preceded by a letter and not followed by one: the ordinary form is ς (plain s); a σ here is the exception and is tagged with $under. -->
<tRule>$afterLetter { σ ↔ $afterLetter { s $under;</tRule>
<tRule>$afterLetter { ς ↔ $afterLetter { s ;</tRule>
<comment># otherwise (isolated) = initial</comment>
<tRule>ς ↔ s $under;</tRule>
<tRule>σ ↔ s ;</tRule>
<comment># [Pp] { Σ ↔ \'S ;</comment>
<tRule>Σ ↔ S ;</tRule>
<tRule>τ ↔ t ;</tRule>
<tRule>Τ ↔ T ;</tRule>
<tRule>φ ↔ f ;</tRule>
<tRule>Φ ↔ F ;</tRule>
<tRule>χ ↔ ch ;</tRule>
<!-- Capital chi: Ch when a lowercase letter follows, otherwise CH. -->
<tRule>Χ } $beforeLower ↔ Ch ;</tRule>
<tRule>Χ ↔ CH ;</tRule>
<comment># Completeness for ASCII</comment>
<comment># $ignore = [[:Mark:]''] * ;</comment>
<!-- Backward (Latin to Greek) only: Latin letters that no rule above produces are first rewritten to a Latin spelling that does have a rule. The leading | places the cursor before the replacement so that it is processed again by the rules above (LDML cursor syntax). -->
<tRule>| ch ← h ;</tRule>
<tRule>| k ← c ;</tRule>
<tRule>| i ← j ;</tRule>
<tRule>| k ← q ;</tRule>
<!-- u and w become b before a vowel, otherwise y. -->
<tRule>| b ← u } $vowel ;</tRule>
<tRule>| b ← w } $vowel ;</tRule>
<tRule>| y ← u ;</tRule>
<tRule>| y ← w ;</tRule>
<!-- Upper-case counterparts of the rules above. -->
<tRule>| Ch ← H ;</tRule>
<tRule>| K ← C ;</tRule>
<tRule>| I ← J ;</tRule>
<tRule>| K ← Q ;</tRule>
<tRule>| B ← W } $vowel ;</tRule>
<tRule>| B ← U } $vowel ;</tRule>
<tRule>| Y ← W ;</tRule>
<tRule>| Y ← U ;</tRule>
<comment># Completeness for Greek</comment>
<!-- Forward (Greek to Latin) only: Greek symbol and variant letter forms are replaced by the ordinary letter, with the cursor (|) placed before it so that the regular rule for that letter then applies. -->
<tRule>ϐ → | β ;</tRule>
<tRule>ϑ → | θ ;</tRule>
<tRule>ϒ → | Υ ;</tRule>
<tRule>ϕ → | φ ;</tRule>
<tRule>ϖ → | π ;</tRule>
<tRule>ϰ → | κ ;</tRule>
<tRule>ϱ → | ρ ;</tRule>
<tRule>ϲ → | σ ;</tRule>
<tRule>Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL</tRule>
<!-- This one maps straight to Latin j, with no cursor reset. -->
<tRule>ϳ → j ;</tRule>
<tRule>ϴ → | Θ ;</tRule>
<tRule>ϵ → | ε ;</tRule>
<tRule>µ → | μ ;</tRule>
<comment># delete any trailing ' marks used for roundtripping</comment>
<!-- Backward only: drop the separator apostrophe that the forward rules placed after pi (before s) and after nu (before a $egammaLike letter); the left context is the Greek letter already produced. -->
<tRule>← [Ππ] { \' } [Ss] ;</tRule>
<tRule>← [Νν] { \' } $egammaLike ;</tRule>
<!-- Recompose (NFC) after the forward rules; the parenthesised NFD is the counterpart used in the reverse direction. -->
<tRule>::NFC (NFD) ;</tRule>
<comment># MINIMAL FILTER GENERATED FOR: Latin-Greek/UNGEGN BACKWARD</comment>
<!-- Reverse-direction filter (written in parentheses): Latin-script characters, Mn/Me marks, apostrophe, colon and question mark. -->
<tRule>:: ([[[:Latin:][:Mn:][:Me:]] ['\:?]]) ;</tRule>
</transform>
</transforms>
</supplementalData>