<?xml version="1.0" encoding="UTF-8"?>
<!--
larvitgeodata
Version:
Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.
264 lines (262 loc) • 13.4 kB
text/xml
-->
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision: 11914 $"/>
<transforms>
<transform source="Greek" target="Latin" direction="both">
<comment># Rules are predicated on running NFD first, and NFC afterwards</comment>
<comment># :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;</comment>
<comment># MINIMAL FILTER GENERATED FOR: Greek-Latin</comment>
<tRule>:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;</tRule>
<tRule>:: NFD (NFC) ;</tRule>
<comment># TEST CASES</comment>
<comment># Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος</comment>
<comment># ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ</comment>
<comment># ᾳ ῃ ῳ ὃ ὄ</comment>
<comment># ὠς ὡς ὢς ὣς</comment>
<comment># Ὠς Ὡς Ὢς Ὣς</comment>
<comment># ὨΣ ὩΣ ὪΣ ὫΣ</comment>
<comment># Ạ, ạ, Ẹ, ẹ, Ọ, ọ</comment>
<comment># Useful variables</comment>
<!--
  Variable definitions shared by the rules of this transform. A tRule of the
  form "$name = value ;" binds a name that later rules reference as $name.

  The set-intersection operator inside the character classes is written as the
  entity reference for the ampersand, because a bare ampersand is not allowed
  in XML character data. A parser hands the rule engine the plain character.

  Maintainer notes:
    * $underbar and $under (the last definition in this group) appear to be
      assigned the same combining character. Both names are referenced by
      later rules, so both definitions are kept.
    * $accentMinus (defined here) is a different variable from $accent_minus,
      which is defined next to the iota-subscript rules.
    * $lcgvowelC and $evowel_i are not referenced by any rule visible in this
      transform.
    * NOTE(review): "interation" in the TODO below looks like a typo,
      presumably for "iteration". Confirm against upstream before changing.
-->
<!-- Case classes: lowercase / uppercase letters restricted to Latin and Greek script. -->
<tRule>$lower = [[:latin:][:greek:] &amp; [:Ll:]];</tRule>
<tRule>$glower = [[:greek:] &amp; [:Ll:]];</tRule>
<tRule>$upper = [[:latin:][:greek:] &amp; [:Lu:]] ;</tRule>
<!-- Combining marks. -->
<tRule>$accent = [:M:] ;</tRule>
<comment># NOTE: restrict to just the Greek &amp; Latin accents that we care about</comment>
<comment># TODO: broaden out once interation is fixed</comment>
<tRule>$accentMinus = [ [̀-ͅ] &amp; [:M:] - [̸]] ;</tRule>
<tRule>$macron = ̄ ;</tRule>
<tRule>$ddot = ̈ ;</tRule>
<tRule>$ddotmac = [$ddot$macron];</tRule>
<!-- Vowel classes: "g" prefix = Greek letters, "e" prefix = Latin letters. -->
<tRule>$lcgvowel = [αεηιουω] ;</tRule>
<tRule>$ucgvowel = [ΑΕΗΙΟΥΩ] ;</tRule>
<tRule>$gvowel = [$lcgvowel $ucgvowel] ;</tRule>
<tRule>$lcgvowelC = [$lcgvowel $accent] ;</tRule>
<tRule>$evowel = [aeiouyAEIOUY];</tRule>
<tRule>$evowel2 = [iuyIUY];</tRule>
<tRule>$vowel = [ $evowel $gvowel] ;</tRule>
<!-- Letters before which gamma is written as n (see the gamma rules in the NORMAL section). -->
<tRule>$gammaLike = [ΓΚΞΧγκξχϰ] ;</tRule>
<tRule>$egammaLike = [GKXCgkxc] ;</tRule>
<!-- Breathing marks and iota subscript, as combining characters. -->
<tRule>$smooth = ̓ ;</tRule>
<tRule>$rough = ̔ ;</tRule>
<tRule>$iotasub = ͅ ;</tRule>
<tRule>$evowel_i = [$evowel-[iI]] ;</tRule>
<tRule>$evowel2_i = [uyUY];</tRule>
<tRule>$underbar = ̱;</tRule>
<!-- Context helpers: a letter with optional trailing / leading marks or apostrophes. -->
<tRule>$afterLetter = [:L:] [[:M:]\']* ;</tRule>
<tRule>$beforeLetter = [[:M:]\']* [:L:] ;</tRule>
<tRule>$beforeLower = $accent * $lower ;</tRule>
<tRule>$notLetter = [^[:L:][:M:]] ;</tRule>
<tRule>$under = ̱;</tRule>
<comment># Fix punctuation</comment>
<comment># preserve original</comment>
<!--
  Punctuation mapping. All four rules use the two-way arrow, so they apply in
  both the Greek to Latin and the Latin to Greek direction.
    * A ':' or '?' already present on the Greek side is carried over with
      $under appended.
    * The Greek-side ';' maps to '?', and '·' maps to ':'.
  NOTE(review): presumably the $under tag is what lets the reverse direction
  tell an original ':' or '?' apart from one produced from '·' or ';'. Confirm.
  The tagged rules come first, so they win over the untagged ones when both
  could match on the Latin side.
-->
<tRule>\: ↔ \: $under ;</tRule>
<tRule>\? ↔ \? $under ;</tRule>
<tRule>\; ↔ \? ;</tRule>
<tRule>· ↔ \: ;</tRule>
<comment># CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve</comment>
<!-- Single two-way mapping between two combining marks (Greek-side mark on the left, Latin-side mark on the right). -->
<tRule>͂ ↔ ̂ ;</tRule>
<comment># IOTA: convert iota subscript to iota</comment>
<comment># first make previous alpha long!</comment>
<!--
  Iota-subscript handling.
  Rule syntax used here (LDML transform rules): text after '}' is trailing
  context that must match but is not replaced; text before '{' is leading
  context; '|' on the result side sets the cursor so the text after it is
  examined again by the following rules; $1 refers to the parenthesised group.

  $accent_minus = any mark except the iota subscript and the macron. It is a
  separate variable from $accentMinus defined with the other variables.
-->
<tRule>$accent_minus = [[$accent]-[$iotasub$macron]];</tRule>
<!-- An alpha that is followed (after other marks) by an iota subscript gets a macron added; the cursor is placed before it so it is processed again. -->
<tRule>Α } $accent_minus * $iotasub → | Α $macron ;</tRule>
<tRule>α } $accent_minus * $iotasub → | α $macron ;</tRule>
<!-- "ow" in the comment below abbreviates "otherwise". -->
<comment># now convert to uppercase if after uppercase, ow to lowercase</comment>
<!-- Forward: iota subscript becomes I after an uppercase letter (plus marks), else i. -->
<tRule>$upper $accent * { $iotasub → I ;</tRule>
<tRule>$iotasub → i ;</tRule>
<!-- Backward: Latin vowel + macron (+ marks) followed by i or I becomes that group plus an iota subscript, then is processed again from the cursor. -->
<tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;</tRule>
<tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;</tRule>
<comment># BREATHING</comment>
<comment># Convert rough breathing to h, and move before letters.</comment>
<!--
  Forward (Greek to Latin) rules for the rough breathing mark $rough.
  In every rule the result starts with H or h, followed by '|' (cursor) and the
  Greek vowel text, so the vowel itself is still transliterated by later rules.

  Group 1: capital vowel carrying $rough, followed by a lowercase letter
           ($beforeLower as trailing context). Output is H plus the vowel
           lowercased, i.e. titlecase is kept on the H.
  NOTE(review): the alpha rule captures an optional $macron, while the iota
  and omega rules capture an optional $ddot. Whether that difference is
  intentional cannot be determined from this file.
-->
<comment># Make A ` x = → H a x</comment>
<tRule>Α ($macron?) $rough } $beforeLower → H | α $1;</tRule>
<tRule>Ε $rough } $beforeLower → H | ε;</tRule>
<tRule>Η $rough } $beforeLower → H | η ;</tRule>
<tRule>Ι ($ddot?) $rough } $beforeLower → H | ι $1;</tRule>
<tRule>Ο $rough } $beforeLower → H | ο ;</tRule>
<tRule>Υ $rough } $beforeLower → H | υ ;</tRule>
<tRule>Ω ($ddot?) $rough } $beforeLower → H | ω $1;</tRule>
<!-- Group 2: capital vowel, then one lowercase Greek letter that carries $rough. Same H + lowercased-vowel output, with the captured letter re-emitted after it. -->
<comment># Make A x ` = → H a x</comment>
<tRule>Α ($glower $macron?) $rough → H | α $1 ;</tRule>
<tRule>Ε ($glower) $rough → H | ε $1 ;</tRule>
<tRule>Η ($glower) $rough → H | η $1 ;</tRule>
<tRule>Ι ($glower $ddot?) $rough → H | ι $1 ;</tRule>
<tRule>Ο ($glower) $rough → H | ο $1 ;</tRule>
<tRule>Υ ($glower) $rough → H | υ $1 ;</tRule>
<tRule>Ω ($glower $ddot?) $rough → H | ω $1 ;</tRule>
<!-- Group 3: fallback. The lowercase-only rule comes first, so the second rule is reached only when the vowel run contains an uppercase vowel. -->
<comment>#Otherwise, make x ` into h x and X ` into H X</comment>
<tRule>($lcgvowel + $ddotmac? ) $rough → h | $1 ;</tRule>
<tRule>($gvowel + $ddotmac? ) $rough → H | $1 ;</tRule>
<comment># Go backwards with H</comment>
<!--
  Backward (Latin to Greek) rules: an h or H followed by a Latin vowel group is
  replaced by that group with $rough appended. The leading '|' puts the cursor
  before the result, so the Latin vowels are then converted by later rules.
  For each case the patterns run from most specific to least specific:
    1. vowel + macron + second vowel (u or y class, $evowel2_i)
    2. vowel + second vowel ($evowel2)
    3. single vowel with optional macron / diaeresis
  The first three rules handle lowercase h, the last three uppercase H followed
  by an uppercase vowel.
-->
<tRule>| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;</tRule>
<tRule>| $1 $rough ← h ($evowel $macron? $ddot?) ;</tRule>
<tRule>| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;</tRule>
<tRule>| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;</tRule>
<comment># titlecase, have to fix individually</comment>
<comment># in the future, we should add &amp;uppercase() to make this easier</comment>
<!--
  Backward (Latin to Greek) rules for titlecase input: H followed by a
  lowercase Latin vowel. The result is the uppercase form of that vowel, the
  captured remainder ($1) and $rough; the leading '|' puts the cursor before
  the result so it is converted by later rules.
  Each vowel is listed separately because the rule language used here has no
  uppercase function (see the comment above). The ampersand in that comment is
  written as an entity reference, since a bare ampersand is not allowed in XML
  character data.
  The three groups below mirror the three pattern shapes, most specific first:
    1. vowel + macron + second vowel ($evowel2_i)
    2. vowel + second vowel ($evowel2)
    3. single vowel with optional macron / diaeresis
-->
<tRule>| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;</tRule>
<tRule>| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;</tRule>
<tRule>| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;</tRule>
<tRule>| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;</tRule>
<tRule>| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;</tRule>
<tRule>| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;</tRule>
<tRule>| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;</tRule>
<tRule>| A $1 $rough ← H a ($macron? $ddot? ) ;</tRule>
<tRule>| E $1 $rough ← H e ($macron? $ddot? ) ;</tRule>
<tRule>| I $1 $rough ← H i ($macron? $ddot? ) ;</tRule>
<tRule>| O $1 $rough ← H o ($macron? $ddot? ) ;</tRule>
<tRule>| U $1 $rough ← H u ($macron? $ddot? ) ;</tRule>
<tRule>| Y $1 $rough ← H y ($macron? $ddot? ) ;</tRule>
<comment># Now do smooth</comment>
<!--
  Smooth breathing mark $smooth.
  Forward (Greek to Latin): the mark is deleted (empty result).
  Backward (Latin to Greek): $smooth is added after a word-initial r/R or
  word-initial vowel group. "Word-initial" is expressed by the leading context
  $notLetter (a character that is neither a letter nor a mark). The trailing
  context excludes text that already carries a breathing mark, and for r/R
  also a following h/H. The leading '|' puts the cursor before the result so
  the letters are converted by later rules.
-->
<comment>#delete smooth breathing for Latin</comment>
<tRule>$smooth → ;</tRule>
<comment># insert in Greek</comment>
<comment># the assumption is that all Marks are on letters.</comment>
<tRule>| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;</tRule>
<!-- Two-vowel group first, then the single-vowel case, which excludes a following $evowel2 so it does not split such a pair. -->
<tRule>| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;</tRule>
<tRule>| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;</tRule>
<comment># TODO: preserve smooth/rough breathing if not</comment>
<comment># on initial vowel sequence</comment>
<!--
  Multi-character Latin sequences (vowel + macron, and the digraphs ph / ps).
  As the comment below says, these are placed ahead of the single-letter rules
  in the NORMAL section so that the shorter rules do not match first.
-->
<comment># need to have these up here so the rules don't mask</comment>
<comment># remove now superfluous macron when returning</comment>
<!-- Backward only: A/a + macron becomes a plain alpha (the macron is dropped). -->
<tRule>Α ← A $macron ;</tRule>
<tRule>α ← a $macron ;</tRule>
<!-- Two-way: eta is written as e + macron. -->
<tRule>η ↔ e $macron ;</tRule>
<tRule>Η ↔ E $macron ;</tRule>
<!-- Two-way digraphs. For capitals, the titlecase form (Ps, Ph) is used when a lowercase letter follows; the all-caps form (PS, PH) otherwise. -->
<tRule>φ ↔ ph ;</tRule>
<tRule>Ψ } $beforeLower ↔ Ps ;</tRule>
<tRule>Ψ ↔ PS ;</tRule>
<tRule>Φ } $beforeLower ↔ Ph ;</tRule>
<tRule>Φ ↔ PH ;</tRule>
<tRule>ψ ↔ ps ;</tRule>
<!-- Two-way: omega is written as o + macron. -->
<tRule>ω ↔ o $macron ;</tRule>
<tRule>Ω ↔ O $macron;</tRule>
<comment># NORMAL</comment>
<!--
  Letter-by-letter mappings, alpha through rho, lowercase then uppercase.
  Nearly all are two-way. Where a letter has both a context-sensitive rule and
  a plain rule, the context-sensitive one is listed first so that it is tried
  before the plain one.
-->
<tRule>α ↔ a ;</tRule>
<tRule>Α ↔ A ;</tRule>
<tRule>β ↔ b ;</tRule>
<tRule>Β ↔ B ;</tRule>
<!-- Gamma before a $gammaLike letter is written n (N); the reverse applies to n (N) before an $egammaLike letter. Otherwise gamma is g (G). -->
<tRule>γ } $gammaLike ↔ n } $egammaLike ;</tRule>
<tRule>γ ↔ g ;</tRule>
<tRule>Γ } $gammaLike ↔ N } $egammaLike ;</tRule>
<tRule>Γ ↔ G ;</tRule>
<tRule>δ ↔ d ;</tRule>
<tRule>Δ ↔ D ;</tRule>
<tRule>ε ↔ e ;</tRule>
<tRule>Ε ↔ E ;</tRule>
<tRule>ζ ↔ z ;</tRule>
<tRule>Ζ ↔ Z ;</tRule>
<!-- Theta digraph: Th when a lowercase letter follows, TH otherwise. -->
<tRule>θ ↔ th ;</tRule>
<tRule>Θ } $beforeLower ↔ Th ;</tRule>
<tRule>Θ ↔ TH ;</tRule>
<tRule>ι ↔ i ;</tRule>
<tRule>Ι ↔ I ;</tRule>
<tRule>κ ↔ k ;</tRule>
<tRule>Κ ↔ K ;</tRule>
<tRule>λ ↔ l ;</tRule>
<tRule>Λ ↔ L ;</tRule>
<tRule>μ ↔ m ;</tRule>
<tRule>Μ ↔ M ;</tRule>
<!--
  Nu before a $gammaLike letter is written with a trailing apostrophe, which
  keeps it distinct from the n produced by the gamma rules above.
  NOTE(review): the lowercase rule is forward-only while the uppercase rule is
  two-way. Whether that difference is intentional cannot be determined from
  this file. Confirm against upstream before changing.
-->
<tRule>ν } $gammaLike → n\' ;</tRule>
<tRule>ν ↔ n ;</tRule>
<tRule>Ν } $gammaLike ↔ N\' ;</tRule>
<tRule>Ν ↔ N ;</tRule>
<tRule>ξ ↔ x ;</tRule>
<tRule>Ξ ↔ X ;</tRule>
<tRule>ο ↔ o ;</tRule>
<tRule>Ο ↔ O ;</tRule>
<tRule>π ↔ p ;</tRule>
<tRule>Π ↔ P ;</tRule>
<!-- Rho with rough breathing is rh (Rh before a lowercase letter, RH otherwise); these come before the plain rho rules. -->
<tRule>ρ $rough ↔ rh;</tRule>
<tRule>Ρ $rough } $beforeLower ↔ Rh ;</tRule>
<tRule>Ρ $rough ↔ RH ;</tRule>
<tRule>ρ ↔ r ;</tRule>
<tRule>Ρ ↔ R ;</tRule>
<comment># insert separator before things that turn into s</comment>
<!--
  Forward only: between a Latin P/p (already produced from pi) and a following
  sigma-like Greek letter, an apostrophe is inserted. This keeps pi + sigma
  distinct from the ps digraph that stands for psi.
-->
<tRule>[Pp] { } [ςσΣϷϸϺϻ] → \' ;</tRule>
<comment># special S variants</comment>
<!-- In the next four tRule elements the text after '#' sits inside the tRule content, not in a comment element; in the rule syntax '#' starts a comment that runs to the end of the line. -->
<tRule>Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L</tRule>
<tRule>ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L</tRule>
<tRule>Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L</tRule>
<tRule>ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L</tRule>
<!--
  Lowercase sigma has two forms: σ and the final form ς. Both map to s. The
  form that is unexpected for its position is tagged with $underbar so that
  the reverse direction can restore it:
    * before a letter:  σ is plain s,  ς is s + $underbar
    * after a letter:   ς is plain s,  σ is s + $underbar
    * isolated:         σ is plain s,  ς is s + $underbar
  The rule order below implements that priority (before-letter first, then
  after-letter, then the isolated fallback).
-->
<comment># underbar means exception</comment>
<comment># before a letter, initial</comment>
<tRule>ς } $beforeLetter ↔ s $underbar } $beforeLetter;</tRule>
<tRule>σ } $beforeLetter ↔ s } $beforeLetter;</tRule>
<comment># otherwise, after a letter = final</comment>
<tRule>$afterLetter { σ ↔ $afterLetter { s $underbar;</tRule>
<tRule>$afterLetter { ς ↔ $afterLetter { s ;</tRule>
<comment># otherwise (isolated) = initial</comment>
<tRule>ς ↔ s $underbar;</tRule>
<tRule>σ ↔ s ;</tRule>
<!-- The comment element below holds a disabled rule, kept for reference. -->
<comment># [Pp] { Σ ↔ \'S ;</comment>
<tRule>Σ ↔ S ;</tRule>
<!-- Letter-by-letter mappings, tau through chi (continuation of the NORMAL section). -->
<tRule>τ ↔ t ;</tRule>
<tRule>Τ ↔ T ;</tRule>
<!-- Upsilon directly after a vowel ($vowel as leading context) is u / U; anywhere else it is y / Y. The context rule is listed first so it takes priority. -->
<tRule>$vowel {υ } ↔ u ;</tRule>
<tRule>υ ↔ y ;</tRule>
<tRule>$vowel { Υ ↔ U ;</tRule>
<tRule>Υ ↔ Y ;</tRule>
<!-- Chi digraph: Ch when a lowercase letter follows, CH otherwise. -->
<tRule>χ ↔ ch ;</tRule>
<tRule>Χ } $beforeLower ↔ Ch ;</tRule>
<tRule>Χ ↔ CH ;</tRule>
<comment># Completeness for ASCII</comment>
<!--
  Backward-only (Latin to Greek) rules for Latin letters that none of the rules
  above produce. Each one rewrites the letter into Latin text that earlier
  rules do handle, and the leading '|' puts the cursor before that text so it
  is processed again:
    c, q  become k       f  becomes ph      j  becomes i
    v, w  become b when a vowel follows, otherwise u
  The uppercase rules mirror the lowercase ones.
  $ignore (marks and apostrophes, zero or more) is used only by the $rough
  rules at the end of this group.
-->
<tRule>$ignore = [[:Mark:]''] * ;</tRule>
<tRule>| k ← c ;</tRule>
<tRule>| ph ← f ;</tRule>
<tRule>| i ← j ;</tRule>
<tRule>| k ← q ;</tRule>
<tRule>| b ← v } $vowel ;</tRule>
<tRule>| b ← w } $vowel;</tRule>
<tRule>| u ← v ;</tRule>
<tRule>| u ← w;</tRule>
<tRule>| K ← C ;</tRule>
<tRule>| Ph ← F ;</tRule>
<tRule>| I ← J ;</tRule>
<tRule>| K ← Q ;</tRule>
<tRule>| B ← V } $vowel ;</tRule>
<tRule>| B ← W } $vowel ;</tRule>
<tRule>| U ← V ;</tRule>
<tRule>| U ← W ;</tRule>
<!--
  Fallback for any $rough not consumed by the breathing rules:
  forward, it becomes H when next to an uppercase letter (ignoring marks and
  apostrophes) and h otherwise; backward, a leftover H or h becomes $rough.
-->
<tRule>$rough } $ignore [:UppercaseLetter:] → H ;</tRule>
<tRule>$ignore [:UppercaseLetter:] { $rough → H ;</tRule>
<tRule>$rough ← H ;</tRule>
<tRule>$rough ↔ h ;</tRule>
<comment># Completeness for Greek</comment>
<!--
  Forward-only (Greek to Latin) rules for variant and symbol forms of Greek
  letters. Most rewrite the variant to the corresponding basic Greek letter
  and place the cursor ('|') before it, so the regular letter rules then
  produce the Latin output. Two rules (for ϳ and ͺ) emit the Latin letter
  directly. Because these rules are forward-only, the variants are not
  restored when converting Latin back to Greek.
-->
<tRule>ϐ → | β ;</tRule>
<tRule>ϑ → | θ ;</tRule>
<tRule>ϒ → | Υ ;</tRule>
<tRule>ϕ → | φ ;</tRule>
<tRule>ϖ → | π ;</tRule>
<tRule>ϰ → | κ ;</tRule>
<tRule>ϱ → | ρ ;</tRule>
<tRule>ϲ → | σ ;</tRule>
<!-- The text after '#' in the next tRule is an in-rule comment (part of the tRule content). -->
<tRule>Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL</tRule>
<tRule>ϳ → j ;</tRule>
<tRule>ϴ → | Θ ;</tRule>
<tRule>ϵ → | ε ;</tRule>
<tRule>µ → | μ ;</tRule>
<tRule>ͺ → i;</tRule>
<comment># delete any trailing ' marks used for roundtripping</comment>
<!--
  Backward-only (Latin to Greek) cleanup. The apostrophes inserted by the
  forward rules (after P/p before a sigma, and after N/n before a gamma-like
  letter) are removed: the result side is empty, and the apostrophe is matched
  between a Greek pi / nu on the left and the Latin letter on the right.
-->
<tRule>← [Ππ] { \' } [Ss] ;</tRule>
<tRule>← [Νν] { \' } $egammaLike ;</tRule>
<!-- Final normalization step: NFC in the forward direction, NFD (in parentheses) for the reverse direction. It pairs with the ":: NFD (NFC) ;" rule near the top of the transform. -->
<tRule>::NFC (NFD) ;</tRule>
<comment># ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;</comment>
<comment># ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;</comment>
<comment># MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD</comment>
<tRule>:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;</tRule>
</transform>
</transforms>
</supplementalData>