larvitgeodata

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
  Greek <-> Latin transliteration rules (source="Greek", target="Latin", direction="both").

  Reading notes:
    * Each <tRule> holds one rule or variable definition; each <comment> holds a
      rule-file comment line (kept as element data, not as an XML comment).
    * Rule order is significant: earlier rules take precedence over later ones, so
      do not reorder elements.
    * The rules assume NFD normalisation has been applied first and NFC afterwards
      (see the ":: NFD (NFC) ;" and "::NFC (NFD) ;" rules).
    * The set-intersection operator and any other ampersand in rule or comment text
      must be written as the XML entity for an ampersand, otherwise the document is
      not well-formed.
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <transform source="Greek" target="Latin" direction="both">
      <comment># Rules are predicated on running NFD first, and NFC afterwards</comment>
      <comment># :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;</comment>
      <comment># MINIMAL FILTER GENERATED FOR: Greek-Latin</comment>
      <tRule>:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ̄̈̓-̔͂-ͅͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;</tRule>
      <tRule>:: NFD (NFC) ;</tRule>
      <comment># TEST CASES</comment>
      <comment># Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος</comment>
      <comment># ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ</comment>
      <comment># ᾳ ῃ ῳ ὃ ὄ</comment>
      <comment># ὠς ὡς ὢς ὣς</comment>
      <comment># Ὠς Ὡς Ὢς Ὣς</comment>
      <comment># ὨΣ ὩΣ ὪΣ ὫΣ</comment>
      <comment># Ạ, ạ, Ẹ, ẹ, Ọ, ọ</comment>
      <comment># Useful variables</comment>
      <tRule>$lower = [[:latin:][:greek:] &amp; [:Ll:]];</tRule>
      <tRule>$glower = [[:greek:] &amp; [:Ll:]];</tRule>
      <tRule>$upper = [[:latin:][:greek:] &amp; [:Lu:]] ;</tRule>
      <tRule>$accent = [:M:] ;</tRule>
      <comment># NOTE: restrict to just the Greek &amp; Latin accents that we care about</comment>
      <comment># TODO: broaden out once iteration is fixed</comment>
      <tRule>$accentMinus = [ [̀-ͅ] &amp; [:M:] - [̸]] ;</tRule>
      <tRule>$macron = ̄ ;</tRule>
      <tRule>$ddot = ̈ ;</tRule>
      <tRule>$ddotmac = [$ddot$macron];</tRule>
      <tRule>$lcgvowel = [αεηιουω] ;</tRule>
      <tRule>$ucgvowel = [ΑΕΗΙΟΥΩ] ;</tRule>
      <tRule>$gvowel = [$lcgvowel $ucgvowel] ;</tRule>
      <tRule>$lcgvowelC = [$lcgvowel $accent] ;</tRule>
      <tRule>$evowel = [aeiouyAEIOUY];</tRule>
      <tRule>$evowel2 = [iuyIUY];</tRule>
      <tRule>$vowel = [ $evowel $gvowel] ;</tRule>
      <tRule>$gammaLike = [ΓΚΞΧγκξχϰ] ;</tRule>
      <tRule>$egammaLike = [GKXCgkxc] ;</tRule>
      <tRule>$smooth = ̓ ;</tRule>
      <tRule>$rough = ̔ ;</tRule>
      <tRule>$iotasub = ͅ ;</tRule>
      <tRule>$evowel_i = [$evowel-[iI]] ;</tRule>
      <tRule>$evowel2_i = [uyUY];</tRule>
      <tRule>$underbar = ̱;</tRule>
      <tRule>$afterLetter = [:L:] [[:M:]\']* ;</tRule>
      <tRule>$beforeLetter = [[:M:]\']* [:L:] ;</tRule>
      <tRule>$beforeLower = $accent * $lower ;</tRule>
      <tRule>$notLetter = [^[:L:][:M:]] ;</tRule>
      <tRule>$under = ̱;</tRule>
      <comment># Fix punctuation</comment>
      <comment># preserve original</comment>
      <tRule>\: ↔ \: $under ;</tRule>
      <tRule>\? ↔ \? $under ;</tRule>
      <tRule>\; ↔ \? ;</tRule>
      <tRule>· ↔ \: ;</tRule>
      <comment># CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve</comment>
      <tRule>͂ ↔ ̂ ;</tRule>
      <comment># IOTA: convert iota subscript to iota</comment>
      <comment># first make previous alpha long!</comment>
      <tRule>$accent_minus = [[$accent]-[$iotasub$macron]];</tRule>
      <tRule>Α } $accent_minus * $iotasub → | Α $macron ;</tRule>
      <tRule>α } $accent_minus * $iotasub → | α $macron ;</tRule>
      <comment># now convert to uppercase if after uppercase, otherwise to lowercase</comment>
      <tRule>$upper $accent * { $iotasub → I ;</tRule>
      <tRule>$iotasub → i ;</tRule>
      <tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;</tRule>
      <tRule>| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;</tRule>
      <comment># BREATHING</comment>
      <comment># Convert rough breathing to h, and move before letters.</comment>
      <comment># Make A ` x = → H a x</comment>
      <tRule>Α ($macron?) $rough } $beforeLower → H | α $1;</tRule>
      <tRule>Ε $rough } $beforeLower → H | ε;</tRule>
      <tRule>Η $rough } $beforeLower → H | η ;</tRule>
      <tRule>Ι ($ddot?) $rough } $beforeLower → H | ι $1;</tRule>
      <tRule>Ο $rough } $beforeLower → H | ο ;</tRule>
      <tRule>Υ $rough } $beforeLower → H | υ ;</tRule>
      <tRule>Ω ($ddot?) $rough } $beforeLower → H | ω $1;</tRule>
      <comment># Make A x ` = → H a x</comment>
      <tRule>Α ($glower $macron?) $rough → H | α $1 ;</tRule>
      <tRule>Ε ($glower) $rough → H | ε $1 ;</tRule>
      <tRule>Η ($glower) $rough → H | η $1 ;</tRule>
      <tRule>Ι ($glower $ddot?) $rough → H | ι $1 ;</tRule>
      <tRule>Ο ($glower) $rough → H | ο $1 ;</tRule>
      <tRule>Υ ($glower) $rough → H | υ $1 ;</tRule>
      <tRule>Ω ($glower $ddot?) $rough → H | ω $1 ;</tRule>
      <comment>#Otherwise, make x ` into h x and X ` into H X</comment>
      <tRule>($lcgvowel + $ddotmac? ) $rough → h | $1 ;</tRule>
      <tRule>($gvowel + $ddotmac? ) $rough → H | $1 ;</tRule>
      <comment># Go backwards with H</comment>
      <tRule>| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| $1 $rough ← h ($evowel $macron? $ddot?) ;</tRule>
      <tRule>| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;</tRule>
      <comment># titlecase, have to fix individually</comment>
      <comment># in the future, we should add &amp;uppercase() to make this easier</comment>
      <tRule>| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;</tRule>
      <tRule>| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;</tRule>
      <tRule>| A $1 $rough ← H a ($macron? $ddot? ) ;</tRule>
      <tRule>| E $1 $rough ← H e ($macron? $ddot? ) ;</tRule>
      <tRule>| I $1 $rough ← H i ($macron? $ddot? ) ;</tRule>
      <tRule>| O $1 $rough ← H o ($macron? $ddot? ) ;</tRule>
      <tRule>| U $1 $rough ← H u ($macron? $ddot? ) ;</tRule>
      <tRule>| Y $1 $rough ← H y ($macron? $ddot? ) ;</tRule>
      <comment># Now do smooth</comment>
      <comment>#delete smooth breathing for Latin</comment>
      <tRule>$smooth → ;</tRule>
      <comment># insert in Greek</comment>
      <comment># the assumption is that all Marks are on letters.</comment>
      <tRule>| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;</tRule>
      <tRule>| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;</tRule>
      <tRule>| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;</tRule>
      <comment># TODO: preserve smooth/rough breathing if not</comment>
      <comment># on initial vowel sequence</comment>
      <comment># need to have these up here so the rules don't mask</comment>
      <comment># remove now superfluous macron when returning</comment>
      <tRule>Α ← A $macron ;</tRule>
      <tRule>α ← a $macron ;</tRule>
      <tRule>η ↔ e $macron ;</tRule>
      <tRule>Η ↔ E $macron ;</tRule>
      <tRule>φ ↔ ph ;</tRule>
      <tRule>Ψ } $beforeLower ↔ Ps ;</tRule>
      <tRule>Ψ ↔ PS ;</tRule>
      <tRule>Φ } $beforeLower ↔ Ph ;</tRule>
      <tRule>Φ ↔ PH ;</tRule>
      <tRule>ψ ↔ ps ;</tRule>
      <tRule>ω ↔ o $macron ;</tRule>
      <tRule>Ω ↔ O $macron;</tRule>
      <comment># NORMAL</comment>
      <tRule>α ↔ a ;</tRule>
      <tRule>Α ↔ A ;</tRule>
      <tRule>β ↔ b ;</tRule>
      <tRule>Β ↔ B ;</tRule>
      <tRule>γ } $gammaLike ↔ n } $egammaLike ;</tRule>
      <tRule>γ ↔ g ;</tRule>
      <tRule>Γ } $gammaLike ↔ N } $egammaLike ;</tRule>
      <tRule>Γ ↔ G ;</tRule>
      <tRule>δ ↔ d ;</tRule>
      <tRule>Δ ↔ D ;</tRule>
      <tRule>ε ↔ e ;</tRule>
      <tRule>Ε ↔ E ;</tRule>
      <tRule>ζ ↔ z ;</tRule>
      <tRule>Ζ ↔ Z ;</tRule>
      <tRule>θ ↔ th ;</tRule>
      <tRule>Θ } $beforeLower ↔ Th ;</tRule>
      <tRule>Θ ↔ TH ;</tRule>
      <tRule>ι ↔ i ;</tRule>
      <tRule>Ι ↔ I ;</tRule>
      <tRule>κ ↔ k ;</tRule>
      <tRule>Κ ↔ K ;</tRule>
      <tRule>λ ↔ l ;</tRule>
      <tRule>Λ ↔ L ;</tRule>
      <tRule>μ ↔ m ;</tRule>
      <tRule>Μ ↔ M ;</tRule>
      <tRule>ν } $gammaLike → n\' ;</tRule>
      <tRule>ν ↔ n ;</tRule>
      <tRule>Ν } $gammaLike ↔ N\' ;</tRule>
      <tRule>Ν ↔ N ;</tRule>
      <tRule>ξ ↔ x ;</tRule>
      <tRule>Ξ ↔ X ;</tRule>
      <tRule>ο ↔ o ;</tRule>
      <tRule>Ο ↔ O ;</tRule>
      <tRule>π ↔ p ;</tRule>
      <tRule>Π ↔ P ;</tRule>
      <tRule>ρ $rough ↔ rh;</tRule>
      <tRule>Ρ $rough } $beforeLower ↔ Rh ;</tRule>
      <tRule>Ρ $rough ↔ RH ;</tRule>
      <tRule>ρ ↔ r ;</tRule>
      <tRule>Ρ ↔ R ;</tRule>
      <comment># insert separator before things that turn into s</comment>
      <tRule>[Pp] { } [ςσΣϷϸϺϻ] → \' ;</tRule>
      <comment># special S variants</comment>
      <tRule>Ϸ ↔ Š ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L</tRule>
      <tRule>ϸ ↔ š ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L</tRule>
      <tRule>Ϻ ↔ Ŝ ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L</tRule>
      <tRule>ϻ ↔ ŝ ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L</tRule>
      <comment># underbar means exception</comment>
      <comment># before a letter, initial</comment>
      <tRule>ς } $beforeLetter ↔ s $underbar } $beforeLetter;</tRule>
      <tRule>σ } $beforeLetter ↔ s } $beforeLetter;</tRule>
      <comment># otherwise, after a letter = final</comment>
      <tRule>$afterLetter { σ ↔ $afterLetter { s $underbar;</tRule>
      <tRule>$afterLetter { ς ↔ $afterLetter { s ;</tRule>
      <comment># otherwise (isolated) = initial</comment>
      <tRule>ς ↔ s $underbar;</tRule>
      <tRule>σ ↔ s ;</tRule>
      <comment># [Pp] { Σ ↔ \'S ;</comment>
      <tRule>Σ ↔ S ;</tRule>
      <tRule>τ ↔ t ;</tRule>
      <tRule>Τ ↔ T ;</tRule>
      <tRule>$vowel {υ } ↔ u ;</tRule>
      <tRule>υ ↔ y ;</tRule>
      <tRule>$vowel { Υ ↔ U ;</tRule>
      <tRule>Υ ↔ Y ;</tRule>
      <tRule>χ ↔ ch ;</tRule>
      <tRule>Χ } $beforeLower ↔ Ch ;</tRule>
      <tRule>Χ ↔ CH ;</tRule>
      <comment># Completeness for ASCII</comment>
      <tRule>$ignore = [[:Mark:]''] * ;</tRule>
      <tRule>| k ← c ;</tRule>
      <tRule>| ph ← f ;</tRule>
      <tRule>| i ← j ;</tRule>
      <tRule>| k ← q ;</tRule>
      <tRule>| b ← v } $vowel ;</tRule>
      <tRule>| b ← w } $vowel;</tRule>
      <tRule>| u ← v ;</tRule>
      <tRule>| u ← w;</tRule>
      <tRule>| K ← C ;</tRule>
      <tRule>| Ph ← F ;</tRule>
      <tRule>| I ← J ;</tRule>
      <tRule>| K ← Q ;</tRule>
      <tRule>| B ← V } $vowel ;</tRule>
      <tRule>| B ← W } $vowel ;</tRule>
      <tRule>| U ← V ;</tRule>
      <tRule>| U ← W ;</tRule>
      <tRule>$rough } $ignore [:UppercaseLetter:] → H ;</tRule>
      <tRule>$ignore [:UppercaseLetter:] { $rough → H ;</tRule>
      <tRule>$rough ← H ;</tRule>
      <tRule>$rough ↔ h ;</tRule>
      <comment># Completeness for Greek</comment>
      <tRule>ϐ → | β ;</tRule>
      <tRule>ϑ → | θ ;</tRule>
      <tRule>ϒ → | Υ ;</tRule>
      <tRule>ϕ → | φ ;</tRule>
      <tRule>ϖ → | π ;</tRule>
      <tRule>ϰ → | κ ;</tRule>
      <tRule>ϱ → | ρ ;</tRule>
      <tRule>ϲ → | σ ;</tRule>
      <tRule>Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL</tRule>
      <tRule>ϳ → j ;</tRule>
      <tRule>ϴ → | Θ ;</tRule>
      <tRule>ϵ → | ε ;</tRule>
      <tRule>µ → | μ ;</tRule>
      <tRule>ͺ → i;</tRule>
      <comment># delete any trailing ' marks used for roundtripping</comment>
      <tRule>← [Ππ] { \' } [Ss] ;</tRule>
      <tRule>← [Νν] { \' } $egammaLike ;</tRule>
      <tRule>::NFC (NFD) ;</tRule>
      <comment># ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;</comment>
      <comment># ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;</comment>
      <comment># MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD</comment>
      <tRule>:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ̀-̷̹-ͅ΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;</tRule>
    </transform>
  </transforms>
</supplementalData>