<?xml version="1.0" encoding="UTF-8"?>
<!--
larvitgeodata
Version:
Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.
43 lines (41 loc) • 2.69 kB
text/xml
-->
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <!--
      Transliteration rule set declared between "Latin" and "NumericPinyin"
      with direction="both"; it contains rules written with each arrow
      direction (→ and ←).

      Each <comment> element carries an explanatory note that is part of the
      rule data, and each <tRule> element carries one rule or variable
      definition. Their text is kept exactly as authored.

      The rule text invokes other transforms with an ampersand prefix
      (Pinyin-NumericPinyin and NumericPinyin-Pinyin). In XML element content
      the ampersand must be written as the entity reference &amp; because a
      bare ampersand makes the document ill-formed. After parsing, the rule
      text still contains a single ampersand character.
    -->
    <transform source="Latin" target="NumericPinyin" direction="both">
      <comment># According to the pinyin definitions I've been able to find:</comment>
      <comment># 'a', 'e' are the preferred bases</comment>
      <comment># otherwise 'o'</comment>
      <comment># otherwise last vowel</comment>
      <comment># The trailing form of syllables are the following:</comment>
      <comment># "a", "ai", "ao", "an", "ang",</comment>
      <comment># "o", "ou", "ong",</comment>
      <comment># "e", "ei", "er", "en", "eng",</comment>
      <comment># "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",</comment>
      <comment># "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",</comment>
      <comment># "ü", "üe", "üan", "ün"</comment>
      <comment># so the letters the tone will 'hop' are:</comment>
      <tRule>::NFD (NFC);</tRule>
      <!-- $tone is a set of combining marks; they render stacked on the opening bracket. -->
      <tRule>$tone = [̄́̌̀̆] ;</tRule>
      <comment># Move the tone to the end of a syllable, and convert to number</comment>
      <!-- The three → rules pass the captured tone mark ($1) to Pinyin-NumericPinyin. -->
      <tRule>e {($tone) r} → r &amp;Pinyin-NumericPinyin($1);</tRule>
      <tRule>($tone) ( [i o n u {o n} {n g}]) → $2 &amp;Pinyin-NumericPinyin($1);</tRule>
      <tRule>($tone) → &amp;Pinyin-NumericPinyin($1);</tRule>
      <comment># The following backs up until it finds the right vowel, then deposits the tone</comment>
      <!-- Variables used by the ← rules that follow. -->
      <tRule>$vowel = [aAeEiIoOuU {ü} {Ü} vV];</tRule>
      <tRule>$consonant = [[a-z A-Z] - [$vowel]];</tRule>
      <tRule>$digit = [1-5];</tRule>
      <!-- The ← rules pass the captured digit to NumericPinyin-Pinyin. -->
      <tRule>$1 &amp;NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit);</tRule>
      <tRule>$1 &amp;NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);</tRule>
      <tRule>$1 &amp;NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit);</tRule>
      <tRule>&amp;NumericPinyin-Pinyin($1) ← [:letter:] {($digit)};</tRule>
      <tRule>::NFC (NFD);</tRule>
    </transform>
  </transforms>
</supplementalData>