<!--
larvitgeodata
Version:
Geo data, primarily ISO territories, languages etc. Data fetched mostly from CLDR.
77 lines (75 loc) • 4.01 kB
text/xml
-->
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
  <version number="$Revision: 11914 $"/>
  <transforms>
    <!--
      Han to Latin transliteration, "Names" variant, forward direction.
      The rule text lives in the tRule elements; the comment elements are the
      rule set's own annotations and are reproduced verbatim.
    -->
    <transform source="Han" target="Latin" direction="forward" variant="Names">
      <comment># This transform is primarily intended to produce readings for Chinese surnames, or for full</comment>
      <comment># Chinese personal names - surname first - that occur at the beginning of a contiguous Han substring</comment>
      <comment># (i.e. at the beginning of text, or immediately preceded by space or other non-Han characters).</comment>
      <comment># Several Han characters have different readings in surnames, than the readings found in Han-Latin.</comment>
      <comment># ----</comment>

      <!-- Step 1: define the marker code point and place it in front of a Han character that follows a non-Han character. -->
      <comment># Insert marker at start of each Han sequence (including Han after space).</comment>
      <comment># Do this before ::Han-Spacedhan() to catch Han after space in original text,</comment>
      <comment># and to apply before all other rules.</comment>
      <tRule>$startOfHanMarker = \uFDD1;</tRule>
      <tRule>[:^script=Han:] { ([:script=Han:]) → $startOfHanMarker $1;</tRule>
      <comment># Need Spacedhan so the name transliterations get spaced properly</comment>
      <tRule>::Han-Spacedhan();</tRule>

      <!-- Step 2: readings selected by the character that follows (an optional space may sit in between). -->
      <comment># Convert special name readings that depend on next character</comment>
      <tRule>令 } \u0020? 狐 →líng;</tRule>
      <tRule>万 } \u0020? 俟 →mò;</tRule>
      <tRule>澹 } \u0020? 台 →tán;</tRule>
      <comment># The following maps 长 to the standard Han-Latin reading zhǎng for this case,</comment>
      <comment># to override the normal Han-Latin/Names reading 长→cháng further below</comment>
      <tRule>$startOfHanMarker{ 长 } \u0020? 孙 →zhǎng;</tRule>

      <!-- Step 3: single-character name readings; some rules are tied to the marked position, others are not. -->
      <comment># Convert single characters with special name readings</comment>
      <tRule>$startOfHanMarker{ 秘→bì;</tRule>
      <tRule>$startOfHanMarker{ 卜→bǔ;</tRule>
      <tRule>长→cháng;</tRule>
      <tRule>$startOfHanMarker{ 种→chóng;</tRule>
      <tRule>$startOfHanMarker{ 重→chóng;</tRule>
      <tRule>$startOfHanMarker{ 刀→diāo;</tRule>
      <tRule>干→gān;</tRule>
      <tRule>葛→gě;</tRule>
      <tRule>$startOfHanMarker{ 盖→gě;</tRule>
      <tRule>$startOfHanMarker{ 过→guō;</tRule>
      <tRule>$startOfHanMarker{ 华→huà;</tRule>
      <tRule>$startOfHanMarker{ 纪→jǐ;</tRule>
      <tRule>筠→jūn;</tRule>
      <tRule>$startOfHanMarker{ 牟→mù;</tRule>
      <tRule>$startOfHanMarker{ 区→ōu;</tRule>
      <tRule>$startOfHanMarker{ 繁→pó;</tRule>
      <tRule>仇→qiú;</tRule>
      <tRule>$startOfHanMarker{ 任→rén;</tRule>
      <tRule>$startOfHanMarker{ 单→shàn;</tRule>
      <tRule>$startOfHanMarker{ 召→shào;</tRule>
      <tRule>$startOfHanMarker{ 折→shé;</tRule>
      <tRule>$startOfHanMarker{ 舍→shè;</tRule>
      <tRule>$startOfHanMarker{ 沈→shěn;</tRule>
      <tRule>峙→shì;</tRule>
      <tRule>隗→wěi;</tRule>
      <tRule>$startOfHanMarker{ 解→xiè;</tRule>
      <tRule>莘→xīn;</tRule>
      <tRule>$startOfHanMarker{ 燕→yān;</tRule>
      <tRule>$startOfHanMarker{ 尉→yù;</tRule>
      <tRule>$startOfHanMarker{ 乐→yuè;</tRule>
      <tRule>$startOfHanMarker{ 员→yùn;</tRule>
      <tRule>$startOfHanMarker{ 查→zhā;</tRule>
      <tRule>翟→zhái;</tRule>
      <tRule>曾→zēng;</tRule>

      <!-- Step 4: in a separate pass, turn the remaining markers into a space or remove them, then run Han-Latin. -->
      <comment># Convert $startOfHanMarkers to space, or to nothing if they are at the beginning of text.</comment>
      <comment># Need to do this as a separate pass to get the spacing right.</comment>
      <tRule>::Null();</tRule>
      <tRule>[^$]{ $startOfHanMarker →\u0020;</tRule>
      <tRule>$startOfHanMarker →;</tRule>
      <comment># Then run the normal Han-Latin transform for the rest</comment>
      <tRule>::Han-Latin();</tRule>
    </transform>
  </transforms>
</supplementalData>