cldr
Version:
Library for extracting data from CLDR (the Unicode Common Locale Data Repository)
508 lines (506 loc) • 11.3 kB
text/xml
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2013 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="InterIndic" target="Latin" direction="forward" visibility="internal">
<tRule>
# InterIndic-Latin
# Each variable in this section names a single InterIndic code point (U+E0xx).
# \uE000 reserved
# consonants
$chandrabindu=\uE001;
$anusvara=\uE002;
$visarga=\uE003;
# \uE004 reserved
# w + vowel name (e.g. $wa, $wii) represents the stand-alone form
$wa=\uE005;
$waa=\uE006;
$wi=\uE007;
$wii=\uE008;
$wu=\uE009;
$wuu=\uE00A;
$wr=\uE00B;
$wl=\uE00C;
$wce=\uE00D; # LETTER CANDRA E
$wse=\uE00E; # LETTER SHORT E
$we=\uE00F; # ए LETTER E
$wai=\uE010;
$wco=\uE011; # LETTER CANDRA O
$wso=\uE012; # LETTER SHORT O
$wo=\uE013; # ओ LETTER O
$wau=\uE014;
# Consonant letter variables, followed by the nukta and avagraha signs.
$ka=\uE015;
$kha=\uE016;
$ga=\uE017;
$gha=\uE018;
$nga=\uE019;
$ca=\uE01A;
$cha=\uE01B;
$ja=\uE01C;
$jha=\uE01D;
$nya=\uE01E;
$tta=\uE01F;
$ttha=\uE020;
$dda=\uE021;
$ddha=\uE022;
$nna=\uE023;
$ta=\uE024;
$tha=\uE025;
$da=\uE026;
$dha=\uE027;
$na=\uE028;
$ena=\uE029; #compatibility
$pa=\uE02A;
$pha=\uE02B;
$ba=\uE02C;
$bha=\uE02D;
$ma=\uE02E;
$ya=\uE02F;
$ra=\uE030;
$vva=\uE081; # note: code point lies outside the surrounding \uE030-\uE031 sequence
$rra=\uE031;
$la=\uE032;
$lla=\uE033;
$ela=\uE034; #compatibility
$va=\uE035;
$sha=\uE036;
$ssa=\uE037;
$sa=\uE038;
$ha=\uE039;
# \u093A Reserved
# \u093B Reserved
$nukta=\uE03C;
$avagraha=\uE03D; # SIGN AVAGRAHA
# A bare vowel name (e.g. $aa, $ii) represents the dependent form
$aa=\uE03E;
$i=\uE03F;
$ii=\uE040;
$u=\uE041;
$uu=\uE042;
$rh=\uE043;
$rrh=\uE044;
$ce=\uE045; #VOWEL SIGN CANDRA E
$se=\uE046; #VOWEL SIGN SHORT E
$e=\uE047;
$ai=\uE048;
$co=\uE049; # VOWEL SIGN CANDRA O
$so=\uE04A; # VOWEL SIGN SHORT O
$o=\uE04B; # ो
$au=\uE04C;
$virama=\uE04D;
# \u094E Reserved
# \u094F Reserved
$om=\uE050; # OM
# The four rules below have an empty right-hand side: the character is removed from the output.
\uE051→; # UNMAPPED STRESS SIGN UDATTA
\uE052→; # UNMAPPED STRESS SIGN ANUDATTA
\uE053→; # UNMAPPED GRAVE ACCENT
\uE054→; # UNMAPPED ACUTE ACCENT
$lm = \uE055;# Telugu Length Mark
$ailm=\uE056;# AI Length Mark
$aulm=\uE057;# AU Length Mark
# urdu compatibility forms
$uka=\uE058;
$ukha=\uE059;
$ugha=\uE05A;
$ujha=\uE05B;
$uddha=\uE05C;
$udha=\uE05D;
$ufa=\uE05E;
$uya=\uE05F;
$wrr=\uE060;
$wll=\uE061;
$lh=\uE062;
$llh=\uE063;
$danda=\uE064;
$doubleDanda=\uE065;
$zero=\uE066; # DIGIT ZERO
$one=\uE067; # DIGIT ONE
$two=\uE068; # DIGIT TWO
$three=\uE069; # DIGIT THREE
$four=\uE06A; # DIGIT FOUR
$five=\uE06B; # DIGIT FIVE
$six=\uE06C; # DIGIT SIX
$seven=\uE06D; # DIGIT SEVEN
$eight=\uE06E; # DIGIT EIGHT
$nine=\uE06F; # DIGIT NINE
# Glottal stop
$dgs=\uE082;
# Khanda-ta
$kta=\uE083;
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
$depVowelBelow=[\uE041-\uE044];
# $x was originally called '§'; $z was '%'
# $x is the set of dependent vowel signs; it is used as the following-context in the rules further down.
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$vowels=[aeiour̥̄̆];
# Negated set: matches anything that is neither a letter ([:L:]) nor in the listed combining-mark range.
$forceIndependentMatra = [^[[:L:][̀-͌]]];
#####################################################################
# convert from Native letters to Latin letters
#####################################################################
# transliterations for anusvara
# "A } B → C" rewrites A only when B follows; B is context and is not consumed.
# The anusvara is rendered according to the class of the consonant that follows it.
$anusvara} [$ka$kha$ga$gha$nga] → ṅ;
$anusvara} [$ca$cha$ja$jha$nya] → n̄;
$anusvara} [$tta$ttha$dda$ddha$nna] → ṇ;
$anusvara} [$ta$tha$da$dha$na] → n;
$anusvara} [$pa$pha$ba$bha$ma] → m;
$anusvara} [$ya$ra$lla$la$va$ssa$sha$sa$ha] → n;
# fallback when none of the contexts above match
$anusvara→ ṁ;
# Urdu compatibility
# Most letters below have three rules, tried in order:
#   before a dependent vowel ($x)  → bare consonant (the vowel is emitted by its own rule)
#   with an explicit virama        → bare consonant
#   otherwise                      → consonant plus inherent "a"
$ya$nukta}$x → ẏ;
$ya$nukta$virama → ẏ;
$ya$nukta → ẏa;
$la$nukta }$x → ḻ;
$la$nukta$virama → ḻ;
$la$nukta → ḻa;
$na$nukta }$x → ṉ;
$na$nukta$virama → ṉ;
$na$nukta → ṉa;
$ena }$x → ṉ;
$ena$virama → ṉ;
$ena → ṉa;
$uka → qa;
$ka$nukta }$x → q;
$ka$nukta$virama → q;
$ka$nukta → qa;
$kha$nukta }$x → ḵẖ;
$kha$nukta$virama → ḵẖ;
$kha$nukta → ḵẖa;
$ukha$virama → ḵẖ;
$ukha → ḵẖa;
$ugha → ġa;
$ga$nukta }$x → ġ;
$ga$nukta$virama → ġ;
$ga$nukta → ġa;
$ujha → za;
$ja$nukta }$x → z;
$ja$nukta$virama → z;
$ja$nukta → za;
$ddha$nukta}$x → ṛh;
$ddha$nukta$virama → ṛh;
$ddha$nukta → ṛha;
$uddha}$x → ṛ;
$uddha$virama → ṛ;
$uddha → ṛa;
$udha → ṛa;
$dda$nukta}$x → ṛ;
$dda$nukta$virama → ṛ;
$dda$nukta → ṛa;
$pha$nukta }$x → f;
$pha$nukta$virama → f;
$pha$nukta → fa;
$ufa }$x → f;
$ufa$virama → f;
$ufa → fa;
$ra$nukta}$x → ṟ;
$ra$nukta$virama → ṟ;
$ra$nukta → ṟa;
$lla$nukta}$x → ḻ;
$lla$nukta$virama → ḻ;
$lla$nukta → ḻa;
$ela}$x → ḻ;
$ela$virama → ḻ;
$ela → ḻa;
$uya}$x → ẏ;
$uya$virama → ẏ;
$uya → ẏa;
# normal consonants
# Rule pattern per consonant, tried in order:
#   C virama before ha    → C plus a literal apostrophe ('' is an escaped quote), keeping C+h apart from the aspirate Ch
#   C before a vowel sign → bare consonant
#   C virama              → bare consonant
#   C                     → consonant plus inherent "a"
$ka$virama}$ha→k'';
$ka}$x→k;
$ka$virama→k;
$ka→ka;
$kha}$x→kh;
$kha$virama→kh;
$kha→kha;
$ga$virama}$ha→g'';
$ga}$x→g;
$ga$virama→g;
$ga→ga;
$gha}$x→gh;
$gha$virama→gh;
$gha→gha;
$nga}$x→ṅ;
$nga$virama→ṅ;
$nga→ṅa;
$ca$virama}$ha→c'';
$ca}$x→c;
$ca$virama→c;
$ca→ca;
$cha}$x→ch;
$cha$virama→ch;
$cha→cha;
$ja$virama}$ha→j'';
$ja}$x→j;
$ja$virama→j;
$ja→ja;
$jha}$x→jh;
$jha$virama→jh;
$jha→jha;
$nya }$x→ñ;
$nya$virama→ñ;
$nya → ña;
$tta$virama}$ha→ṭ'';
$tta}$x→ṭ;
$tta$virama→ṭ;
$tta→ṭa;
$ttha}$x→ṭh;
$ttha$virama→ṭh;
$ttha→ṭha;
# Fixed: this rule previously read "dda}$x$ha", which fired before a vowel sign instead of
# on dda+virama and so never separated ḍ+h from ḍh; it now matches the other stops.
$dda$virama}$ha→ḍ'';
$dda}$x→ḍ;
$dda$virama→ḍ;
$dda→ḍa;
$ddha}$x→ḍh;
$ddha$virama→ḍh;
$ddha→ḍha;
$nna}$x→ṇ;
$nna$virama→ṇ;
$nna→ṇa;
# Consonants ta through ya.
# A consonant with virama gets a literal apostrophe ('' is an escaped quote) before the listed
# following letters; then: before a vowel sign → bare consonant, with virama → bare consonant,
# otherwise → consonant plus inherent "a".
$ta$virama}$ha→t'';
$ta$virama}$ttha→t'';
$ta$virama}$tta→t'';
$ta$virama}$tha→t'';
$ta}$x→t;
$ta$virama→t;
$ta→ta;
$tha}$x→th;
$tha$virama→th;
$tha→tha;
$da$virama}$ha→d'';
$da$virama}$ddha→d'';
$da$virama}$dda→d'';
$da$virama}$dha→d'';
$da}$x→d;
$da$virama→d;
$da→da;
$dha}$x→dh;
$dha$virama→dh;
$dha→dha;
$na$virama}$ga→n'';
$na$virama}$ya→n'';
$na}$x→n;
$na$virama→n;
$na→na;
$pa$virama}$ha→p'';
$pa}$x→p;
$pa$virama→p;
$pa→pa;
$pha}$x→ph;
$pha$virama→ph;
$pha→pha;
$ba$virama}$ha→b'';
$ba}$x→b;
$ba$virama→b;
$ba→ba;
$bha}$x→bh;
$bha$virama→bh;
$bha→bha;
$ma$virama}$ma→m'';
$ma}$x→m;
$ma$virama→m;
$ma→ma;
$ya}$x→y;
$ya$virama→y;
$ya→ya;
# Consonants ra through ha.
# Same pattern as the preceding consonants: virama before the listed letter → consonant plus a
# literal apostrophe; before a vowel sign or with virama → bare consonant; otherwise → consonant + "a".
$ra$virama}$ha→r'';
$ra}$x→r;
$ra$virama→r;
$ra→ra;
$vva$virama}$ha→ẇ'';
$vva}$x→ẇ;
$vva$virama→ẇ;
$vva→ẇa;
$rra$virama}$ha→ṟ'';
$rra}$x→ṟ;
$rra$virama→ṟ;
$rra→ṟa;
$la$virama}$ha→l'';
$la}$x→l;
$la$virama→l;
$la→la;
$lla$virama}$ha→ḷ'';
$lla}$x→ḷ;
$lla$virama→ḷ;
$lla→ḷa;
$va}$x→v;
$va$virama→v;
$va→va;
$sa$virama}$ha→s'';
$sa$virama}$sha→s'';
$sa$virama}$ssa→s'';
$sa$virama}$sa→s'';
$sa}$x→s;
$sa$virama→s;
# for gurmukhi
# These must stay ahead of the plain "$sa→sa" rule so that sa+nukta is matched as a unit.
$sa$nukta}$x→ś;
$sa$nukta$virama→ś;
$sa$nukta→śa;
$sa→sa;
$sha}$x→ś;
$sha$virama→ś;
$sha→śa;
$ssa}$x→ṣ;
$ssa$virama→ṣ;
$ssa→ṣa;
$ha}$x→h;
$ha$virama→h;
$ha→ha;
# dependent vowels (should never occur except following consonants)
# "B { A → C" rewrites A only when preceded by B. When a vowel sign follows a character from
# $forceIndependentMatra, the output is prefixed with a combining mark so the case stays distinguishable.
$forceIndependentMatra{$aa → ̔ā;
$forceIndependentMatra{$ai → ̔ai;
$forceIndependentMatra{$au → ̔au;
$forceIndependentMatra{$ii → ̔ī;
$forceIndependentMatra{$i → ̔i;
$forceIndependentMatra{$uu → ̔ū;
$forceIndependentMatra{$u → ̔u;
$forceIndependentMatra{$rrh → ̔r̥̄;
$forceIndependentMatra{$rh → ̔r̥;
$forceIndependentMatra{$llh → ̔l̥̄;
$forceIndependentMatra{$lh → ̔l̥;
$forceIndependentMatra{$e → ̔ē;
$forceIndependentMatra{$o → ̔ō;
# extra vowels
$forceIndependentMatra{$ce → ̔ĕ;
$forceIndependentMatra{$co → ̔ŏ;
$forceIndependentMatra{$se → ̔e;
$forceIndependentMatra{$so → ̔o;
$forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character
$forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character
# Plain dependent vowels (the usual case, following a consonant).
$aa → ā;
$ai → ai;
$au → au;
$ii → ī;
$i → i;
$uu → ū;
$u → u;
$rrh → r̥̄;
$rh → r̥;
$llh → l̥̄;
$lh → l̥;
$e → ē;
$o → ō;
# extra vowels
$ce → ĕ;
$co → ŏ;
$se → e;
$so → o;
# dependent vowels when following independent vowels. Generally Illegal only for roundtripping
# An independent vowel that is followed by a vowel sign ($x) gets a trailing combining mark.
$waa} $x → ā̔;
$wai} $x → ai̔;
$wau} $x → au̔;
$wii} $x → ī̔;
$wi } $x → i̔;
$wuu} $x → ū̔;
$wu } $x → u̔;
$wrr} $x → r̥̄̔;
$wr } $x → r̥̔;
$wll} $x → l̥̄̔;
$wl } $x → l̥̔;
$we } $x → ē̔;
$wo } $x → ō̔;
$wa } $x → a̔;
# extra vowels
$wce} $x → ĕ̔;
$wco} $x → ŏ̔;
$wse} $x → e̔;
$wso} $x → o̔;
$om} $x → ''om̔;
# independent vowels when preceded by vowels
# A literal apostrophe ('' is an escaped quote) is emitted first, separating the two vowels in the output.
$vowels{$waa → ''ā;
$vowels{$wai → ''ai;
$vowels{$wau → ''au;
$vowels{$wii → ''ī;
$vowels{$wi → ''i;
$vowels{$wuu → ''ū;
$vowels{$wu → ''u;
$vowels{$wrr → ''r̥̄;
$vowels{$wr → ''r̥;
$vowels{$wll → ''l̥̄;
$vowels{$wl → ''l̥;
$vowels{$we → ''ē;
$vowels{$wo → ''ō;
$vowels{$wa → ''a;
# extra vowels
$vowels{$wce → ''ĕ;
$vowels{$wco → ''ŏ;
$vowels{$wse → ''e;
$vowels{$wso → ''o;
# independent vowels (otherwise)
$waa → ā;
$wai → ai;
$wau → au;
$wii → ī;
$wi → i;
$wuu → ū;
$wu → u;
$wrr → r̥̄;
$wr → r̥;
$wll → l̥̄;
$wl → l̥;
$we → ē;
$wo → ō;
$wa → a;
# extra vowels
$wce → ĕ;
$wco → ŏ;
$wse → e;
$wso → o;
$om → ''om;
# stress marks
$avagraha → ̕;
# chandrabindu+anusvara must be tried before the single-sign chandrabindu rule.
$chandrabindu$anusvara→̃;
$chandrabindu → m̐;
$visarga→ḥ;
# numbers
$zero → 0;
$one → 1;
$two → 2;
$three → 3;
$four → 4;
$five → 5;
$six → 6;
$seven → 7;
$eight → 8;
$nine → 9;
# Rules with an empty right-hand side remove the character from the output.
$lm →;
$ailm →;
$aulm →;
$dgs→ʔ;
$kta→ṯ;
$danda→'.';
$doubleDanda→'.';
\uE070→'.'; # ABBREVIATION SIGN
# LETTER RA WITH MIDDLE DIAGONAL
\uE071}$x→ra;
\uE071$virama→r;
\uE071→ra;
# LETTER RA WITH LOWER DIAGONAL
\uE072}$x→ra;
\uE072$virama→r;
\uE072→ra;
\uE073→; # RUPEE MARK
\uE074→; # RUPEE SIGN
\uE075→; # CURRENCY NUMERATOR ONE
\uE076→; # CURRENCY NUMERATOR TWO
\uE077→; # CURRENCY NUMERATOR THREE
\uE078→; # CURRENCY NUMERATOR FOUR
\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\uE07A→; # CURRENCY DENOMINATOR SIXTEEN
\uE07B→; # ISSHAR
\uE07C→; # TIPPI
\uE07D→; # ADDAK
\uE07E→; # IRI
\uE07F→; # URA
\uE080→; # EK ONKAR
\uE004→; # DEVANAGARI VOWEL SIGN SHORT A
$virama→; # remove remaining viramas
$nukta→\u0323 ; # combining dot below
# NOTE(review): the comment on the next rule duplicates the one on \uE004 above - confirm which character \uE084 represents.
\uE084→æ; # DEVANAGARI VOWEL SIGN SHORT A
</tRule>
</transform>
</transforms>
</supplementalData>