compromise
Version:
modest natural language processing
47 lines (46 loc) • 2.13 kB
JavaScript
//a hugely-ignorant, and widely subjective transliteration of latin, cyrillic, greek unicode characters to english ascii.
//approximate visual (not semantic or phonetic) relationship between unicode and ascii characters
//http://en.wikipedia.org/wiki/List_of_Unicode_characters
//https://docs.google.com/spreadsheet/ccc?key=0Ah46z755j7cVdFRDM1A2YVpwa1ZYWlpJM2pQZ003M0E
// Compact table: each key is the plain-ascii replacement, and each value is a
// run of look-alike unicode characters that should collapse to that key.
const lookalikes = {
  '!': '¡',
  '?': '¿Ɂ',
  '"': '“”"❝❞',
  "'": '‘‛❛❜’',
  '-': '—–',
  a: 'ªÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАаѦѧӐӑӒӓƛæ',
  b: 'ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬвъьѢѣҌҍ',
  c: '¢©ÇçĆćĈĉĊċČčƆƇƈȻȼͻͼϲϹϽϾСсєҀҁҪҫ',
  d: 'ÐĎďĐđƉƊȡƋƌ',
  e: 'ÈÉÊËèéêëĒēĔĕĖėĘęĚěƐȄȅȆȇȨȩɆɇΈΕΞΣέεξϵЀЁЕеѐёҼҽҾҿӖӗễ',
  f: 'ƑƒϜϝӺӻҒғſ',
  g: 'ĜĝĞğĠġĢģƓǤǥǦǧǴǵ',
  h: 'ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ',
  I: 'ÌÍÎÏ',
  i: 'ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇіїi̇',
  j: 'ĴĵǰȷɈɉϳЈј',
  k: 'ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ',
  l: 'ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ',
  m: 'ΜϺϻМмӍӎ',
  n: 'ÑñŃńŅņŇňʼnŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ',
  o: 'ÒÓÔÕÖØðòóôõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟθοσόϕϘϙϬϴОФоѲѳӦӧӨөӪӫ',
  p: 'ƤΡρϷϸϼРрҎҏÞ',
  q: 'Ɋɋ',
  r: 'ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґ',
  s: 'ŚśŜŝŞşŠšƧƨȘșȿЅѕ',
  t: 'ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮТт',
  u: 'ÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰυϋύ',
  v: 'νѴѵѶѷ',
  w: 'ŴŵƜωώϖϢϣШЩшщѡѿ',
  x: '×ΧχϗϰХхҲҳӼӽӾӿ',
  y: 'ÝýÿŶŷŸƳƴȲȳɎɏΎΥΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ',
  z: 'ŹźŻżŽžƵƶȤȥɀΖ',
}

// Invert the compact table into one flat lookup: unicode character -> ascii.
// Keys are visited in declaration order, so a character listed under two keys
// ends up mapped to whichever key comes last.
const unicode = {}
for (const [ascii, variants] of Object.entries(lookalikes)) {
  // step one UTF-16 code unit at a time, so each unit becomes its own entry
  for (let i = 0; i < variants.length; i += 1) {
    unicode[variants[i]] = ascii
  }
}
export default unicode