// unidecode
// Version:
// ASCII transliterations of Unicode text
// 70 lines (51 loc) • 3.78 kB
// JavaScript
// Time-stamp: "2015-10-15 07:49:48 MDT sburke@cpan.org"
module.exports = [
// BLOCK U+0000
"\\x00", "\\x01", "\\x02", "\\x03", "\\x04", "\\x05", "\\x06", "\\x07", "\\x08", "\\x09", "\\x0a", "\\x0b", "\\x0c", "\\x0d", "\\x0e", "\\x0f",
// ^00 ^01 ^02 ^03 ^04 ^05 ^06 ^07 ^08 ^09 ^0a ^0b ^0c ^0d ^0e ^0f
"\\x10", "\\x11", "\\x12", "\\x13", "\\x14", "\\x15", "\\x16", "\\x17", "\\x18", "\\x19", "\\x1a", "\\x1b", "\\x1c", "\\x1d", "\\x1e", "\\x1f",
// ^10 ^11 ^12 ^13 ^14 ^15 ^16 ^17 ^18 ^19 ^1a ^1b ^1c ^1d ^1e ^1f
' ', "!", "\"", "#", "\\$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
// ^20 ^21 ^22 ^23 ^24 ^25 ^26 ^27 ^28 ^29 ^2a ^2b ^2c ^2d ^2e ^2f ^30
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ":", ";", "<", "=", ">", "?",
// ^30 ^31 ^32 ^33 ^34 ^35 ^36 ^37 ^38 ^39 ^3a ^3b ^3c ^3d ^3e ^3f
"\\@", 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
// ^40 ^41 ^42 ^43 ^44 ^45 ^46 ^47 ^48 ^49 ^4a ^4b ^4c ^4d ^4e ^4f
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', "[", "\\\\", "]", "^", "_",
// ^50 ^51 ^52 ^53 ^54 ^55 ^56 ^57 ^58 ^59 ^5a ^5b ^5c ^5d ^5e ^5f
"`", 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
// ^60 ^61 ^62 ^63 ^64 ^65 ^66 ^67 ^68 ^69 ^6a ^6b ^6c ^6d ^6e ^6f
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "\\{", "|", "\\}", "~", "\\x7f",
// ^70 ^71 ^72 ^73 ^74 ^75 ^76 ^77 ^78 ^79 ^7a ^7b ^7c ^7d ^7e ^7f
// ======================================================================
// Strictly speaking, these are the Unicode values:
// "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
// ^80 ^81 ^82 ^83 ^84 ^85 ^86 ^87 ^88 ^89 ^8a ^8b ^8c ^8d ^8e ^8f
// "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
// ^90 ^91 ^92 ^93 ^94 ^95 ^96 ^97 ^98 ^99 ^9a ^9b ^9c ^9d ^9e ^9f
//
// But I've decided to tolerate Win-1252 input:
"EUR", '', ",", "f",
",,", "...", "+", "++",
"^", "%0", "S", "<",
"OE", '', "Z", '',
'', "'", "'", "\"",
"\"", "*", "-", "--",
"~", "tm", "s", ">",
"oe", '', "z", "Y",
// See: https://en.wikipedia.org/wiki/Latin-1_Supplement_%28Unicode_block%29
// ======================================================================
// And now, back to Latin-1 = Unicode values...
' ', "!", "C/", 'PS', "\\$?", "Y=", "|", 'SS', "\"", "(c)", 'a', "<<", "!", "", "(r)", "-",
// ^a0 ^a1 ^a2 ^a3 ^a4 ^a5 ^a6 ^a7 ^a8 ^a9 ^aa ^ab ^ac ^ad ^ae ^af
'deg', "+-", '2', '3', "'", 'u', 'P', "*", ",", '1', 'o', ">>", "1/4", "1/2", "3/4", "?",
// ^b0 ^b1 ^b2 ^b3 ^b4 ^b5 ^b6 ^b7 ^b8 ^b9 ^ba ^bb ^bc ^bd ^be ^bf ^c0
'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I',
// ^c0 ^c1 ^c2 ^c3 ^c4 ^c5 ^c6 ^c7 ^c8 ^c9 ^ca ^cb ^cc ^cd ^ce ^cf
'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'Y', 'Th', 'ss',
// ^d0 ^d1 ^d2 ^d3 ^d4 ^d5 ^d6 ^d7 ^d8 ^d9 ^da ^db ^dc ^dd ^de ^df
'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
// ^e0 ^e1 ^e2 ^e3 ^e4 ^e5 ^e6 ^e7 ^e8 ^e9 ^ea ^eb ^ec ^ed ^ee ^ef
'd', 'n', 'o', 'o', 'o', 'o', 'o', "/", 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y',
// ^f0 ^f1 ^f2 ^f3 ^f4 ^f5 ^f6 ^f7 ^f8 ^f9 ^fa ^fb ^fc ^fd ^fe ^ff
];