voca
Version:
The ultimate JavaScript string library
190 lines (175 loc) • 6.45 kB
JavaScript
;
require('./internal/is_nil.js');
require('./is_string.js');
var coerce_to_string = require('./internal/coerce_to_string.js');
var _const = require('./internal/const.js');
/**
 * Generated diacritics map.
 *
 * Each key is a replacement string; each value is a string whose individual
 * characters are all replaced by that key. `getDiacriticsMap()` inverts this
 * table into a per-character lookup. The values cover Latin letters with
 * diacritics as well as Greek and Cyrillic letters.
 *
 * The original note says "see below the base code".
 * NOTE(review): no generator code is visible in this file - confirm where it lives.
 *
 * @ignore
 * @type Object
 */
var diacritics = {
// Greek xi and theta are replaced by the digits 3 and 8.
'3': '\u039e\u03be',
'8': '\u0398\u03b8',
// Single-letter replacements, upper case.
A: '\x41\xc0\xc1\xc2\xc3\xc4\xc5\u0100\u0102\u0104\u01cd\u01de\u01e0\u01fa\u0200\u0202\u0226\u023a\u0386\u0391\u0410',
B: '\x42\u0181\u0182\u0243\u0392\u0411',
C: '\x43\xc7\u0106\u0108\u010a\u010c\u0187\u023b\u0426',
D: '\x44\u010e\u0110\u0189\u018a\u018b\xd0\u0394\u0414',
E: '\x45\xc8\xc9\xca\xcb\u0112\u0114\u0116\u0118\u011a\u018e\u0190\u0204\u0206\u0228\u0388\u0395\u0415\u042d',
F: '\x46\u0191\u03a6\u0424',
G: '\x47\u011c\u011e\u0120\u0122\u0193\u01e4\u01e6\u01f4\u0393\u0413\u0490',
H: '\x48\u0124\u0126\u021e\u0389\u0397\u0425',
I: '\x49\xcc\xcd\xce\xcf\u0128\u012a\u012c\u012e\u0130\u0197\u01cf\u0208\u020a\u038a\u0399\u03aa\u0406\u0418',
J: '\x4a\u0134\u0248\u0419',
K: '\x4b\u0136\u0198\u01e8\u039a\u041a',
L: '\x4c\u0139\u013b\u013d\u013f\u0141\u023d\u039b\u041b',
M: '\x4d\u019c\u039c\u041c',
N: '\x4e\xd1\u0143\u0145\u0147\u019d\u01f8\u0220\u039d\u041d',
O: '\x4f\xd2\xd3\xd4\xd5\xd6\xd8\u014c\u014e\u0150\u0186\u019f\u01a0\u01d1\u01ea\u01ec\u01fe\u020c\u020e\u022a\u022c\u022e\u0230\u038c\u039f\u041e',
P: '\x50\u01a4\u03a0\u041f',
Q: '\x51\u024a',
R: '\x52\u0154\u0156\u0158\u0210\u0212\u024c\u03a1\u0420',
S: '\x53\u015a\u015c\u015e\u0160\u0218\u03a3\u0421',
T: '\x54\u0162\u0164\u0166\u01ac\u01ae\u021a\u023e\u03a4\u0422',
U: '\x55\xd9\xda\xdb\xdc\u0168\u016a\u016c\u016e\u0170\u0172\u01af\u01d3\u01d5\u01d7\u01d9\u01db\u0214\u0216\u0244\u0423\u042a',
V: '\x56\u01b2\u0245\u0412',
W: '\x57\u0174\u038f\u03a9',
X: '\x58\u03a7',
Y: '\x59\xdd\u0176\u0178\u01b3\u0232\u024e\u038e\u03a5\u03ab\u042b',
Z: '\x5a\u0179\u017b\u017d\u01b5\u0224\u0396\u0417',
// Single-letter replacements, lower case.
a: '\x61\xe0\xe1\xe2\xe3\xe4\xe5\u0101\u0103\u0105\u01ce\u01df\u01e1\u01fb\u0201\u0203\u0227\u0250\u03ac\u03b1\u0430',
b: '\x62\u0180\u0183\u0253\u03b2\u0431',
c: '\x63\xe7\u0107\u0109\u010b\u010d\u0188\u023c\u0446',
d: '\x64\u010f\u0111\u018c\u0256\u0257\xf0\u03b4\u0434',
e: '\x65\xe8\xe9\xea\xeb\u0113\u0115\u0117\u0119\u011b\u01dd\u0205\u0207\u0229\u0247\u025b\u03ad\u03b5\u0435\u044d',
f: '\x66\u0192\u03c6\u0444',
g: '\x67\u011d\u011f\u0121\u0123\u01e5\u01e7\u01f5\u0260\u03b3\u0433\u0491',
h: '\x68\u0125\u0127\u021f\u0265\u03ae\u03b7\u0445',
i: '\x69\xec\xed\xee\xef\u0129\u012b\u012d\u012f\u0131\u01d0\u0209\u020b\u0268\u0390\u03af\u03b9\u03ca\u0438\u0456',
j: '\x6a\u0135\u01f0\u0249\u0439',
k: '\x6b\u0137\u0199\u01e9\u03ba\u043a',
l: '\x6c\u013a\u013c\u013e\u0140\u0142\u017f\u019a\u026b\u03bb\u043b',
m: '\x6d\u026f\u0271\u03bc\u043c',
n: '\x6e\xf1\u0144\u0146\u0148\u0149\u019e\u01f9\u0272\u03bd\u043d',
o: '\x6f\xf2\xf3\xf4\xf5\xf6\xf8\u014d\u014f\u0151\u01a1\u01d2\u01eb\u01ed\u01ff\u020d\u020f\u022b\u022d\u022f\u0231\u0254\u0275\u03bf\u03cc\u043e',
p: '\x70\u01a5\u03c0\u043f',
q: '\x71\u024b',
r: '\x72\u0155\u0157\u0159\u0211\u0213\u024d\u027d\u03c1\u0440',
s: '\x73\xdf\u015b\u015d\u015f\u0161\u0219\u023f\u03c2\u03c3\u0441',
t: '\x74\u0163\u0165\u0167\u01ad\u021b\u0288\u03c4\u0442',
u: '\x75\xf9\xfa\xfb\xfc\u0169\u016b\u016d\u016f\u0171\u0173\u01b0\u01d4\u01d6\u01d8\u01da\u01dc\u0215\u0217\u0289\u0443\u044a',
v: '\x76\u028b\u028c\u0432',
w: '\x77\u0175\u03c9\u03ce',
x: '\x78\u03c7',
y: '\x79\xfd\xff\u0177\u01b4\u0233\u024f\u03b0\u03c5\u03cb\u03cd\u044b',
z: '\x7a\u017a\u017c\u017e\u01b6\u0225\u0240\u03b6\u0437',
// Multi-character replacements (ligatures, digraphs, transliterations).
// NOTE(review): '\x8c' and '\x9c' are C1 control code points (U+008C / U+009C),
// presumably kept for mis-decoded Windows-1252 input - confirm.
OE: '\x8c\u0152',
oe: '\x9c\u0153',
AE: '\xc6\u01e2\u01fc',
ae: '\xe6\u01e3\u01fd',
hv: '\u0195',
OI: '\u01a2',
oi: '\u01a3',
DZ: '\u01c4\u01f1',
Dz: '\u01c5\u01f2',
dz: '\u01c6\u01f3',
LJ: '\u01c7',
Lj: '\u01c8',
lj: '\u01c9',
NJ: '\u01ca',
Nj: '\u01cb',
nj: '\u01cc',
OU: '\u0222',
ou: '\u0223',
TH: '\xde',
th: '\xfe',
PS: '\u03a8',
ps: '\u03c8',
Yo: '\u0401',
Ye: '\u0404',
Yi: '\u0407',
Zh: '\u0416',
Ch: '\u0427',
Sh: '\u0428\u0429',
// Characters listed under the empty-string key have an empty replacement.
// NOTE(review): '\u042a' is listed both here and under `U` - confirm which
// mapping is intended.
'': '\u042a\u042c\u044c',
Yu: '\u042e',
Ya: '\u042f',
zh: '\u0436',
ch: '\u0447',
sh: '\u0448\u0449',
yu: '\u044e',
ya: '\u044f',
yo: '\u0451',
ye: '\u0454',
yi: '\u0457'
};
var diacriticsMap = null;
/**
 * Returns the per-character lookup derived from `diacritics`.
 *
 * The lookup is built on the first call and the same cached object is
 * returned on every later call.
 *
 * @ignore
 * @returns {Object} Map from a single character to its replacement string.
 */
function getDiacriticsMap() {
  if (diacriticsMap === null) {
    diacriticsMap = {};
    var replacements = Object.keys(diacritics);
    for (var r = 0; r < replacements.length; r++) {
      var replacement = replacements[r];
      var sources = diacritics[replacement].split('');
      for (var s = 0; s < sources.length; s++) {
        // A character listed under several keys keeps the last key visited.
        diacriticsMap[sources[s]] = replacement;
      }
    }
  }
  return diacriticsMap;
}
/**
 * Gets the latin replacement for a character with diacritics.
 *
 * @ignore
 * @param {string} character The character with diacritics.
 * @returns {string} Returns the replacement registered for `character` (which
 * may be the empty string), or `character` itself when no replacement exists.
 */
function getLatinCharacter(character) {
  var map = getDiacriticsMap();
  // Test for an own entry rather than truthiness: some characters are mapped
  // to the empty string on purpose, and a truthiness test would discard that
  // mapping. The own-property check also ignores members inherited from
  // Object.prototype.
  if (Object.prototype.hasOwnProperty.call(map, character)) {
    return map[character];
  }
  return character;
}
/**
 * Replacer callback used by `latinise` with the combining marks regular
 * expression: discards the full match and keeps only `cleanCharacter`.
 *
 * @ignore
 * @param {string} character The matched text (character plus combining marks).
 * @param {string} cleanCharacter The second replacer argument; presumably the
 * first capture group, holding the character without combining marks.
 * @return {string} Returns `cleanCharacter` unchanged.
 */
function removeCombiningMarks(character, cleanCharacter) {
  return cleanCharacter;
}
/**
 * Latinises the `subject`: replaces characters found in the diacritics map
 * with their latin replacement, then strips combining marks.
 *
 * @function latinise
 * @static
 * @since 1.0.0
 * @memberOf Manipulate
 * @param {string} [subject=''] The string to latinise.
 * @return {string} Returns the latinised string.
 * @example
 * v.latinise('cafe\u0301'); // or 'café'
 * // => 'cafe'
 *
 * v.latinise('août décembre');
 * // => 'aout decembre'
 *
 * v.latinise('как прекрасен этот мир');
 * // => 'kak prekrasen etot mir'
 */
function latinise(subject) {
  var text = coerce_to_string.coerceToString(subject);
  if (text !== '') {
    // Pass 1: map each matched character through the diacritics lookup.
    var mapped = text.replace(_const.REGEXP_NON_LATIN, getLatinCharacter);
    // Pass 2: drop combining marks that follow a base character.
    return mapped.replace(_const.REGEXP_COMBINING_MARKS, removeCombiningMarks);
  }
  return '';
}
module.exports = latinise;