UNPKG

modern-diacritics

Version:

A modern way to latinize/ascii-fold strings and normalize symbols.

691 lines (684 loc) 15.4 kB
/**
 * Replacement table used by `latinize`: every character listed in `chars`
 * is replaced by the ASCII string `base`. Lookups happen after NFD
 * normalization and diacritic stripping, one UTF-16 code unit at a time.
 */
const diacriticList = [
  { base: "0", chars: ["\u07c0"] },
  { base: "A", chars: ["\u24b6", "\uff21", "\u023a", "\u2c6f"] },
  { base: "AA", chars: ["\ua732"] },
  { base: "AE", chars: ["\u00c6", "\u01fc", "\u01e2"] },
  { base: "AO", chars: ["\ua734"] },
  { base: "AU", chars: ["\ua736"] },
  { base: "AV", chars: ["\ua738", "\ua73a"] },
  { base: "AY", chars: ["\ua73c"] },
  { base: "B", chars: ["\u24b7", "\uff22", "\u0243", "\u0181"] },
  { base: "C", chars: ["\u24b8", "\uff23", "\ua73e", "\u0187", "\u023b"] },
  {
    base: "D",
    chars: ["\u24b9", "\uff24", "\u0110", "\u018a", "\u0189", "\u1d05", "\ua779"],
  },
  { base: "Dh", chars: ["\u00d0"] },
  { base: "DZ", chars: ["\u01f1", "\u01c4"] },
  { base: "Dz", chars: ["\u01f2", "\u01c5"] },
  { base: "E", chars: ["\u025b", "\u24ba", "\uff25", "\u0190", "\u018e", "\u1d07"] },
  { base: "F", chars: ["\ua77c", "\u24bb", "\uff26", "\u0191", "\ua77b"] },
  {
    base: "G",
    chars: [
      "\u24bc", "\uff27", "\u01e4", "\u0193", "\ua7a0", "\ua77d", "\ua77e", "\u0262",
    ],
  },
  { base: "H", chars: ["\u24bd", "\uff28", "\u0126", "\u2c67", "\u2c75", "\ua78d"] },
  { base: "I", chars: ["\u24be", "\uff29", "\u0197"] },
  { base: "J", chars: ["\u24bf", "\uff2a", "\u0248", "\u0237"] },
  {
    base: "K",
    chars: [
      "\u24c0", "\uff2b", "\u0198", "\u2c69", "\ua740", "\ua742", "\ua744", "\ua7a2",
    ],
  },
  {
    base: "L",
    chars: [
      "\u24c1", "\uff2c", "\u013f", "\u0141", "\u023d", "\u2c62", "\u2c60", "\ua748",
      "\ua746", "\ua780",
    ],
  },
  { base: "LJ", chars: ["\u01c7"] },
  { base: "Lj", chars: ["\u01c8"] },
  { base: "M", chars: ["\u24c2", "\uff2d", "\u2c6e", "\u019c", "\u03fb"] },
  {
    base: "N",
    chars: ["\ua7a4", "\u0220", "\u24c3", "\uff2e", "\u019d", "\ua790", "\u1d0e"],
  },
  { base: "NJ", chars: ["\u01ca"] },
  { base: "Nj", chars: ["\u01cb"] },
  {
    base: "O",
    chars: [
      "\u24c4", "\uff2f", "\u00d8", "\u01fe", "\u0186", "\u019f", "\ua74a", "\ua74c",
    ],
  },
  { base: "OE", chars: ["\u0152"] },
  { base: "OI", chars: ["\u01a2"] },
  { base: "OO", chars: ["\ua74e"] },
  { base: "OU", chars: ["\u0222"] },
  {
    base: "P",
    chars: ["\u24c5", "\uff30", "\u01a4", "\u2c63", "\ua750", "\ua752", "\ua754"],
  },
  { base: "Q", chars: ["\u24c6", "\uff31", "\ua756", "\ua758", "\u024a"] },
  {
    base: "R",
    chars: ["\u24c7", "\uff32", "\u024c", "\u2c64", "\ua75a", "\ua7a6", "\ua782"],
  },
  { base: "S", chars: ["\u24c8", "\uff33", "\u1e9e", "\u2c7e", "\ua7a8", "\ua784"] },
  {
    base: "T",
    chars: ["\u24c9", "\uff34", "\u0166", "\u01ac", "\u01ae", "\u023e", "\ua786"],
  },
  { base: "Th", chars: ["\u00de"] },
  { base: "TZ", chars: ["\ua728"] },
  { base: "U", chars: ["\u24ca", "\uff35", "\u0244"] },
  { base: "V", chars: ["\u24cb", "\uff36", "\u01b2", "\ua75e", "\u0245"] },
  { base: "VY", chars: ["\ua760"] },
  { base: "W", chars: ["\u24cc", "\uff37", "\u2c72"] },
  { base: "X", chars: ["\u24cd", "\uff38"] },
  { base: "Y", chars: ["\u24ce", "\uff39", "\u01b3", "\u024e", "\u1efe"] },
  {
    base: "Z",
    chars: ["\u24cf", "\uff3a", "\u01b5", "\u0224", "\u2c7f", "\u2c6b", "\ua762"],
  },
  { base: "a", chars: ["\u24d0", "\uff41", "\u1e9a", "\u2c65", "\u0250", "\u0251"] },
  { base: "aa", chars: ["\ua733"] },
  { base: "ae", chars: ["\u00e6", "\u01fd", "\u01e3"] },
  { base: "ao", chars: ["\ua735"] },
  { base: "au", chars: ["\ua737"] },
  { base: "av", chars: ["\ua739", "\ua73b"] },
  { base: "ay", chars: ["\ua73d"] },
  { base: "b", chars: ["\u24d1", "\uff42", "\u0180", "\u0183", "\u0253", "\u0182"] },
  { base: "c", chars: ["\uff43", "\u24d2", "\u0188", "\u023c", "\ua73f", "\u2184"] },
  {
    base: "d",
    chars: [
      "\u24d3", "\uff44", "\u0111", "\u018c", "\u0256", "\u0257", "\u018b", "\u13e7",
      "\u0501", "\ua7aa",
    ],
  },
  { base: "dh", chars: ["\u00f0"] },
  { base: "dz", chars: ["\u01f3", "\u01c6"] },
  { base: "e", chars: ["\u24d4", "\uff45", "\u0247", "\u01dd"] },
  { base: "f", chars: ["\u24d5", "\uff46", "\u0192"] },
  { base: "ff", chars: ["\ufb00"] },
  { base: "fi", chars: ["\ufb01"] },
  { base: "fl", chars: ["\ufb02"] },
  { base: "ffi", chars: ["\ufb03"] },
  { base: "ffl", chars: ["\ufb04"] },
  {
    base: "g",
    chars: ["\u24d6", "\uff47", "\u01e5", "\u0260", "\ua7a1", "\ua77f", "\u1d79"],
  },
  { base: "h", chars: ["\u24d7", "\uff48", "\u0127", "\u2c68", "\u2c76", "\u0265"] },
  { base: "hv", chars: ["\u0195"] },
  { base: "i", chars: ["\u24d8", "\uff49", "\u0268", "\u0131"] },
  { base: "j", chars: ["\u24d9", "\uff4a", "\u0249"] },
  {
    base: "k",
    chars: [
      "\u24da", "\uff4b", "\u0199", "\u2c6a", "\ua741", "\ua743", "\ua745", "\ua7a3",
    ],
  },
  {
    base: "l",
    chars: [
      "\u24db", "\uff4c", "\u0140", "\u017f", "\u0142", "\u019a", "\u026b", "\u2c61",
      "\ua749", "\ua781", "\ua747", "\u026d",
    ],
  },
  { base: "lj", chars: ["\u01c9"] },
  { base: "m", chars: ["\u24dc", "\uff4d", "\u0271", "\u026f"] },
  {
    base: "n",
    chars: [
      "\u24dd", "\uff4e", "\u019e", "\u0272", "\u0149", "\ua791", "\ua7a5", "\u043b",
      "\u0509",
    ],
  },
  { base: "nj", chars: ["\u01cc"] },
  {
    base: "o",
    chars: [
      "\u24de", "\uff4f", "\u00f8", "\u01ff", "\ua74b", "\ua74d", "\u0275", "\u0254",
      "\u1d11",
    ],
  },
  { base: "oe", chars: ["\u0153"] },
  { base: "oi", chars: ["\u01a3"] },
  { base: "oo", chars: ["\ua74f"] },
  { base: "ou", chars: ["\u0223"] },
  {
    base: "p",
    chars: [
      "\u24df", "\uff50", "\u01a5", "\u1d7d", "\ua751", "\ua753", "\ua755", "\u03c1",
    ],
  },
  { base: "q", chars: ["\u24e0", "\uff51", "\u024b", "\ua757", "\ua759"] },
  {
    base: "r",
    chars: ["\u24e1", "\uff52", "\u024d", "\u027d", "\ua75b", "\ua7a7", "\ua783"],
  },
  {
    base: "s",
    chars: ["\u24e2", "\uff53", "\u023f", "\ua7a9", "\ua785", "\u1e9b", "\u0282"],
  },
  { base: "ss", chars: ["\u00df"] },
  {
    base: "t",
    chars: ["\u24e3", "\uff54", "\u0167", "\u01ad", "\u0288", "\u2c66", "\ua787"],
  },
  { base: "th", chars: ["\u00fe"] },
  { base: "tz", chars: ["\ua729"] },
  { base: "u", chars: ["\u24e4", "\uff55", "\u0289"] },
  { base: "v", chars: ["\u24e5", "\uff56", "\u028b", "\ua75f", "\u028c"] },
  { base: "vy", chars: ["\ua761"] },
  { base: "w", chars: ["\u24e6", "\uff57", "\u2c73"] },
  { base: "x", chars: ["\u24e7", "\uff58"] },
  { base: "y", chars: ["\u24e8", "\uff59", "\u01b4", "\u024f", "\u1eff"] },
  {
    base: "z",
    chars: ["\u24e9", "\uff5a", "\u01b6", "\u0225", "\u0240", "\u2c6c", "\ua763"],
  },
];

/** Flattened `char -> base` lookup built from `diacriticList`. */
const diacriticMap = new Map(
  diacriticList.flatMap(({ base, chars }) => chars.map((char) => [char, base])),
);

/**
 * Pattern used by `removeDiacritics` to delete diacritic marks.
 *
 * The Unicode property escape is compiled through the RegExp constructor on
 * purpose: a regex *literal* with an unsupported escape is an early
 * SyntaxError for the whole module, so a try/catch around the literal can
 * never reach its fallback. Constructing it at runtime makes the fallback to
 * the plain combining-mark range reachable on engines without `\p{...}`.
 */
const DIACRITIC_PATTERN = (() => {
  try {
    // more complete modern variant
    return new RegExp("\\p{Diacritic}", "gu");
  } catch (err) {
    if (!(err instanceof SyntaxError)) throw err;
    // backwards compatible variant
    return /[\u0300-\u036f]/g;
  }
})();

/** Combining marks that `latinize` drops while walking the string. */
const COMBINING_MARK = /[\u0300-\u036f]/;

/** ASCII upper-case letters; the only unmapped characters `latinize` lower-cases. */
const ASCII_UPPER = /[A-Z]/;

/**
 * Read `options[key]`, falling back when `options` or the value is
 * null/undefined (same semantics as `options?.[key] ?? fallback`).
 */
function readOption(options, key, fallback) {
  if (options === null || options === undefined) return fallback;
  const value = options[key];
  return value === null || value === undefined ? fallback : value;
}

/**
 * Apply the shared whitespace options in a fixed order: trim, collapse runs
 * of whitespace to a single space, then replace every remaining whitespace
 * character. `replaceWhiteSpace === false` disables the last step.
 */
function applyWhitespaceOptions(text, trim, forceSingleSpace, replaceWhiteSpace) {
  let result = text;
  if (trim) result = result.trim();
  if (forceSingleSpace) result = result.replace(/\s{2,}/g, " ");
  if (replaceWhiteSpace !== false) {
    result = result.replace(/\s/g, replaceWhiteSpace);
  }
  return result;
}

/**
 * Strip diacritic marks from a string.
 *
 * The input is converted to a string, decomposed with NFD, and every match
 * of `DIACRITIC_PATTERN` is removed. The result stays in decomposed form.
 *
 * @param {*} str - Value to process; coerced to a string.
 * @param {{ lowerCase?: boolean }} [options] - `lowerCase`: lower-case the
 *   whole result when truthy.
 * @returns {string} The string without diacritic marks.
 */
function removeDiacritics(str, options) {
  const stripped = `${str}`.normalize("NFD").replace(DIACRITIC_PATTERN, "");
  return readOption(options, "lowerCase", false) ? stripped.toLowerCase() : stripped;
}

/** Typographic symbols and the ASCII character each one is replaced with. */
const symbolList = [
  { base: " ", chars: ["\u00A0"] },
  { base: "'", chars: ["\u2019", "\u2018"] },
  { base: '"', chars: ["\u201C", "\u201D", "\uFF02"] },
  { base: "-", chars: ["\u2013", "\u2014", "\u2212"] },
];

/** Flattened `char -> base` lookup built from `symbolList`. */
const symbolMap = new Map(
  symbolList.flatMap(({ base, chars }) => chars.map((char) => [char, base])),
);

/** Alternation of every character in `symbolList`; none are regex-special. */
const allSymbols = new RegExp(
  symbolList.flatMap((entry) => entry.chars).join("|"),
  "g",
);

/**
 * Replace typographic symbols (non-breaking space, curly quotes, dashes,
 * minus sign) with their ASCII counterparts and optionally tidy whitespace.
 *
 * @param {*} str - Value to process; coerced to a string.
 * @param {object} [options]
 * @param {boolean} [options.trim=true] - Trim the result.
 * @param {boolean} [options.forceSingleSpace=false] - Collapse runs of two or
 *   more whitespace characters into one space.
 * @param {string|false} [options.replaceWhiteSpace=false] - Replacement for
 *   every whitespace character, or `false` to leave whitespace alone.
 * @returns {string} The normalized string.
 */
function normalizeSymbols(str, options) {
  const trim = readOption(options, "trim", true);
  const forceSingleSpace = readOption(options, "forceSingleSpace", false);
  const replaceWhiteSpace = readOption(options, "replaceWhiteSpace", false);

  const replaced = `${str}`.replace(allSymbols, (char) => {
    const base = symbolMap.get(char);
    return base === undefined ? char : base;
  });
  return applyWhitespaceOptions(replaced, trim, forceSingleSpace, replaceWhiteSpace);
}

/**
 * Convert a string to a plain-Latin approximation: diacritics are stripped,
 * characters found in `diacriticMap` are replaced by their base string, and
 * (optionally) typographic symbols are normalized.
 *
 * `forceSingleSpace` and `replaceWhiteSpace` are honoured whether or not
 * `symbols` is enabled; previously they were silently ignored when
 * `symbols` was `false`.
 *
 * @param {*} str - Value to process; coerced to a string.
 * @param {object} [options]
 * @param {boolean} [options.symbols=true] - Also run `normalizeSymbols`.
 * @param {boolean} [options.lowerCase=false] - Lower-case ASCII A-Z and every
 *   replaced character. Other characters are left untouched.
 * @param {boolean} [options.trim=false] - Trim before further processing.
 * @param {boolean} [options.forceSingleSpace=false] - Collapse whitespace runs.
 * @param {string|false} [options.replaceWhiteSpace] - Replacement for each
 *   whitespace character; omitted or `false` leaves whitespace alone.
 * @returns {string} The latinized string.
 */
function latinize(str, options) {
  const symbols = readOption(options, "symbols", true);
  const lowerCase = readOption(options, "lowerCase", false);
  const trim = readOption(options, "trim", false);
  const forceSingleSpace = readOption(options, "forceSingleSpace", false);
  const replaceWhiteSpace = readOption(options, "replaceWhiteSpace", false);

  // prepare subject
  let subject = removeDiacritics(str);
  if (trim) subject = subject.trim();
  if (symbols) {
    subject = normalizeSymbols(subject, {
      trim: false,
      forceSingleSpace,
      replaceWhiteSpace,
    });
  } else {
    subject = applyWhitespaceOptions(subject, false, forceSingleSpace, replaceWhiteSpace);
  }

  let result = "";
  for (let i = 0; i < subject.length; i++) {
    const char = subject[i];
    // Drops marks in U+0300-U+036F that the diacritic pattern did not remove.
    if (COMBINING_MARK.test(char)) continue;

    const mapped = diacriticMap.get(char);
    const output = mapped === undefined ? char : mapped;
    // handle lowerCase
    const shouldLower = lowerCase && (mapped !== undefined || ASCII_UPPER.test(char));
    result += shouldLower ? output.toLowerCase() : output;
  }
  return result;
}

/**
 * Build a slug: latinize with lower-casing and `-` for whitespace, turn `_`,
 * `(` and `)` into `-`, then delete every character that is not `a-z` or `-`.
 *
 * NOTE(review): the final filter also removes digits (e.g. "Top 10" becomes
 * "top-"). Kept as-is because callers may rely on it; confirm whether digits
 * should be preserved.
 *
 * @param {*} str - Value to process; coerced to a string.
 * @param {object} [options]
 * @param {boolean} [options.forceSingleSpace=false] - Collapse whitespace runs
 *   before they are replaced with `-`.
 * @param {boolean} [options.trim=false] - Trim before processing.
 * @returns {string} The slug.
 */
function slugify(str, options) {
  const latinized = latinize(str, {
    lowerCase: true,
    replaceWhiteSpace: "-",
    forceSingleSpace: readOption(options, "forceSingleSpace", false),
    trim: readOption(options, "trim", false),
  });
  return latinized.replace(/_|\(|\)/g, "-").replace(/[^a-z-]/g, "");
}

export { latinize, normalizeSymbols, removeDiacritics, slugify };