modern-diacritics
Version:
A modern way to latinize/ascii-fold strings and normalize symbols.
698 lines (689 loc) • 15.5 kB
JavaScript
;
// Define `__esModule: true` on the CommonJS `exports` object. By convention
// (presumably emitted by the compiler that produced this file) interop helpers
// read this flag to treat the module as one authored with ES module syntax.
Object.defineProperty(exports, '__esModule', { value: true });
/**
 * Table of special characters and the plain replacement ("base") each one
 * folds to. Every row below is `[base, characters]`, where `characters` is a
 * string holding all single characters that map to `base`; the rows are
 * expanded into `{ base, chars }` objects with `chars` as an array of
 * one-character strings.
 */
var diacriticList = [
    ["0", "\u07c0"],
    ["A", "\u24b6\uff21\u023a\u2c6f"],
    ["AA", "\ua732"],
    ["AE", "\u00c6\u01fc\u01e2"],
    ["AO", "\ua734"],
    ["AU", "\ua736"],
    ["AV", "\ua738\ua73a"],
    ["AY", "\ua73c"],
    ["B", "\u24b7\uff22\u0243\u0181"],
    ["C", "\u24b8\uff23\ua73e\u0187\u023b"],
    ["D", "\u24b9\uff24\u0110\u018a\u0189\u1d05\ua779"],
    ["Dh", "\u00d0"],
    ["DZ", "\u01f1\u01c4"],
    ["Dz", "\u01f2\u01c5"],
    ["E", "\u025b\u24ba\uff25\u0190\u018e\u1d07"],
    ["F", "\ua77c\u24bb\uff26\u0191\ua77b"],
    ["G", "\u24bc\uff27\u01e4\u0193\ua7a0\ua77d\ua77e\u0262"],
    ["H", "\u24bd\uff28\u0126\u2c67\u2c75\ua78d"],
    ["I", "\u24be\uff29\u0197"],
    ["J", "\u24bf\uff2a\u0248\u0237"],
    ["K", "\u24c0\uff2b\u0198\u2c69\ua740\ua742\ua744\ua7a2"],
    ["L", "\u24c1\uff2c\u013f\u0141\u023d\u2c62\u2c60\ua748\ua746\ua780"],
    ["LJ", "\u01c7"],
    ["Lj", "\u01c8"],
    ["M", "\u24c2\uff2d\u2c6e\u019c\u03fb"],
    ["N", "\ua7a4\u0220\u24c3\uff2e\u019d\ua790\u1d0e"],
    ["NJ", "\u01ca"],
    ["Nj", "\u01cb"],
    ["O", "\u24c4\uff2f\u00d8\u01fe\u0186\u019f\ua74a\ua74c"],
    ["OE", "\u0152"],
    ["OI", "\u01a2"],
    ["OO", "\ua74e"],
    ["OU", "\u0222"],
    ["P", "\u24c5\uff30\u01a4\u2c63\ua750\ua752\ua754"],
    ["Q", "\u24c6\uff31\ua756\ua758\u024a"],
    ["R", "\u24c7\uff32\u024c\u2c64\ua75a\ua7a6\ua782"],
    ["S", "\u24c8\uff33\u1e9e\u2c7e\ua7a8\ua784"],
    ["T", "\u24c9\uff34\u0166\u01ac\u01ae\u023e\ua786"],
    ["Th", "\u00de"],
    ["TZ", "\ua728"],
    ["U", "\u24ca\uff35\u0244"],
    ["V", "\u24cb\uff36\u01b2\ua75e\u0245"],
    ["VY", "\ua760"],
    ["W", "\u24cc\uff37\u2c72"],
    ["X", "\u24cd\uff38"],
    ["Y", "\u24ce\uff39\u01b3\u024e\u1efe"],
    ["Z", "\u24cf\uff3a\u01b5\u0224\u2c7f\u2c6b\ua762"],
    ["a", "\u24d0\uff41\u1e9a\u2c65\u0250\u0251"],
    ["aa", "\ua733"],
    ["ae", "\u00e6\u01fd\u01e3"],
    ["ao", "\ua735"],
    ["au", "\ua737"],
    ["av", "\ua739\ua73b"],
    ["ay", "\ua73d"],
    ["b", "\u24d1\uff42\u0180\u0183\u0253\u0182"],
    ["c", "\uff43\u24d2\u0188\u023c\ua73f\u2184"],
    ["d", "\u24d3\uff44\u0111\u018c\u0256\u0257\u018b\u13e7\u0501\ua7aa"],
    ["dh", "\u00f0"],
    ["dz", "\u01f3\u01c6"],
    ["e", "\u24d4\uff45\u0247\u01dd"],
    ["f", "\u24d5\uff46\u0192"],
    ["ff", "\ufb00"],
    ["fi", "\ufb01"],
    ["fl", "\ufb02"],
    ["ffi", "\ufb03"],
    ["ffl", "\ufb04"],
    ["g", "\u24d6\uff47\u01e5\u0260\ua7a1\ua77f\u1d79"],
    ["h", "\u24d7\uff48\u0127\u2c68\u2c76\u0265"],
    ["hv", "\u0195"],
    ["i", "\u24d8\uff49\u0268\u0131"],
    ["j", "\u24d9\uff4a\u0249"],
    ["k", "\u24da\uff4b\u0199\u2c6a\ua741\ua743\ua745\ua7a3"],
    ["l", "\u24db\uff4c\u0140\u017f\u0142\u019a\u026b\u2c61\ua749\ua781\ua747\u026d"],
    ["lj", "\u01c9"],
    ["m", "\u24dc\uff4d\u0271\u026f"],
    ["n", "\u24dd\uff4e\u019e\u0272\u0149\ua791\ua7a5\u043b\u0509"],
    ["nj", "\u01cc"],
    ["o", "\u24de\uff4f\u00f8\u01ff\ua74b\ua74d\u0275\u0254\u1d11"],
    ["oe", "\u0153"],
    ["oi", "\u01a3"],
    ["oo", "\ua74f"],
    ["ou", "\u0223"],
    ["p", "\u24df\uff50\u01a5\u1d7d\ua751\ua753\ua755\u03c1"],
    ["q", "\u24e0\uff51\u024b\ua757\ua759"],
    ["r", "\u24e1\uff52\u024d\u027d\ua75b\ua7a7\ua783"],
    ["s", "\u24e2\uff53\u023f\ua7a9\ua785\u1e9b\u0282"],
    ["ss", "\u00df"],
    ["t", "\u24e3\uff54\u0167\u01ad\u0288\u2c66\ua787"],
    ["th", "\u00fe"],
    ["tz", "\ua729"],
    ["u", "\u24e4\uff55\u0289"],
    ["v", "\u24e5\uff56\u028b\ua75f\u028c"],
    ["vy", "\ua761"],
    ["w", "\u24e6\uff57\u2c73"],
    ["x", "\u24e7\uff58"],
    ["y", "\u24e8\uff59\u01b4\u024f\u1eff"],
    ["z", "\u24e9\uff5a\u01b6\u0225\u0240\u2c6c\ua763"]
].map(function (row) {
    // Array.from splits the string into its individual characters.
    return { base: row[0], chars: Array.from(row[1]) };
});
/**
 * Reverse lookup derived from `diacriticList`: special character -> base.
 * Entries are inserted in table order.
 */
var diacriticMap = new Map();
diacriticList.forEach(function (entry) {
    entry.chars.forEach(function (special) {
        diacriticMap.set(special, entry.base);
    });
});
/**
 * Cached result of probing for Unicode property escape support.
 * `undefined` = not probed yet, `null` = unsupported, otherwise the RegExp.
 */
var unicodeDiacriticPattern;
/**
 * Removes diacritical marks from a value.
 *
 * The value is coerced to a string and decomposed with NFD so that accents
 * become separate combining characters, which are then deleted. When the
 * engine supports Unicode property escapes every character with the
 * `Diacritic` property is removed; otherwise only the combining range
 * U+0300-U+036F is removed.
 *
 * @param {*} str - Value to process; coerced to a string.
 * @param {Object} [options]
 * @param {boolean} [options.lowerCase] - When truthy, the result is lower-cased.
 * @returns {string} The input without diacritical marks.
 */
function removeDiacritics(str, options) {
    var subject = "".concat(str).normalize("NFD");
    if (unicodeDiacriticPattern === undefined) {
        try {
            // Built from a string on purpose: a regex literal that uses a
            // property escape is rejected while the file is being parsed on
            // engines without support, so no try/catch could ever guard it.
            // The constructor fails at run time instead, which is catchable.
            unicodeDiacriticPattern = new RegExp("\\p{Diacritic}", "gu");
        }
        catch (_a) {
            unicodeDiacriticPattern = null;
        }
    }
    var result = unicodeDiacriticPattern
        // more complete modern variant
        ? subject.replace(unicodeDiacriticPattern, "")
        // backwards compatible variant
        : subject.replace(/[\u0300-\u036f]/g, "");
    if (options !== null && options !== undefined && options.lowerCase) {
        return result.toLowerCase();
    }
    return result;
}
/**
 * Typographic symbols and the plain character each group is normalized to:
 * no-break space, curly single quotes, curly/fullwidth double quotes, and
 * en dash / em dash / minus sign.
 */
var symbolList = [
    { base: " ", chars: ["\u00A0"] },
    { base: "'", chars: ["\u2019", "\u2018"] },
    { base: '"', chars: ["\u201C", "\u201D", "\uFF02"] },
    { base: "-", chars: ["\u2013", "\u2014", "\u2212"] }
];
/** Reverse lookup derived from `symbolList`: symbol -> plain character. */
var symbolMap = symbolList.reduce(function (lookup, entry) {
    entry.chars.forEach(function (symbol) {
        lookup.set(symbol, entry.base);
    });
    return lookup;
}, new Map());
/** Global pattern matching any single symbol listed in `symbolList`. */
var allSymbols = new RegExp(symbolList
    .map(function (entry) { return entry.chars.join("|"); })
    .join("|"), "g");
/**
 * Replaces every symbol matched by `allSymbols` with its plain counterpart
 * from `symbolMap`, then applies the optional whitespace handling.
 *
 * Processing order: symbol replacement, trim, collapse of whitespace runs,
 * per-character whitespace replacement.
 *
 * @param {*} str - Value to normalize; coerced to a string.
 * @param {Object} [options]
 * @param {boolean} [options.trim=true] - Trim the result.
 * @param {boolean} [options.forceSingleSpace=false] - Replace each run of two
 *   or more whitespace characters with a single space.
 * @param {*} [options.replaceWhiteSpace=false] - Unless `false`, passed as the
 *   replacement for every whitespace character.
 * @returns {string} The normalized string.
 */
function normalizeSymbols(str, options) {
    // Returns `fallback` only when `value` is null or undefined.
    function orDefault(value, fallback) {
        return value === null || value === undefined ? fallback : value;
    }
    var settings = options === null || options === undefined ? {} : options;
    var trim = orDefault(settings.trim, true);
    var forceSingleSpace = orDefault(settings.forceSingleSpace, false);
    var replaceWhiteSpace = orDefault(settings.replaceWhiteSpace, false);
    var normalized = "".concat(str).replace(allSymbols, function (symbol) {
        // Keep the matched character when the lookup has no entry for it.
        return symbolMap.get(symbol) || symbol;
    });
    if (trim) {
        normalized = normalized.trim();
    }
    if (forceSingleSpace) {
        normalized = normalized.replace(/\s{2,}/g, " ");
    }
    if (replaceWhiteSpace === false) {
        return normalized;
    }
    return normalized.replace(/\s/g, replaceWhiteSpace);
}
/**
 * Folds a value to plain Latin characters.
 *
 * Steps: strip diacritics via `removeDiacritics`, optionally trim, optionally
 * run `normalizeSymbols` (which is also where `forceSingleSpace` and
 * `replaceWhiteSpace` are applied), then walk the string one UTF-16 code unit
 * at a time and substitute every character that has an entry in
 * `diacriticMap`.
 *
 * @param {*} str - Value to latinize; coerced to a string by `removeDiacritics`.
 * @param {Object} [options]
 * @param {boolean} [options.symbols=true] - Run `normalizeSymbols` on the text.
 * @param {boolean} [options.lowerCase=false] - Lower-case substituted
 *   characters and ASCII `A-Z`; other characters are left as they are.
 * @param {boolean} [options.trim=false] - Trim before symbol normalization.
 * @param {boolean} [options.forceSingleSpace=false] - Forwarded to
 *   `normalizeSymbols`; only takes effect when `symbols` is truthy.
 * @param {*} [options.replaceWhiteSpace] - Forwarded to `normalizeSymbols`;
 *   only takes effect when `symbols` is truthy.
 * @returns {string} The latinized string.
 */
function latinize(str, options) {
    // Returns `fallback` only when `value` is null or undefined.
    function orDefault(value, fallback) {
        return value === null || value === undefined ? fallback : value;
    }
    var settings = options === null || options === undefined ? {} : options;
    var symbols = orDefault(settings.symbols, true);
    var lowerCase = orDefault(settings.lowerCase, false);
    var trim = orDefault(settings.trim, false);
    var forceSingleSpace = orDefault(settings.forceSingleSpace, false);
    var replaceWhiteSpace = orDefault(settings.replaceWhiteSpace, undefined);
    // Non-global patterns: `.test` keeps no state between calls.
    var combiningMark = /[\u0300-\u036f]/;
    var asciiUpper = /[A-Z]/;
    // prepare subject
    var subject = removeDiacritics(str);
    if (trim) {
        subject = subject.trim();
    }
    if (symbols) {
        subject = normalizeSymbols(subject, {
            trim: false,
            forceSingleSpace: forceSingleSpace,
            replaceWhiteSpace: replaceWhiteSpace
        });
    }
    var pieces = [];
    for (var index = 0; index < subject.length; index++) {
        var unit = subject.charAt(index);
        // Drop any combining mark that is still present.
        if (combiningMark.test(unit)) {
            continue;
        }
        var mapped = diacriticMap.get(unit);
        var piece = mapped || unit;
        // handle lowerCase
        if (lowerCase && (mapped || asciiUpper.test(unit))) {
            piece = piece.toLowerCase();
        }
        pieces.push(piece);
    }
    return pieces.join("");
}
/**
 * Builds a slug from a value: latinizes it in lower case with every
 * whitespace character replaced by "-", turns "_", "(" and ")" into "-", and
 * finally removes every character that is not `a`-`z` or "-".
 *
 * Note that the final filter also removes digits.
 *
 * @param {*} str - Value to slugify.
 * @param {Object} [options]
 * @param {boolean} [options.forceSingleSpace=false] - Forwarded to `latinize`.
 * @param {boolean} [options.trim=false] - Forwarded to `latinize`.
 * @returns {string} The slug.
 */
function slugify(str, options) {
    // Returns `fallback` only when `value` is null or undefined.
    function orDefault(value, fallback) {
        return value === null || value === undefined ? fallback : value;
    }
    var settings = options === null || options === undefined ? {} : options;
    var latinizeOptions = {
        lowerCase: true,
        replaceWhiteSpace: "-",
        forceSingleSpace: orDefault(settings.forceSingleSpace, false),
        trim: orDefault(settings.trim, false)
    };
    var latin = latinize(str, latinizeOptions);
    var hyphenated = latin.replace(/_|\(|\)/g, "-");
    return hyphenated.replace(/[^a-z-]/g, "");
}
// Public API of the module: the four string helpers defined above are
// attached to the CommonJS `exports` object under their own names.
exports.latinize = latinize;
exports.normalizeSymbols = normalizeSymbols;
exports.removeDiacritics = removeDiacritics;
exports.slugify = slugify;