@tienedev/datype
Version:
Modern TypeScript utility library with pragmatic typing and zero dependencies
370 lines (367 loc) • 7.74 kB
JavaScript
;
/**
* Unicode to ASCII transliteration map for common characters
* Optimized for performance with pre-computed mappings
*/
const TRANSLITERATION_MAP = {
// Latin Extended-A
à: 'a',
á: 'a',
â: 'a',
ã: 'a',
ä: 'a',
å: 'a',
æ: 'ae',
ç: 'c',
è: 'e',
é: 'e',
ê: 'e',
ë: 'e',
ì: 'i',
í: 'i',
î: 'i',
ï: 'i',
ñ: 'n',
ò: 'o',
ó: 'o',
ô: 'o',
õ: 'o',
ö: 'o',
ø: 'o',
ù: 'u',
ú: 'u',
û: 'u',
ü: 'u',
ý: 'y',
ÿ: 'y',
þ: 'th',
ß: 'ss',
// Latin Extended-A uppercase
À: 'A',
Á: 'A',
Â: 'A',
Ã: 'A',
Ä: 'A',
Å: 'A',
Æ: 'AE',
Ç: 'C',
È: 'E',
É: 'E',
Ê: 'E',
Ë: 'E',
Ì: 'I',
Í: 'I',
Î: 'I',
Ï: 'I',
Ñ: 'N',
Ò: 'O',
Ó: 'O',
Ô: 'O',
Õ: 'O',
Ö: 'O',
Ø: 'O',
Ù: 'U',
Ú: 'U',
Û: 'U',
Ü: 'U',
Ý: 'Y',
Þ: 'TH',
// Latin Extended-B and other common
ă: 'a',
ą: 'a',
ć: 'c',
č: 'c',
ď: 'd',
đ: 'd',
ę: 'e',
ě: 'e',
ğ: 'g',
ı: 'i',
ł: 'l',
ľ: 'l',
ń: 'n',
ň: 'n',
ő: 'o',
œ: 'oe',
ř: 'r',
ś: 's',
š: 's',
ť: 't',
ů: 'u',
ű: 'u',
ź: 'z',
ż: 'z',
ž: 'z',
// Latin Extended-B uppercase
Ă: 'A',
Ą: 'A',
Ć: 'C',
Č: 'C',
Ď: 'D',
Đ: 'D',
Ę: 'E',
Ě: 'E',
Ğ: 'G',
İ: 'I',
Ł: 'L',
Ľ: 'L',
Ń: 'N',
Ň: 'N',
Ő: 'O',
Œ: 'OE',
Ř: 'R',
Ś: 'S',
Š: 'S',
Ť: 'T',
Ů: 'U',
Ű: 'U',
Ź: 'Z',
Ż: 'Z',
Ž: 'Z',
// Cyrillic common
а: 'a',
б: 'b',
в: 'v',
г: 'g',
д: 'd',
е: 'e',
ё: 'yo',
ж: 'zh',
з: 'z',
и: 'i',
й: 'y',
к: 'k',
л: 'l',
м: 'm',
н: 'n',
о: 'o',
п: 'p',
р: 'r',
с: 's',
т: 't',
у: 'u',
ф: 'f',
х: 'h',
ц: 'ts',
ч: 'ch',
ш: 'sh',
щ: 'sch',
ъ: '',
ы: 'y',
ь: '',
э: 'e',
ю: 'yu',
я: 'ya',
// Cyrillic uppercase
А: 'A',
Б: 'B',
В: 'V',
Г: 'G',
Д: 'D',
Е: 'E',
Ё: 'YO',
Ж: 'ZH',
З: 'Z',
И: 'I',
Й: 'Y',
К: 'K',
Л: 'L',
М: 'M',
Н: 'N',
О: 'O',
П: 'P',
Р: 'R',
С: 'S',
Т: 'T',
У: 'U',
Ф: 'F',
Х: 'H',
Ц: 'TS',
Ч: 'CH',
Ш: 'SH',
Щ: 'SCH',
Ъ: '',
Ы: 'Y',
Ь: '',
Э: 'E',
Ю: 'YU',
Я: 'YA',
// Greek common
α: 'a',
β: 'b',
γ: 'g',
δ: 'd',
ε: 'e',
ζ: 'z',
η: 'h',
θ: 'th',
ι: 'i',
κ: 'k',
λ: 'l',
μ: 'm',
ν: 'n',
ξ: 'x',
ο: 'o',
π: 'p',
ρ: 'r',
σ: 's',
ς: 's',
τ: 't',
υ: 'y',
φ: 'f',
χ: 'ch',
ψ: 'ps',
ω: 'w',
// Greek uppercase
Α: 'A',
Β: 'B',
Γ: 'G',
Δ: 'D',
Ε: 'E',
Ζ: 'Z',
Η: 'H',
Θ: 'TH',
Ι: 'I',
Κ: 'K',
Λ: 'L',
Μ: 'M',
Ν: 'N',
Ξ: 'X',
Ο: 'O',
Π: 'P',
Ρ: 'R',
Σ: 'S',
Τ: 'T',
Υ: 'Y',
Φ: 'F',
Χ: 'CH',
Ψ: 'PS',
Ω: 'W',
// Arabic numerals in other scripts
'٠': '0',
'١': '1',
'٢': '2',
'٣': '3',
'٤': '4',
'٥': '5',
'٦': '6',
'٧': '7',
'٨': '8',
'٩': '9',
// Common symbols and punctuation
'\u2018': '',
'\u2019': '',
'\u201C': '',
'\u201D': '',
'\u2026': '...',
'\u2013': '-',
'\u2014': '-',
'\u2022': '',
'\u201A': '',
'\u201E': '',
'\u2039': '',
'\u203A': '',
'\u00AB': '',
'\u00BB': '',
// Currency and other symbols
'€': 'euro',
'£': 'pound',
'¥': 'yen',
'₽': 'ruble',
$: 'dollar',
'¢': 'cent',
'©': 'c',
'®': 'r',
'™': 'tm',
'&': 'and',
};
/**
* Converts a string into a URL-friendly slug by removing/replacing special characters,
* handling Unicode characters, and applying various formatting options.
*
* @param input - The string to slugify
* @param options - Configuration options for slugification
* @returns A URL-safe slug string
*
* @example
* ```typescript
* import { slugify } from 'datype';
*
* // Basic usage
* slugify('Hello World!'); // 'hello-world'
* slugify('Café & Restaurant'); // 'cafe-restaurant'
*
* // Unicode handling
* slugify('Привет мир'); // 'privet-mir'
* slugify('Café à Paris'); // 'cafe-a-paris'
* slugify('北京市'); // 'bei-jing-shi' (with proper transliteration)
*
* // Custom options
* slugify('Hello World', { separator: '_' }); // 'hello_world'
* slugify('Café', { lowercase: false }); // 'Cafe'
* slugify('Product #123', { strict: true }); // 'product-123'
*
* // Custom replacements
* slugify('AT&T', {
* replacements: { '&': '-and-' }
* }); // 'at-and-t'
*
* // Advanced usage
* slugify(' Spaced Out ', { trim: true }); // 'spaced-out'
* slugify('Special chars: @#$%', { remove: true }); // 'special-chars'
* ```
*/
function slugify(input, options = {}) {
const { separator = '-', lowercase = true, trim = true, replacements = {}, remove = true, strict = false, } = options;
if (typeof input !== 'string') {
throw new TypeError('Expected input to be a string');
}
if (input.length === 0) {
return '';
}
// Combine custom replacements with default transliteration map
// const combinedMap = { ...TRANSLITERATION_MAP, ...replacements };
let result = input;
// Step 1: Handle remaining Unicode characters using built-in normalization first
// Normalize to NFD (decomposed form) then remove combining characters
result = result.normalize('NFD').replace(/[\u0300-\u036f]/g, ''); // Remove combining diacritical marks
// Step 2: Convert to lowercase if requested
if (lowercase) {
result = result.toLowerCase();
}
// Step 3: Apply custom replacements first to allow overriding defaults
for (const [char, replacement] of Object.entries(replacements)) {
if (result.includes(char)) {
result = result.replace(new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'), replacement);
}
}
// Step 4: Handle special characters based on mode
if (strict) {
// Strict mode: only alphanumeric and separators
result = result.replace(/[^a-zA-Z0-9\s]/g, ' ');
}
else {
// Apply transliteration map only in non-strict mode
for (const [char, replacement] of Object.entries(TRANSLITERATION_MAP)) {
if (result.includes(char)) {
result = result.replace(new RegExp(char.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), 'g'), replacement);
}
}
if (remove) {
// Remove non-alphanumeric characters except spaces and hyphens
result = result.replace(/[^\w\s-]/g, ' ');
}
}
// Step 5: Replace whitespace and multiple separators with single separator
result = result
.replace(/\s+/g, separator) // Replace spaces with separator
.replace(new RegExp(`\\${separator}+`, 'g'), separator); // Remove consecutive separators
// Step 6: Trim separators from start and end if requested
if (trim && separator.length > 0) {
const escapedSeparator = separator.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const trimRegex = new RegExp(`^${escapedSeparator}+|${escapedSeparator}+$`, 'g');
result = result.replace(trimRegex, '');
}
return result;
}
exports.slugify = slugify;