UNPKG

deep-profanity-filter

Version:

A thorough profanity filter that considers most common circumventions. Works with your custom list of blocked and whitelisted words and phrases. Identifies and/or replaces bad words. Works with *wildcards* at *start and/or end* of words.

202 lines • 14.1 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.reduceRepeatCharacters = exports.textToLatin = exports.removeTextAccents = exports.unEmoji = void 0; /** * Replaces all emojis in a text that feature a letter with * normal latin characters. * Example: "🇎⭕ 🔛" turns into "go on" or * "ðŸ…ŋ🇚ÂŪïļðŸ’°ðŸ‡Š" turns into "purse". * Note: This does NOT replace random emojis used to represent * or mask letters, such as 'ðŸŒļ' representing an 'O'. * * Suggested usage: `textToLatin(unEmoji(inputText))` * @param {string} inputText - The text from which you would like to remove all text based emojis. * @returns the input text, with all letter based emojis transformed to become text. */ function unEmoji(inputText) { return inputText .replace(/🆔/g, 'id') .replace(/🆚/g, 'vs') .replace(/ðŸ”Ī/g, 'abc') .replace(/🆎/g, 'ab') .replace(/🆑/g, 'cl') .replace(/🆘/g, 'sos') .replace(/ðŸšū/g, 'wc') .replace(/🆖/g, 'ng') .replace(/🆗/g, 'ok') .replace(/🆙/g, 'up') .replace(/🆒/g, 'cool') .replace(/🆕/g, 'new') .replace(/🔟/g, '10') .replace(/🆓/g, 'free') .replace(/🔚/g, 'end') .replace(/🔙/g, 'back') .replace(/🔛/g, 'on') .replace(/🔝/g, 'top') .replace(/🔜/g, 'soon') .replace(/ðŸ‡Ķ|🅰/g, 'a') .replace(/🇧|ðŸ…ą/g, 'b') .replace(/ðŸ‡Ļ|ÂĐïļ/g, 'c') .replace(/ðŸ‡Đ/g, 'd') .replace(/🇊/g, 'e') .replace(/ðŸ‡Ŧ/g, 'f') .replace(/🇎/g, 'g') .replace(/🇭/g, 'h') .replace(/ðŸ‡Ū/g, 'i') .replace(/ðŸ‡Ŋ/g, 'j') .replace(/🇰/g, 'k') .replace(/ðŸ‡ą/g, 'l') .replace(/ðŸ‡ē/g, 'm') .replace(/ðŸ‡ģ/g, 'n') .replace(/ðŸ‡ī|ðŸ…ū|⭕/g, 'o') .replace(/ðŸ‡ĩ|ðŸ…ŋ/g, 'p') .replace(/ðŸ‡ķ/g, 'q') .replace(/🇷|ÂŪïļ/g, 'r') .replace(/ðŸ‡ļ|💰/g, 's') .replace(/ðŸ‡đ/g, 't') .replace(/🇚/g, 'u') .replace(/ðŸ‡ŧ/g, 'v') .replace(/🇞/g, 'w') .replace(/ðŸ‡―|❎|❌/g, 'x') .replace(/ðŸ‡ū/g, 'y') .replace(/ðŸ‡ŋ/g, 'z'); } exports.unEmoji = unEmoji; /** * Removes most common accents from characters. 
* Example: The text "ZĖĩÍĖ­ĖĄaĖķĖ‰Í„ĖŽĖ˜lĖķÍ—ĖœgĖĩÍ’ÍĖœĖēoĖķĖ…ĖŠĖž" becomes "Zalgo", * the text "à-cÃītÃĐs" becomes "a-cotes", * non-latin characters stay non-latin, e.g. "ᑕⓞ֑ÎŋĖĪ͕𝕃Ũ…Ü‘" becomes "ᑕⓞÎŋ𝕃". * @param {string} inputText - The text for which you wish to have all * accents removed. * @returns the input text, stripped of all accents. */ function removeTextAccents(inputText) { return inputText .normalize('NFD') .replace(/[\u0300-\u036f]|[\u0591-\u05bd]|\u05bf|\u05c1|\u05c2|\u05c4|\u05c5|\u05c7|\u0711|[\u0730-\u074a]/g, ''); } exports.removeTextAccents = removeTextAccents; /** * Converts a text of fancy unicode font to latin alphabet characters. * This translation happens based on "visual appearance" of the letters, * so if you do this to text that is written in a language of non-latin * alphabet, you will get weird outputs. * * Disclaimer: This may at times mistranslate messages, and the list of * characters that get converted is most likely not complete, although * it is very thoroughly assembled. It will remove most common accents, * and returns a latin string in lower case letters. Any characters that * could not be mapped to latin characters will still appear in the string. * * Example: * "áĩšâ“˜ęŊą â‚ŪᏋäđ‚Õ‡" would turn into "this text" or "ZĖĩÍĖ­ĖĄaĖķĖ‰Í„ĖŽĖ˜lĖķÍ—ĖœgĖĩÍ’ÍĖœĖēoĖķĖ…ĖŠĖž" turns into "zalgo", or * "ᑕⓞ֑ÎŋĖĪ͕𝕃Ũ…Ü‘" turns into "cool". 
* * Suggested usage: `textToLatin(unEmoji(inputText))` * @param {string} inputText - The text which you would like to convert to latin * @returns the input text, with foreign or special alphabet letters translated * to latin lower case characters */ function textToLatin(inputText) { inputText = removeTextAccents(inputText); inputText = unEmoji(inputText); // replace upper case letters that look different in lower case inputText = inputText .replace(/Ð/g, 'd') .replace(/ÎĢ/g, 'e') .replace(/Ⴙ/g, 'g') .replace(/Η/g, 'h') .replace(/Ꮰ/g, 'j') .replace(/Μ/g, 'm') .replace(/áŽĪ/g, 'q') .replace(/Շ/g, 't') .replace(/Ôą/g, 'u') .replace(/Հ/g, 'z'); // convert to lower case and replace the rest inputText = inputText.toLowerCase(); return inputText .replace(/áī€|ₐ|áīŽ|áĩƒ|Îą|âąĨ|ɐ|áķ|Ṛ|𝐚|𝐀|𝑎|ðī|ð‘Ļ|𝒂|𝖚|𝖠|ⓐ|𝔞|𝔄|𝖆|𝕎|𝓊|𝓐|ð’ķ|𝒜|𝕒|ð”ļ|ï―|🄰|ɑ|āļ„|ð—Ū|𝗔|ð˜Ē|𝘈|𝙖|𝘞|𝚊|𝙰|Îŧ|â‚ģ|卂|ïū‘|á—Đ/g, 'a') .replace(/áī|ÃĶ|áī‚/g, 'ae') .replace(/ęœĩ/g, 'ao') .replace(/朷/g, 'au') .replace(/ęœđ/g, 'av') .replace(/ęœŧ|ęœģ/g, 'aa') .replace(/ęœ―/g, 'ay') .replace(/ʙ|áīƒ|Ðē|áĩĶ|áīŪ|áĩ‡|ɓ|ƀ|ƃ|áĩŽ|áķ€|𝐛|𝐁|𝒃|ð‘Đ|ð–ŧ|ð–Ą|ⓑ|𝔟|𝔅|𝖇|𝕭|ð“Ŧ|𝓑|𝒷|𝑏|ðĩ|𝕓|ð”đ|ï―‚|ðŸ„ą|℮|ϐ|āđ’|âī†|ÉŪ|áļ|჊|āđ–|ð—Ŋ|𝗕|ð˜Ģ|𝘉|𝙗|ð˜―|𝚋|ð™ą|ß|āļŋ|äđƒ|Ō|Îē|ᗷ/g, 'b') .replace(/áī„|áķœ|ÂĒ|ƈ|Čž|ꞇ|ð’ļ|ɕ|áķ—|ɔ|ↄ|áī|𝐜|𝐂|𝑐|ðķ|𝒄|𝑊|𝖞|ð–Ē|ⓒ|𝔠|ℭ|𝖈|ð•Ū|𝓎|𝓒|𝒞|𝕔|ℂ|ï―ƒ|ðŸ„ē|áĨī|ðŸ…ē|ς|ፈ|𝗰|𝗖|ð˜Ī|𝘊|𝙘|ð˜ū|𝚌|ð™ē|ᄃ|â‚ĩ|匚|â†ŧ|ᑕ|á‘Ē|℃/g, 'c') .replace(/áī…|áī°|áĩˆ|∂|Ꝺ|ɗ|đ|ƌ|ð’đ|ČĄ|áķ‘|áĩ­|áķ|ɖ|𝐝|𝐃|𝒅|ð‘Ŧ|ð–―|ð–Ģ|𝑑|𝐷|ⓓ|ð”Ą|𝔇|𝖉|ð•Ŋ|𝓭|𝓓|𝒟|𝕕|ð”ŧ|ï―„|ðŸ„ģ|ᗩ|áĶ”|ÕŠ|ðŸ…ģ|āđ”|ԃ|ęŪ„|āŧ“|ð—ą|𝗗|ð˜Ĩ|𝘋|𝙙|ð˜ŋ|𝚍|ð™ģ|り|ę­°|á•ē/g, 'd') .replace(/Įģ|Į†/g, 'dz') .replace(/áī‡|ₑ|áīą|áĩ‰|âąŧ|ɛ|Ņ”|ɇ|Éĩ|ęœŋ|Į|âąļ|áķ’|áķ“|ɘ|𝐞|𝐄|𝒆|𝑎|ð–ū|ð–Ī|ⓔ|ð”Ē|𝔈|𝖊|𝕰|ð“Ū|𝓔|𝑒|ðļ|𝕖|𝔞|ï―…|ðŸ„ī|ðŸ…ī|Ō―|ęŪ›|ð—ē|𝗘|ð˜Ķ|𝘌|𝙚|𝙀|𝚎|ð™ī|äđ‡|Îū|á˜ŋ|₮|á—ī/g, 'e') .replace(/朰|áķ |ƒ|ʄ|ꝭ|ð’ŧ|Ꝿ|áĩŪ|áķ‚|Ṝ|᚝|Ō“|áĩģ|𝐟|𝐅|𝒇|𝑭|ð–ŋ|ð–Ĩ|ęŸŧ|ⓕ|ð”Ģ|𝔉|𝖋|ð•ą|ð“Ŋ|𝓕|𝑓|ðđ|𝕗|ð”―|ï―†|ðŸ„ĩ|ℱ|𝒇|á ŧ|âĻ|ðŸ…ĩ|ϝ|ð—ģ|𝗙|𝘧|𝘍|𝙛|𝙁|𝚏|ð™ĩ|ÂĢ|â‚Ģ|ę­ķ|千|ï―·|á–ī/g, 'f') .replace(//g, 'fi') 
.replace(//g, 'fl') .replace(//g, 'ff') .replace(//g, 'ffi') .replace(//g, 'ffl') .replace(/ÉĒ|ʛ|áīģ|áĩ|É |ĮĨ|áķƒ|ÉĄ|áĩ·|𝓰|𝐠|𝐆|𝐚|𝒈|ð‘Ū|𝗀|ð–Ķ|ⓖ|ð”Ī|𝔊|𝖌|ð•ē|𝓖|𝑔|ð’Ē|𝕘|ð”ū|ï―‡|ðŸ„ķ|Ų‚|ᧁ|ðŸ…ķ|ïŧŪ|ęŪ†|āš‡|ð—ī|𝗚|ð˜Ļ|𝘎|𝙜|𝙂|𝚐|ð™ķ|â‚ē|ム|ց|ᘜ/g, 'g') .replace(/Ɯ|Ð―|ₕ|áīī|ʰ|âąĻ|ħ|ÉĶ|ÉĨ|ĘŪ|ĘŊ|ðĄ|𝐇|𝒉|ð‘Ŋ|𝗁|𝖧|ⓗ|ð”Ĩ|ℌ|𝖍|ð•ģ|ð“ą|𝓗|ð’―|ðŧ|𝕙|ℍ|ï―ˆ|🄷|ℋ|🅷|Ņ’|ԋ|ęŪ’|ɧ|ð—ĩ|𝗛|ð˜Đ|𝘏|𝙝|𝙃|𝚑|𝙷|卄|ん|Õ°|ᕾ/g, 'h') .replace(/ƕ/g, 'hu') .replace(/ÉŠ|áĩĒ|áīĩ|áķĶ|⁹|Îđ|Äą|ÉĻ|áķ–|áī‰|ðĒ|𝐈|𝒊|𝑰|𝗂|ð–Ļ|ðšĪ|ⓘ|ð”Ķ|ℑ|𝖎|ð•ī|ð“ē|𝓘|ð’ū|𝑖|𝐞|𝕚|𝕀|ï―‰|ðŸ„ļ|ę ļ|ðŸ…ļ|āđ€|ę­ĩ|ð—ķ|𝗜|𝘊|𝘐|𝙞|𝙄|𝚒|ð™ļ|äļĻ|ïū‰|ᓰ|áķĪ/g, 'i') .replace(/Äģ/g, 'ij') .replace(/áīŠ|âąž|áīķ|Ęē|ʝ|ɉ|Č·|ɟ|ðĢ|𝐉|𝑗|ð―|𝒋|ð‘ą|𝔧|𝗃|ð–Đ|ðšĨ|ⓙ|𝔧|𝔍|𝖏|ð•ĩ|ð“ģ|𝓙|ð’ŋ|ð’Ĩ|𝕛|𝕁|ï―Š|ðŸ„đ|ℐ|Ҙ|ę đ|Õĩ|ðŸ…đ|āļ§|𝗷|𝗝|ð˜Ŧ|𝘑|𝙟|𝙅|𝚓|ð™đ|Ũ |ïūŒ|Ų„|ᒍ|ᒚ|Úķ|ᒎ/g, 'j') .replace(/áī‹|К|ₖ|áī·|áĩ|⹊|ꝃ|ƙ|ꝁ|ꝅ|áķ„|ʞ|ðĪ|𝐊|𝑘|ðū|𝒌|ð‘ē|𝗄|𝖊|ⓚ|ð”Ļ|𝔎|𝖐|ð•ķ|ð“ī|𝓚|𝓀|ð’Ķ|𝕜|𝕂|ï―‹|🄚|Ō|ᛕ|🅚|ӄ|ęŪķ|ð—ļ|𝗞|𝘎|𝘒|𝙠|𝙆|𝚔|𝙚|₭|ã‚đ|ԟ|á–―áļ|ŌĄ/g, 'k') .replace(/Ɵ|áīŒ|ₗ|áīļ|ËĄ|ℓ|⹥|ꝉ|ŀ|ł|ꞁ|ÉŽ|Čī|áķ…|É­|Åŋ|Éŋ|ŨŸ|ðĨ|𝐋|𝒍|ð‘ģ|𝗅|ð–Ŧ|⅃|ⓛ|ð”Đ|𝔏|𝖑|𝕷|ð“ĩ|𝓛|𝓁|𝑙|ðŋ|𝕝|𝕃|ï―Œ|ðŸ„ŧ|ℒ|áĨĢ|ðŸ…ŧ|ʅ|ęŪ­|ÉĐ|ð—đ|𝗟|𝘭|𝘓|ð™Ą|𝙇|𝚕|ð™ŧ|ᄂ|ã„Ĩ|ïūš|ӏ|ᒩ/g, 'l') .replace(/Į‰/g, 'lj') .replace(/áī|О|ₘ|áīđ|áĩ|Éą|áĩŊ|áķ†|ɰ|ðĶ|𝐌|𝒎|ð‘ī|𝗆|𝖎|ⓜ|𝔊|𝔐|𝖒|ð•ļ|ð“ķ|𝓜|𝓂|𝑚|𝑀|𝕞|𝕄|ï―|🄞|â„ģ|ᗰ|🅞|āđ“|ęŪ‡|𝗚|𝗠|ð˜Ū|𝘔|ð™Ē|𝙈|𝚖|𝙞|â‚Ĩ|įˆŠ|ïūķ|á˜ŧ|Ïŧ/g, 'm') .replace(/Éī|ₙ|áīš|âŋ|áīŽ|Ðļ|ꞃ|Éē|ƞ|Čĩ|áĩ°|áķ‡|Éģ|áī|𝐧|𝐍|𝑛|𝑁|𝒏|ð‘ĩ|𝗇|𝖭|ⓝ|ð”Ŧ|𝔑|𝖓|ð•đ|𝓷|𝓝|𝓃|ð’Đ|𝕟|ℕ|ï―Ž|ðŸ„―|ðŸ…―|āļ |Õž|ęŪ‘|ŋ|āš–|ð—ŧ|ð—Ą|ð˜Ŋ|𝘕|ð™Ģ|𝙉|𝚗|ð™―|Ðŋ|η|â‚Ķ|几|刀|Õē|ហ|ᑎ|ᘉ|áķ°/g, 'n') .replace(/ĮŒ/g, 'nj') .replace(/áī|ₒ|áīž|áĩ’|σ|ꝋ|ꝍ|Ãļ|ČĢ|âąš|áī‘|áī“|ðĻ|𝐎|𝑂|𝒐|ð‘ķ|𝗈|ð–Ū|ⓞ|𝔎|𝔒|𝖔|𝕚|ð“ļ|𝓞|𝑜|𝒊|𝕠|𝕆|ï―|ę­ī|ðŸ„ū|❀|āđ|օ|ę­·|āŧ|𝗞|ð—Ē|𝘰|𝘖|ð™Ī|𝙊|𝚘|ð™ū|ÓĐ|ð|ㄖ|ãŪ|â™Ē|āķ§|ᓍ|♡|ðŸĩ|ᗝ|Îŋ|❁|â˜Ŋ/g, 'o') .replace(/ꝏ/g, 'oo') .replace(/Éķ|œ|áī”/g, 'oe') .replace(/ÆĢ/g, 'on') 
.replace(/áī˜|ₚ|áīū|áĩ–|ρ|ꝓ|ÆĨ|ꝕ|áĩ―|ꝑ|áĩą|áķˆ|ðĐ|𝐏|𝑝|𝑃|ꟾ|𝒑|𝑷|𝗉|ð–Ŋ|ⓟ|𝔭|𝔓|𝖕|ð•ŧ|ð“đ|𝓟|𝓅|ð’Ŧ|ð•Ą|ℙ|ï―|ðŸ„ŋ|℘|Ũ§|ք|ę­ū|ð—―|ð—Ģ|ð˜ą|𝘗|ð™Ĩ|𝙋|𝚙|ð™ŋ|Ãū|₱|åĐ|ï―ą|φ|ᑭ|á•ĩ/g, 'p') .replace(/ĮŦ|áĩĐ|áĩ |ꝙ|ꝗ|Ę |ɋ|𝐊|𝐐|𝒒|ð‘ļ|𝗊|𝖰|ⓠ|ð”Ū|𝔔|𝖖|𝕞|𝓚|𝓠|𝓆|𝒎|ð•Ē|ℚ|ï―‘|🅀|𝑞|𝑄|🆀|ϙ|ÕĶ|āđ‘|ð—ū|ð—Ī|ð˜ē|𝘘|ð™Ķ|𝙌|𝚚|𝚀|ゐ|ŌĐ|á‘Ŧ|á•ī/g, 'q') .replace(/ʀ|áĩĢ|áīŋ|Ęģ|áī•|áīš|ʁ|áī™|Ņ|ꞅ|ɍ|É―|Éū|Éž|áĩē|áķ‰|Éđ|Éŧ|Éš|âąđ|ęŪ§|ðŦ|𝐑|𝒓|ð‘đ|𝗋|ð–ą|ⓡ|ð”Ŋ|ℜ|𝖗|ð•―|ð“ŧ|ð“Ą|𝓇|𝑟|𝑅|ð•Ģ|ℝ|ï―’|🅁|ℛ|ꭈ|🆁|Ðģ|ęŪĒ|ā―ž|ð—ŋ|ð—Ĩ|ð˜ģ|𝘙|𝙧|𝙍|𝚛|𝚁|å°š|ā― |ᖇ/g, 'r') .replace(/ꜱ|ₛ|ËĒ|ʂ|áĩī|áķŠ|Čŋ|ęļ|𝐎|𝐒|𝑠|𝑆|𝒔|𝑚|𝗌|ð–ē|â“Ē|ꙅ|𝔰|𝔖|𝖘|ð•ū|𝓞|ð“Ē|𝓈|ð’Ū|ð•Ī|𝕊|ï―“|🅂|áĶ“|ęŊą|🆂|āļĢ|ֆ|ęŪĨ|𝘀|ð—Ķ|ð˜ī|𝘚|ð™Ļ|𝙎|𝚜|𝚂|ÆĻ|Ņ•|§|â‚ī|äļ‚|ϛ|ᔕ/g, 's') .replace(//g, 'st') .replace(/áī›|Ņ‚|ₜ|áĩ€|áĩ—|ƚ|ÉŦ|âąĶ|Æ­|ʈ|ŧ|Čķ|áĩĩ|ÆŦ|ʇ|𝐭|𝐓|𝒕|ð‘ŧ|𝗍|ð–ģ|â“Ģ|ð”ą|𝔗|𝖙|ð•ŋ|ð“―|ð“Ģ|𝓉|ð’Ŋ|ð•Ĩ|𝕋|ï―”|🅃|𝑇|🆃|ęŪĶ|𝘁|𝗧|ð˜ĩ|ð‘Ą|𝘛|ð™Đ|𝙏|𝚝|𝚃|†|â‚Ū|ㄒ|ï―ē|Õ§|Íģ|á–ķ|äļ…/g, 't') .replace(/áĩš/g, 'th') .replace(/ęœĐ/g, 'tz') .replace(/áīœ|áĩĪ|áĩ|áĩ˜|ʋ|áķ™|ðŪ|𝐔|ð‘Ē|𝑈|𝒖|𝑞|𝗎|ð–ī|â“Ī|ð”ē|𝔘|𝖚|𝖀|ð“ū|ð“Ī|𝓊|𝒰|ð•Ķ|𝕌|ï―•|🅄|υ|🆄|āļĒ|Ɗ|ęŪž|ęŪ°|āļ™|𝘂|ð—Ļ|ð˜ķ|𝘜|𝙊|𝙐|𝚞|𝚄|҆|Âĩ|Ξ|ʉ|ã„Đ|ãē|Õī|ᑌ|ᑘ/g, 'u') .replace(/áĩŦ/g, 'ue') .replace(/áī |áĩĨ|âą―|áĩ›|Î―|ƌ|ꝟ|âąī|áķŒ|âąą|ðŊ|𝐕|ð‘Ģ|𝑉|𝒗|ð‘―|𝗏|ð–ĩ|â“Ĩ|ð”ģ|𝔙|𝖛|𝖁|ð“ŋ|ð“Ĩ|𝓋|ð’ą|𝕧|𝕍|ï―–|🅅|🆅|ŨĐ|ęŪ™|Û·|āļ‡|𝘃|ð—Đ|𝘷|𝘝|ð™Ŧ|𝙑|𝚟|𝚅|áŊ|√|Ņĩ|áŧ―|ᐚ/g, 'v') .replace(/ꝡ/g, 'vy') .replace(/áīĄ|áĩ‚|Ę·|ω|âąģ|ʍ|ÉŊ|𝐰|𝐖|ð‘Ī|𝑊|𝒘|ð‘ū|𝗐|ð–ķ|â“Ķ|ð”ī|𝔚|𝖜|𝖂|𝔀|ð“Ķ|𝓌|ð’ē|ð•Ļ|𝕎|ï―—|🅆|āļŽ|🆆|ÕĄ|ęŪ—|āšŸ|𝘄|𝗊|ð˜ļ|𝘞|𝙎|𝙒|𝚠|𝚆|҉|â‚Đ|åąą|Ⱊ|á—Ŋ|ᘚ/g, 'w') .replace(/χ|ₓ|ËĢ|áķ|ðą|𝐗|ð‘Ĩ|𝑋|𝒙|ð‘ŋ|𝗑|𝖷|ⓧ|ð”ĩ|𝔛|𝖝|𝖃|𝔁|𝓧|𝓍|ð’ģ|ð•Đ|𝕏|ï―˜|🅇|🆇|Ũ|Ó―|Ōģ|𝘅|ð—Ŧ|ð˜đ|𝘟|𝙭|𝙓|ðšĄ|𝚇|×|Óŋ|äđ‚|ïū’|áƒŊ|᙭|Ðķ|ጀ/g, 'x') .replace(/ʏ|áĩ§|Ęļ|҇|Æī|áŧŋ|ɏ|ʎ|ðē|𝐘|ð‘Ķ|𝑌|𝒚|𝒀|𝗒|ð–ļ|â“Ļ|ð”ķ|𝔜|𝖞|𝖄|𝔂|ð“Ļ|𝓎|ð’ī|𝕊|𝕐|ï―™|🅈|âī˜|ŌŊ|🆈|ŨĨ|ყ|ę­đ|āļŊ|𝘆|𝗎|𝘚|𝘠|ð™Ū|𝙔|ðšĒ|𝚈|҃|ÂĨ|ㄚ|ïū˜|Õū|ӌ|á–ŧ/g, 'y') 
.replace(/áīĒ|áķŧ|ęŦ|áĩđ|⹎|ČĨ|Æķ|ʑ|áĩķ|áķŽ|ʐ|ɀ|ðģ|𝐙|𝑧|𝑍|𝒛|𝒁|𝗓|ð–đ|â“Đ|𝔷|â„Ļ|𝖟|𝖅|𝔃|ð“Đ|𝓏|ð’ĩ|ð•Ŧ|â„Ī|ï―š|🅉|Æš|🆉|Õđ|ፚ|āšŠ|𝘇|𝗭|ð˜ŧ|ð˜Ą|ð™Ŋ|𝙕|ðšĢ|𝚉|äđ™|ᘔ|ᗱ|Õ·/g, 'z'); } exports.textToLatin = textToLatin; /** * For any given input text, reduce any repeating characters to a given maximum amount of repetitions. * * As an example, the input string: `"heeellllooooo"` becomes: `"heelloo"` if that number is 2, or * `"heeelllooo"` if that number is 3, or `helo` if that number is 1. * * For English, it is recommended to not use values lower than 2. If this preprocessing is used, make * sure that the bad words and allowed terms also feature at most the same number of repeated characters. * I.e. if using this with the number "2", there is no use of putting words like "helllo" on the lists. * @param inputText - The text from which to remove repeat characters. * @param maxAllowedCharsInSequence - The maximum number of characters in sequence (such as "aaa", "bbb", * ...) that are allowed to remain in the input string. `Recommended: 2 or 3`, depending on the language * of your input text. * @returns The input text with all repeat characters that occur more than the max amount in sequence * removed. * @throws If `maxAllowedCharsInSequence` is not an integer (such as 1.5) or if it is 0 or less. 
*/ function reduceRepeatCharacters(inputText, maxAllowedCharsInSequence) { if (maxAllowedCharsInSequence === void 0) { maxAllowedCharsInSequence = 2; } if (!Number.isInteger(maxAllowedCharsInSequence) || maxAllowedCharsInSequence < 1) { throw new Error('reduceRepeatCharacters - maxAllowedCharsInSequence needs to be an integer that is larger than 0'); } // match any word character `(\w)`, then refer to the last match `\1` and check if it appears // n or more times `{n,}` (where n is maxAllowedCharsInSequence) // replace this with our matched character '$1' repeated n times // in normal regexp terms this would be `string.replace(/(\w)\1{2,}/g, '$1$1')` if `n = 2` var regexp = new RegExp('(\\w)\\1{' + maxAllowedCharsInSequence + ',}', 'g'); var replacementStr = '$1'.repeat(maxAllowedCharsInSequence); return inputText.replace(regexp, replacementStr); } exports.reduceRepeatCharacters = reduceRepeatCharacters; //# sourceMappingURL=input_preprocessor.js.map