UNPKG

deep-profanity-filter

Version:

A thorough profanity filter that considers most common circumventions. Works with your custom list of blocked and whitelisted words and phrases. Identifies and/or replaces bad words. Works with *wildcards* at *start and/or end* of words.

github.com/Zariem/deep-profanity-filter

Zariem/deep-profanity-filter

577 lines • 35.5 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.censorText = exports.replaceBadWords = exports.findAllBadWords = exports.getBadWords = exports.findAnyBadWord = exports.findBadWordLocations = exports.doesContainBadWords = exports.getWhitelistType = exports.WordReplacementType = exports.WordReplacementMethod = exports.InputPreprocessMethod = void 0;
const input_preprocessor_1 = require("./input_preprocessor");
const reduce_input_string_1 = require("./reduce_input_string");
const replace_input_1 = require("./replace_input");
const wordlist_preprocessor_1 = require("./wordlist_preprocessor");

/**
 * How `censorText(...)` preprocesses its input before scanning it for bad words.
 * Numeric enum with reverse mapping (value -> name), as emitted by TypeScript.
 */
let InputPreprocessMethod;
(function (InputPreprocessMethod) {
    InputPreprocessMethod[InputPreprocessMethod["Thorough"] = 0] = "Thorough";
    InputPreprocessMethod[InputPreprocessMethod["CaseInsensitive"] = 1] = "CaseInsensitive";
    InputPreprocessMethod[InputPreprocessMethod["ExactMatch"] = 2] = "ExactMatch";
})(InputPreprocessMethod || (exports.InputPreprocessMethod = InputPreprocessMethod = {}));

/**
 * Which characters of a bad word are replaced: all of them, all but the first,
 * or all but the first and the last.
 */
let WordReplacementMethod;
(function (WordReplacementMethod) {
    WordReplacementMethod[WordReplacementMethod["ReplaceAll"] = 0] = "ReplaceAll";
    WordReplacementMethod[WordReplacementMethod["KeepFirstCharacter"] = 1] = "KeepFirstCharacter";
    WordReplacementMethod[WordReplacementMethod["KeepFirstAndLastCharacter"] = 2] = "KeepFirstAndLastCharacter";
})(WordReplacementMethod || (exports.WordReplacementMethod = WordReplacementMethod = {}));

/**
 * What a bad word is replaced with: one repeated character, or Grawlix.
 */
let WordReplacementType;
(function (WordReplacementType) {
    WordReplacementType[WordReplacementType["RepeatCharacter"] = 0] = "RepeatCharacter";
    WordReplacementType[WordReplacementType["Grawlix"] = 1] = "Grawlix";
})(WordReplacementType || (exports.WordReplacementType = WordReplacementType = {}));

/**
 * Turns an object of type `BadWordMatchInfo` into the type `BadWordMatchData`
 * by copying every property except `isWhitelisted`.
 */
const toBadWordMatchData = ({ isWhitelisted, ...matchData }) => matchData;

/**
 * Turns an object of type `BadWordMatchData` into the type `BadWordMatchInfo`.
 * Always sets isWhitelisted to false on the newly generated MatchInfo objects.
 */
const toBadWordMatchInfo = (badWordMatchData) => ({ ...badWordMatchData, isWhitelisted: false });

/**
 * Checks if the bad word is fully contained within the whitelisted word,
 * given their start indices and lengths.
 * @param badWordStartIndex - The index at which the bad word starts in the input string.
 * @param badWordLength - The length of the bad word in the input string.
 * @param goodWordStartIndex - The index at which the whitelisted word starts in the input string.
 * @param goodWordLength - The length of the whitelisted word in the input string.
 * @returns True, if the bad word is fully contained in this whitelisted term. False, otherwise.
 */
const isMatchWhitelisted = (badWordStartIndex, badWordLength, goodWordStartIndex, goodWordLength) => {
    const startsInside = badWordStartIndex >= goodWordStartIndex;
    const endsInside = badWordStartIndex + badWordLength <= goodWordStartIndex + goodWordLength;
    return startsInside && endsInside;
};

/**
 * Whether to check the whitelist's `Normal` regular expressions (check for a direct match),
 * or their `Strict` regular expressions (check for a circumvention of that word), or `Both`.
 */
let WhitelistCheckType;
(function (WhitelistCheckType) {
    WhitelistCheckType[WhitelistCheckType["Normal"] = 0] = "Normal";
    WhitelistCheckType[WhitelistCheckType["Strict"] = 1] = "Strict";
    WhitelistCheckType[WhitelistCheckType["Both"] = 2] = "Both";
})(WhitelistCheckType || (WhitelistCheckType = {}));

/**
 * Removes any terms related to the bad word from `badwordData` that are whitelisted by the
 * `whitelistMap` (potentially modified by `overrideData`) from the `badWordMatchInfo` array of
 * all matches that were found for this bad word.
 * `Note!` Has side effects: sets `isWhitelisted = true` on whitelisted elements of the
 * input array `badWordMatchInfo`.
 * @param inputString - the input string for which we found any bad word matches
 * @param badwordData - the bad word we found matches for
 * @param badWordMatchInfo - the matches we found of this bad word.
 * StartIndices and length need to be related to input string.
 * @param whitelistMap - the whitelisted words to check for overlaps with any found bad words
 * @param checkType - `Normal`, `Strict` or `Both`, defining which whitelist regular expressions to check.
 * If normal, only the word itself is matched. If strict, only circumvention terms are checked in the
 * whitelist. If both, both are checked.
 * @param overrideData - (Optional) Any potential overrides to the whitelist map.
 * @param whitelistWordType - (Optional) Only consider the whitelisted words of a specific type, e.g.
 * `WhitelistWordType.Reduced` only considers whitelist terms that don't match the bad word directly,
 * since they contain some special characters that, if removed, makes the term match the bad word.
 * `WhitelistWordType.Circumvention` only considers whitelist terms that match the bad word's circumvention
 * regular expression.
 * @returns the `badWordMatchInfo` array without any terms that were whitelisted under the input conditions.
 */
const removeWhitelistedMatchesFromMatchInfos = (inputString, badwordData, badWordMatchInfo, whitelistMap, checkType, overrideData, whitelistWordType) => {
    if (badWordMatchInfo.length === 0) {
        return [];
    }
    let whitelistArray = whitelistMap[badwordData.word] || [];
    if (overrideData) {
        // remove all the whitelisted words that the override disables
        const disabledTerms = overrideData.whitelistDisables[badwordData.word];
        if (disabledTerms) {
            whitelistArray = whitelistArray.filter((element) => !disabledTerms.includes(element.word));
        }
        // add all the whitelisted words that the override adds
        const enabledTerms = overrideData.whitelistEnables[badwordData.word];
        if (enabledTerms) {
            whitelistArray = whitelistArray.concat(enabledTerms);
        }
    }
    if (whitelistWordType !== undefined) {
        // if a type is specified, only check the whitelist terms of that type
        whitelistArray = whitelistArray.filter((elem) => (0, wordlist_preprocessor_1.hasWhitelistWordType)(elem, whitelistWordType));
    }
    // if we are checking for bad word circumventions also check the specific "strict" whitelist
    // that avoids blocking good content such as "s h e l l" for discovering bad content within, such as "h e l l"
    let whitelistRegexData;
    if (checkType === WhitelistCheckType.Normal) {
        whitelistRegexData = whitelistArray.map((elem) => elem.normalRegexp);
    }
    else {
        whitelistRegexData = whitelistArray
            .map((elem) => elem.strictRegexp)
            .concat(badwordData.whitelistedStrictRegexpArray);
    }
    if (checkType === WhitelistCheckType.Both) {
        whitelistRegexData = whitelistRegexData.concat(whitelistArray.map((elem) => elem.normalRegexp));
    }
    // iterate through the whitelist that overlaps with this bad word,
    // based on start index and match length, we can determine if a bad word
    // is okay to be used (i.e. whether it is whitelisted).
    let remainingMatches = badWordMatchInfo;
    for (const whitelistRegex of whitelistRegexData) {
        for (const whitelistMatch of inputString.matchAll(whitelistRegex)) {
            for (const badWordElement of remainingMatches) {
                if (isMatchWhitelisted(badWordElement.startIndex, badWordElement.length, whitelistMatch.index, whitelistMatch[0].length)) {
                    // only flag here and filter afterwards, to not break the loop
                    badWordElement.isWhitelisted = true;
                }
            }
            remainingMatches = remainingMatches.filter((element) => !element.isWhitelisted);
            if (remainingMatches.length === 0) {
                return []; // early out - all bad words were whitelisted
            }
        }
    }
    return remainingMatches;
};

/**
 * Finds the location(s) of a given bad word's appearances in an input string.
 * Includes checks of the whitelist, in order to ensure that whitelisted occurrences
 * of bad words are not considered "bad".
 * Can check either strictly, considering circumvention attempts, or non-strictly,
 * considering only whether the bad word (or phrase) itself appears in the input string.
 * @param inputString - The text to check for bad words.
 * @param badwordData - The data for one bad word - from the array of bad word data generated by `preprocessWordLists`
 * @param whitelistMap - The map of whitelisted words, generated by `preprocessWordLists`
 * @param checkStrict - Whether to check for circumventions or not.
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns An array of BadWordMatchData that contains the word and the start indices and
 * lengths of any substrings matching the word.
 */
const findBadWordMatchData = (inputString, badwordData, whitelistMap, checkStrict, overrideData) => {
    const badwordRegExp = checkStrict ? badwordData.strictRegexp : badwordData.normalRegexp;
    // make a checklist of which matches we found, remove them once we see they are whitelisted
    const badWordMatchInfo = [];
    for (const match of inputString.matchAll(badwordRegExp)) {
        badWordMatchInfo.push({
            word: badwordData.word,
            isWhitelisted: false,
            startIndex: match.index,
            length: match[0].length,
        });
    }
    if (badWordMatchInfo.length === 0) {
        return []; // no bad word was found
    }
    const checkType = checkStrict ? WhitelistCheckType.Strict : WhitelistCheckType.Normal;
    const notWhitelisted = removeWhitelistedMatchesFromMatchInfos(inputString, badwordData, badWordMatchInfo, whitelistMap, checkType, overrideData);
    return notWhitelisted.map((match) => toBadWordMatchData(match));
};

/**
 * Figure out if the good word matches the bad word in its normal form
 * or if it represents a variant that reduces to the bad word (by removing special characters)
 * or if it represents a circumvention that spaces out the bad word.
 *
 * @param goodword - the whitelist term
 * @param badwordData - the bad word's regular expressions, created with `getBadWordData(...)`
 * @returns `WhitelistWordType.None` if the whitelist term does not match this bad word,
 * `WhitelistWordType.Normal` if the term matches the bad word in its normal form,
 * `WhitelistWordType.Reduced` if the term without special characters (reduced string) matches the word,
 * `WhitelistWordType.Circumvention` if the whitelisted word matches the bad word's circumvention regexp,
 * `WhitelistWordType.ReducedAndCircumvention` if it matches both the reduced string and the circumvention.
 */
const getWhitelistType = (goodword, badwordData) => {
    const WhitelistWordType = wordlist_preprocessor_1.WhitelistWordType;
    if (goodword.match(badwordData.normalRegexp)) {
        return WhitelistWordType.Normal;
    }
    if (!badwordData.strictRegexp) {
        // circumventions are not checked for this bad word, so nothing else can match
        return WhitelistWordType.None;
    }
    const reducedGoodwordData = (0, reduce_input_string_1.reduceInputString)(goodword);
    const matchesReduced = Boolean(reducedGoodwordData.reducedInput.match(badwordData.normalRegexp));
    // using findBadWordMatchData as the integrated circumvention whitelist regexps need to be checked
    // for edge cases such as s h e l l (fully spaced out words that contain a bad word, parsed "shell")
    const circumventionMatches = findBadWordMatchData(goodword, badwordData, {}, true);
    if (circumventionMatches.length > 0) {
        return matchesReduced
            ? WhitelistWordType.ReducedAndCircumvention
            : WhitelistWordType.Circumvention;
    }
    return matchesReduced ? WhitelistWordType.Reduced : WhitelistWordType.None;
};
exports.getWhitelistType = getWhitelistType;

/**
 * Given your preprocessed bad word list and whitelist, checks if a given text contains any bad word
 * that hasn't been allowed by the whitelist. Checks for the most common circumventions as well.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns True if any bad word was found, false if no bad word was found or all bad words were whitelisted.
 */
const doesContainBadWords = (inputString, processedWordLists, overrideData) => Boolean((0, exports.findAnyBadWord)(inputString, processedWordLists, overrideData));
exports.doesContainBadWords = doesContainBadWords;

/**
 * Finds all bad words contained in a string, as well as their locations, indicated by start index and length.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param firstMatchOnly - (Default: `false`) If true, returns only the first match. If false, returns all matched
 * bad words.
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * --------
 * @returns an array of information about all found bad words and where they are located in the input string.
 */
const findBadWordLocations = (inputString, processedWordLists, { firstMatchOnly = false, overrideData } = {}) => {
    const whitelistMap = processedWordLists.whitelistMap;
    let allBadWordLocations = [];
    for (const badwordData of processedWordLists.badWordData) {
        // check if we ignore the word, in that case we can skip it
        if (overrideData && overrideData.badWordDisables.includes(badwordData.word)) {
            continue;
        }
        // if we check for an exact match
        // try to match the word with its special characters, as they could
        // form word boundaries, such as in "test-kitty-word".
        let locations = findBadWordMatchData(inputString, badwordData, whitelistMap, false, overrideData);
        // if we are only looking for one word, return it at this point
        if (firstMatchOnly && locations.length > 0) {
            return [locations[0]];
        }
        // if we are checking circumventions at all
        if (badwordData.strictRegexp !== undefined) {
            // try removing all special characters from the input string
            // and match it against the word itself, with word boundaries.
            const reducedData = (0, reduce_input_string_1.reduceInputString)(inputString);
            if (reducedData.reducedInput !== inputString) {
                const matchedReducedStringLocations = findBadWordMatchData(reducedData.reducedInput, badwordData, whitelistMap, false, overrideData);
                let actualReducedStringLocations = [];
                if (matchedReducedStringLocations.length > 0) {
                    // map the indices found in the reduced string back onto the original input
                    actualReducedStringLocations = (0, reduce_input_string_1.reconstructLocations)(reducedData.reducedLocations, matchedReducedStringLocations);
                    // remove duplicates found of words that were the same in the normal and the reduced input string
                    actualReducedStringLocations = actualReducedStringLocations.filter((loc) => !locations.some((foundLoc) => foundLoc.startIndex === loc.startIndex && foundLoc.length === loc.length));
                    // check if we whitelisted any specific terms that were matched
                    const reducedStringMatchInfos = removeWhitelistedMatchesFromMatchInfos(inputString, badwordData, actualReducedStringLocations.map((elem) => toBadWordMatchInfo(elem)), whitelistMap, WhitelistCheckType.Both, overrideData, wordlist_preprocessor_1.WhitelistWordType.Reduced);
                    actualReducedStringLocations = reducedStringMatchInfos.map((elem) => toBadWordMatchData(elem));
                    if (actualReducedStringLocations.length > 0 && firstMatchOnly) {
                        return [actualReducedStringLocations[0]];
                    }
                }
                locations = locations.concat(actualReducedStringLocations);
            }
            // finally try to match the word with common circumventions, such as
            // "bad k i t t y" while ensuring words such as "k i t t y c a t" are considered bad.
            let circumventions = findBadWordMatchData(inputString, badwordData, whitelistMap, true, overrideData);
            if (circumventions.length > 0) {
                const circumventionMatchInfos = removeWhitelistedMatchesFromMatchInfos(inputString, badwordData, circumventions.map((elem) => toBadWordMatchInfo(elem)), whitelistMap, WhitelistCheckType.Both, overrideData, wordlist_preprocessor_1.WhitelistWordType.Circumvention);
                circumventions = circumventionMatchInfos.map((elem) => toBadWordMatchData(elem));
                if (firstMatchOnly && circumventions.length > 0) {
                    return [circumventions[0]];
                }
                locations = locations.concat(circumventions);
            }
        }
        // collect all the matches for this bad word
        allBadWordLocations = allBadWordLocations.concat(locations);
    }
    return allBadWordLocations;
};
exports.findBadWordLocations = findBadWordLocations;

/**
 * Given your preprocessed bad word list and whitelist, checks if a given text contains any bad word
 * that hasn't been allowed by the whitelist. Checks for the most common circumventions as well.
 * If any bad word was found, the first word that was found will be returned.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns The first bad word that was found in the input, or undefined if no bad word was found.
 */
const findAnyBadWord = (inputString, processedWordLists, overrideData) => {
    const result = (0, exports.findBadWordLocations)(inputString, processedWordLists, {
        firstMatchOnly: true,
        overrideData: overrideData,
    });
    return result.length > 0 ? result[0].word : undefined;
};
exports.findAnyBadWord = findAnyBadWord;

/**
 * Given the bad word locations found by `findBadWordLocations(...)`,
 * extract all the bad words from that data.
 * This function is useful if you need both the bad words as well as the input string
 * with all bad words replaced (check out `replaceBadWords(...)` for the latter.)
 *
 * If the bad words are all you need, consider using `findAllBadWords(...)` instead.
 * If you only need one bad word, consider using `findAnyBadWord(...)`, and if you
 * only need to know whether there is a bad word, consider using `doesContainBadWords(...)`.
 *
 * @param badWordLocations - The locations of bad words in your input string checked
 * with `findBadWordLocations(...)`
 * @returns An array of strings of all bad words found in the text. Only contains each
 * bad word once, even if they repeat. Words appear in the order of their first occurrence
 * in `badWordLocations`.
 */
const getBadWords = (badWordLocations) => {
    // a Set keeps insertion order, so this dedupes while preserving first-seen order
    const uniqueWords = new Set();
    for (const matchInfo of badWordLocations) {
        uniqueWords.add(matchInfo.word);
    }
    return [...uniqueWords];
};
exports.getBadWords = getBadWords;

/**
 * Given your preprocessed bad word list and whitelist, checks for all bad words in a given input text
 * that haven't been allowed by the whitelist. Checks for the most common circumventions as well.
 * Returns an array of strings of all bad words.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns An array of all bad words that were found in the input, each listed once.
 * The array is empty if no bad word was found.
 */
const findAllBadWords = (inputString, processedWordLists, overrideData) => {
    const badWordLocations = (0, exports.findBadWordLocations)(inputString, processedWordLists, { overrideData: overrideData });
    return (0, exports.getBadWords)(badWordLocations);
};
exports.findAllBadWords = findAllBadWords;

/**
 * Sanitise any text by replacing bad words in it with Grawlix (`$!#@&%`) or a single repeated character.
 * This function is useful if you need both the bad words as well as the input string
 * with all bad words replaced (check out `getBadWords(...)` for the former.)
 *
 * If you only need a "censored" input string but are not interested in identifying which bad words were
 * censored, consider using `censorText(...)` instead.
 *
 * @param inputString - The text that got checked for bad words in `findBadWordLocations(...)`
 * @param badWordLocations - The information on all bad word matches found with `findBadWordLocations(...)`
 * @param replacementMethod - (Default: `WordReplacementMethod.ReplaceAll`) Used to select whether to replace the
 * whole word, or keep the first (and last) characters from the bad word intact.
 * @param replacementType - (Default: `WordReplacementType.Grawlix`) Used to select whether to replace the
 * word with a jumbled mess of Grawlix (`$!#@&%`) characters, or with a selected repeatable character defined
 * in the next parameter.
 * @param replacementRepeatCharacter - (Default: `-`) The character to repeat in order to replace the bad word.
 * (If several characters are entered, only the first one will be used.)
 * @returns the input string, with all bad words replaced by either Grawlix or a repeated character.
 */
const replaceBadWords = (inputString, badWordLocations, { replacementMethod = WordReplacementMethod.ReplaceAll, replacementType = WordReplacementType.Grawlix, replacementRepeatCharacter = '-', } = {}) => {
    // number of leading characters of each match that stay untouched
    const keptAtStart = replacementMethod === WordReplacementMethod.ReplaceAll ? 0 : 1;
    // total number of characters (first and/or last) of each match that stay untouched
    let keptInTotal = 0;
    if (replacementMethod === WordReplacementMethod.KeepFirstAndLastCharacter) {
        keptInTotal = 2;
    }
    else if (replacementMethod === WordReplacementMethod.KeepFirstCharacter) {
        keptInTotal = 1;
    }
    let outString = inputString;
    for (const badWordLocation of badWordLocations) {
        const startIndex = badWordLocation.startIndex + keptAtStart;
        // Clamp to 0: a match shorter than the number of kept characters (e.g. a one-character
        // match with `KeepFirstAndLastCharacter`) would otherwise give a negative length.
        // `substring` silently swaps reversed arguments, which made the splice below
        // duplicate characters of the input.
        const length = Math.max(0, badWordLocation.length - keptInTotal);
        const endIndex = startIndex + length;
        const wordToReplace = outString.substring(startIndex, endIndex);
        const newBadWord = replacementType === WordReplacementType.Grawlix
            ? (0, replace_input_1.grawlix)(wordToReplace)
            : (0, replace_input_1.replaceChars)(wordToReplace, replacementRepeatCharacter);
        outString = outString.substring(0, startIndex) + newBadWord + outString.substring(endIndex);
    }
    return outString;
};
exports.replaceBadWords = replaceBadWords;

/**
 * Sanitise any text by replacing bad words in it with Grawlix (`$!#@&%`) or a single repeated character.
 *
 * @param inputString - The text to scan for bad words and censor.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param inputPreprocessMethod - (Default: `InputPreprocessMethod.CaseInsensitive`) Used to preprocess the input
 * string before identifying bad words. `CaseInsensitive`: transforms the input to lower case and then matches it against
 * the bad word list.
 * `Thorough` uses the `textToLatin()` function to remove text accents, translate letter emojis and
 * any other fancy unicode fonts to latin before testing for bad words. Note: If non-latin characters are found,
 * the censored text will be returned all in lower case and in latin letters.
 * `ExactMatch` matches the input string against the bad word list exactly.
 * @param reduceRepeatCharactersTo - (Default: `undefined` meaning repeat characters are not modified.)
 * Otherwise required to be a number >= 1. `Will throw an error if this number is <= 0.`
 * The amount of characters a repeating sequence of characters (e.g. "aaaabcc") is reduced to in _*every*_ input string.
 * (e.g. "abc" if the number is 1, "aabcc" if the number is 2, "aaabcc" if the number is 3, etc.)
 * This reduction will be applied after any other input preprocess method. (Uses function `reduceRepeatCharacters(...)`)
 *
 * `Important:` keep in mind that reducing to 1
 * repeat character will likely result in mismatches/false positives ("loot" -> "lot"). Depending on the language of the
 * input, this number should be around 2 or 3.
 *
 * `Note:` when setting this number to 1 or larger, keep in mind that all words in your filter need to adhere to this,
 * so if you set the number to 2, putting "princessship" on the bad word list won't take effect, as any such input would
 * get reduced to "princesship".
 * @param replacementMethod - (Default: `WordReplacementMethod.ReplaceAll`) Used to select whether to replace the
 * whole word, or keep the first (and last) characters from the bad word intact.
 * @param replacementType - (Default: `WordReplacementType.Grawlix`) Used to select whether to replace the
 * word with a jumbled mess of Grawlix (`$!#@&%`) characters, or with a selected repeatable character defined
 * in the next parameter.
 * @param replacementRepeatCharacter - (Default: `-`) The character to repeat in order to replace the bad word.
 * (If several characters are entered, only the first one will be used.)
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns the input string, with all bad words replaced by either Grawlix or a repeated character.
 * If no bad word was found, the unmodified `inputString` is returned.
 */
const censorText = (inputString, processedWordLists, { inputPreprocessMethod = InputPreprocessMethod.CaseInsensitive, reduceRepeatCharactersTo = undefined, replacementMethod = WordReplacementMethod.ReplaceAll, replacementType = WordReplacementType.Grawlix, replacementRepeatCharacter = '-', } = {}, overrideData) => {
    let stringToScan = inputString;
    if (inputPreprocessMethod === InputPreprocessMethod.CaseInsensitive) {
        stringToScan = stringToScan.toLowerCase();
    }
    else if (inputPreprocessMethod === InputPreprocessMethod.Thorough) {
        stringToScan = (0, input_preprocessor_1.textToLatin)(inputString);
    }
    if (reduceRepeatCharactersTo !== undefined) {
        stringToScan = (0, input_preprocessor_1.reduceRepeatCharacters)(stringToScan, reduceRepeatCharactersTo);
    }
    const locations = (0, exports.findBadWordLocations)(stringToScan, processedWordLists, { overrideData: overrideData });
    if (locations.length === 0) {
        return inputString;
    }
    // If preprocessing changed nothing but the letter case, censor the original input so the
    // caller's capitalisation is preserved; otherwise the found indices only fit the scanned string.
    const stringToReplace = stringToScan === inputString.toLowerCase() ? inputString : stringToScan;
    return (0, exports.replaceBadWords)(stringToReplace, locations, {
        replacementMethod: replacementMethod,
        replacementType: replacementType,
        replacementRepeatCharacter: replacementRepeatCharacter,
    });
};
exports.censorText = censorText;
//# sourceMappingURL=word_filter.js.map