// deep-profanity-filter
// Version:
// A thorough profanity filter that considers most common circumventions. Works with your custom list of blocked and whitelisted words and phrases. Identifies and/or replaces bad words. Works with *wildcards* at the *start and/or end* of words.
// 577 lines • 35.5 kB
// JavaScript
;
// TypeScript downlevel helper for object rest destructuring (`var { a, ...rest } = s`).
// Returns a new object holding every own enumerable property of `s` - string-keyed
// and symbol-keyed alike - whose key is not listed in the exclusion array `e`.
// A helper already installed on `this` takes precedence over this local definition.
var __rest = (this && this.__rest) || function (s, e) {
    var hasOwn = Object.prototype.hasOwnProperty;
    var isEnumerable = Object.prototype.propertyIsEnumerable;
    var remainder = {};
    var key;
    // string keys: `for...in` also visits inherited keys, so filter to own ones
    for (key in s) {
        if (!hasOwn.call(s, key) || e.indexOf(key) >= 0) {
            continue;
        }
        remainder[key] = s[key];
    }
    if (s == null || typeof Object.getOwnPropertySymbols !== "function") {
        return remainder;
    }
    // symbol keys: only the enumerable ones are carried over
    var symbols = Object.getOwnPropertySymbols(s);
    for (var idx = 0; idx < symbols.length; idx++) {
        var symbol = symbols[idx];
        if (e.indexOf(symbol) < 0 && isEnumerable.call(s, symbol)) {
            remainder[symbol] = s[symbol];
        }
    }
    return remainder;
};
// TypeScript downlevel helper for `for...of`.
// Returns an iterator for `o`: the object's own `Symbol.iterator` result when it has
// one, otherwise an index-based iterator over anything with a numeric `length`.
// Throws a TypeError for every other value.
// A helper already installed on `this` takes precedence over this local definition.
var __values = (this && this.__values) || function (o) {
    var iteratorSymbol = typeof Symbol === "function" && Symbol.iterator;
    var nativeFactory = iteratorSymbol && o[iteratorSymbol];
    if (nativeFactory) {
        return nativeFactory.call(o);
    }
    if (!o || typeof o.length !== "number") {
        throw new TypeError(iteratorSymbol ? "Object is not iterable." : "Symbol.iterator is not defined.");
    }
    var position = 0;
    return {
        next: function () {
            // drop the reference once the end is reached; later calls keep reporting `done`
            if (o && position >= o.length) {
                o = void 0;
            }
            if (!o) {
                return { value: o, done: true };
            }
            return { value: o[position++], done: false };
        }
    };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.censorText = exports.replaceBadWords = exports.findAllBadWords = exports.getBadWords = exports.findAnyBadWord = exports.findBadWordLocations = exports.doesContainBadWords = exports.getWhitelistType = exports.WordReplacementType = exports.WordReplacementMethod = exports.InputPreprocessMethod = void 0;
var input_preprocessor_1 = require("./input_preprocessor");
var reduce_input_string_1 = require("./reduce_input_string");
var replace_input_1 = require("./replace_input");
var wordlist_preprocessor_1 = require("./wordlist_preprocessor");
// Numeric enum (name -> ordinal and ordinal -> name) selecting how `censorText`
// prepares its input before scanning:
//   Thorough (0)        - input is passed through `textToLatin`
//   CaseInsensitive (1) - input is lower-cased
//   ExactMatch (2)      - input is scanned as given
var InputPreprocessMethod;
(function (lookup) {
    ["Thorough", "CaseInsensitive", "ExactMatch"].forEach(function (label, ordinal) {
        lookup[label] = ordinal;
        lookup[ordinal] = label;
    });
})(InputPreprocessMethod || (exports.InputPreprocessMethod = InputPreprocessMethod = {}));
// Numeric enum (name -> ordinal and ordinal -> name) selecting how much of a bad word
// `replaceBadWords` censors:
//   ReplaceAll (0)                - the whole match
//   KeepFirstCharacter (1)        - everything after the first character
//   KeepFirstAndLastCharacter (2) - everything between the first and last character
var WordReplacementMethod;
(function (lookup) {
    ["ReplaceAll", "KeepFirstCharacter", "KeepFirstAndLastCharacter"].forEach(function (label, ordinal) {
        lookup[label] = ordinal;
        lookup[ordinal] = label;
    });
})(WordReplacementMethod || (exports.WordReplacementMethod = WordReplacementMethod = {}));
// Numeric enum (name -> ordinal and ordinal -> name) selecting what `replaceBadWords`
// substitutes for censored characters:
//   RepeatCharacter (0) - a single repeated character (via `replaceChars`)
//   Grawlix (1)         - grawlix characters (via `grawlix`)
var WordReplacementType;
(function (lookup) {
    ["RepeatCharacter", "Grawlix"].forEach(function (label, ordinal) {
        lookup[label] = ordinal;
        lookup[ordinal] = label;
    });
})(WordReplacementType || (exports.WordReplacementType = WordReplacementType = {}));
/**
 * Converts a `BadWordMatchInfo` object into a `BadWordMatchData` object by
 * copying it and leaving out the `isWhitelisted` flag.
 * The object passed in is not modified.
 */
var toBadWordMatchData = function (badWordMatchInfo) {
    var matchData = Object.assign({}, badWordMatchInfo);
    delete matchData.isWhitelisted;
    return matchData;
};
/**
 * Converts a `BadWordMatchData` object into a `BadWordMatchInfo` object.
 * The result is a copy of the input whose `isWhitelisted` flag is always `false`.
 */
var toBadWordMatchInfo = function (badWordMatchData) {
    var matchInfo = Object.assign({}, badWordMatchData);
    matchInfo.isWhitelisted = false;
    return matchInfo;
};
/**
 * Tells whether a bad word match lies completely inside a whitelisted match.
 * Both matches are described by their start index and length within the same input string.
 * @param badWordStartIndex - Index at which the bad word starts.
 * @param badWordLength - Length of the bad word match.
 * @param goodWordStartIndex - Index at which the whitelisted term starts.
 * @param goodWordLength - Length of the whitelisted match.
 * @returns True if the bad word neither starts before nor ends after the whitelisted term.
 */
var isMatchWhitelisted = function (badWordStartIndex, badWordLength, goodWordStartIndex, goodWordLength) {
    var badWordEnd = badWordStartIndex + badWordLength;
    var goodWordEnd = goodWordStartIndex + goodWordLength;
    var startsInside = badWordStartIndex >= goodWordStartIndex;
    var endsInside = badWordEnd <= goodWordEnd;
    return startsInside && endsInside;
};
/**
 * Selects which regular expressions of the whitelist entries are consulted:
 * `Normal` (0) uses their `normalRegexp` (direct match), `Strict` (1) uses their
 * `strictRegexp` (circumvention of the term), `Both` (2) uses the two together.
 * Built as a numeric enum: names map to ordinals and ordinals map back to names.
 */
var WhitelistCheckType;
(function (lookup) {
    ["Normal", "Strict", "Both"].forEach(function (label, ordinal) {
        lookup[label] = ordinal;
        lookup[ordinal] = label;
    });
})(WhitelistCheckType || (WhitelistCheckType = {}));
/**
 * Drops every match of one bad word that is covered by a whitelisted term.
 *
 * The whitelist entries for the bad word are looked up in `whitelistMap`, adjusted by
 * `overrideData` (entries disabled by word, extra entries appended) and optionally
 * narrowed to one `whitelistWordType`. Each selected regular expression is run against
 * `inputString`; a bad word match that lies completely inside a whitelist match is removed.
 *
 * `Note!` Has side effects: elements of the passed-in `badWordMatchInfo` array that turn
 * out to be whitelisted get their `isWhitelisted` flag set to `true`.
 *
 * @param inputString - the input string in which the bad word matches were found
 * @param badwordData - the bad word the matches belong to
 * @param badWordMatchInfo - the matches found for this bad word; start indices and lengths
 * must refer to `inputString`
 * @param whitelistMap - whitelist entries, keyed by bad word
 * @param checkType - `Normal` checks the entries' `normalRegexp`, `Strict` checks their
 * `strictRegexp` plus `badwordData.whitelistedStrictRegexpArray`, `Both` checks all of these
 * @param overrideData - (Optional) overrides; reads `whitelistDisables` and `whitelistEnables`
 * @param whitelistWordType - (Optional) only consider whitelist entries for which
 * `hasWhitelistWordType(entry, whitelistWordType)` is true
 * @returns the matches that are not whitelisted; an empty array if none remain.
 */
var removeWhitelistedMatchesFromMatchInfos = function (inputString, badwordData, badWordMatchInfo, whitelistMap, checkType, overrideData, whitelistWordType) {
    if (badWordMatchInfo.length === 0) {
        return [];
    }
    // 1. assemble the whitelist entries that apply to this bad word
    var entries = whitelistMap[badwordData.word] || [];
    if (overrideData) {
        var disabledTerms = overrideData.whitelistDisables[badwordData.word];
        if (disabledTerms) {
            entries = entries.filter(function (entry) { return !disabledTerms.includes(entry.word); });
        }
        var enabledEntries = overrideData.whitelistEnables[badwordData.word];
        if (enabledEntries) {
            entries = entries.concat(enabledEntries);
        }
    }
    if (whitelistWordType !== undefined) {
        entries = entries.filter(function (entry) { return (0, wordlist_preprocessor_1.hasWhitelistWordType)(entry, whitelistWordType); });
    }
    // 2. pick the regular expressions to run. The strict set includes the bad word's own
    // strict whitelist, which keeps good content such as "s h e l l" from being blocked
    // for the "h e l l" found inside it.
    var pickNormal = function (entry) { return entry.normalRegexp; };
    var pickStrict = function (entry) { return entry.strictRegexp; };
    var regexps;
    if (checkType === WhitelistCheckType.Normal) {
        regexps = entries.map(pickNormal);
    }
    else {
        regexps = entries.map(pickStrict).concat(badwordData.whitelistedStrictRegexpArray);
        if (checkType === WhitelistCheckType.Both) {
            regexps = regexps.concat(entries.map(pickNormal));
        }
    }
    // flags all matches lying inside the given whitelist hit; removal happens afterwards
    var flagCoveredMatches = function (matchInfos, whitelistHit) {
        matchInfos.forEach(function (matchInfo) {
            if (isMatchWhitelisted(matchInfo.startIndex, matchInfo.length, whitelistHit.index, whitelistHit[0].length)) {
                matchInfo.isWhitelisted = true;
            }
        });
    };
    // 3. walk the whitelist hits lazily so the loop can stop as soon as nothing is left
    for (var r = 0; r < regexps.length; r++) {
        var whitelistHits = __values(inputString.matchAll(regexps[r]));
        for (var step = whitelistHits.next(); !step.done; step = whitelistHits.next()) {
            flagCoveredMatches(badWordMatchInfo, step.value);
            badWordMatchInfo = badWordMatchInfo.filter(function (matchInfo) { return !matchInfo.isWhitelisted; });
            if (badWordMatchInfo.length === 0) {
                return []; // early out - all bad words were whitelisted
            }
        }
    }
    return badWordMatchInfo;
};
/**
 * Locates every occurrence of one bad word in an input string and removes the
 * occurrences that the whitelist allows.
 * In strict mode the bad word's `strictRegexp` is used (circumvention attempts) and the
 * whitelist is checked with `WhitelistCheckType.Strict`; otherwise its `normalRegexp`
 * is used (the word or phrase itself) and the whitelist is checked with
 * `WhitelistCheckType.Normal`.
 * @param inputString - The text to check for bad words.
 * @param badwordData - The data of one bad word (reads `word`, `normalRegexp`, `strictRegexp`).
 * @param whitelistMap - The map of whitelisted words, keyed by bad word.
 * @param checkStrict - Whether to check for circumventions or not.
 * @param overrideData - (Default: `undefined`) Overrides passed on to the whitelist check.
 * @returns An array of BadWordMatchData holding the word plus the start index and length
 * of each substring that matched and was not whitelisted.
 */
var findBadWordMatchData = function (inputString, badwordData, whitelistMap, checkStrict, overrideData) {
    var pattern;
    var checkType;
    if (checkStrict) {
        pattern = badwordData.strictRegexp;
        checkType = WhitelistCheckType.Strict;
    }
    else {
        pattern = badwordData.normalRegexp;
        checkType = WhitelistCheckType.Normal;
    }
    // every raw match starts out as "not whitelisted"
    var candidates = Array.from(inputString.matchAll(pattern), function (hit) {
        return {
            word: badwordData.word,
            isWhitelisted: false,
            startIndex: hit.index,
            length: hit[0].length,
        };
    });
    if (candidates.length === 0) {
        return []; // no bad word was found
    }
    var remaining = removeWhitelistedMatchesFromMatchInfos(inputString, badwordData, candidates, whitelistMap, checkType, overrideData);
    return remaining.map(function (matchInfo) { return toBadWordMatchData(matchInfo); });
};
/**
 * Classifies how a whitelist term relates to a bad word.
 *
 * @param goodword - the whitelist term
 * @param badwordData - the bad word's data (reads `normalRegexp` and `strictRegexp`)
 * @returns `WhitelistWordType.Normal` if the term matches the bad word's normal regexp;
 * otherwise, if the bad word has no strict regexp, `WhitelistWordType.None`;
 * otherwise `WhitelistWordType.Reduced` if only the reduced term (see `reduceInputString`)
 * matches the normal regexp,
 * `WhitelistWordType.Circumvention` if only the circumvention check finds a match,
 * `WhitelistWordType.ReducedAndCircumvention` if both do,
 * and `WhitelistWordType.None` if neither does.
 */
var getWhitelistType = function (goodword, badwordData) {
    var types = wordlist_preprocessor_1.WhitelistWordType;
    if (goodword.match(badwordData.normalRegexp)) {
        return types.Normal;
    }
    if (!badwordData.strictRegexp) {
        return types.None;
    }
    var reducedGoodword = (0, reduce_input_string_1.reduceInputString)(goodword).reducedInput;
    var reducesToBadWord = Boolean(reducedGoodword.match(badwordData.normalRegexp));
    // goes through findBadWordMatchData so the bad word's integrated circumvention whitelist
    // is honoured for edge cases such as "s h e l l" (a spaced-out word containing a bad word)
    var isCircumvention = findBadWordMatchData(goodword, badwordData, {}, true).length > 0;
    if (isCircumvention) {
        return reducesToBadWord ? types.ReducedAndCircumvention : types.Circumvention;
    }
    return reducesToBadWord ? types.Reduced : types.None;
};
exports.getWhitelistType = getWhitelistType;
/**
 * Tells whether a text contains at least one bad word that is not allowed by the whitelist.
 * Delegates to `findAnyBadWord(...)` and reports whether it found a word.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns True if any bad word was found, false if no bad word was found or all bad words were whitelisted.
 */
var doesContainBadWords = function (inputString, processedWordLists, overrideData) {
    var firstBadWord = (0, exports.findAnyBadWord)(inputString, processedWordLists, overrideData);
    return Boolean(firstBadWord);
};
exports.doesContainBadWords = doesContainBadWords;
/**
 * Finds all bad words contained in a string, as well as their locations, indicated by start index and length.
 *
 * For every bad word that is not disabled by `overrideData.badWordDisables`, up to three checks run:
 * 1. the word itself is matched against the input string,
 * 2. if the word has a `strictRegexp`: the word is matched against the reduced input string
 *    (see `reduceInputString`), and the hits are mapped back with `reconstructLocations`,
 * 3. if the word has a `strictRegexp`: the word's circumventions are matched against the input string.
 * Whitelisted hits are removed in every step.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param _a - (Default: `{}`) Options object with the following optional fields:
 * - `firstMatchOnly` (Default: `false`) If true, returns only the first match. If false, returns all matched
 *   bad words.
 * - `overrideData` (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 *   terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns an array of information about all found bad words and where they are located in the input string.
 */
var findBadWordLocations = function (inputString, processedWordLists, _a) {
    var options = _a === void 0 ? {} : _a;
    var firstMatchOption = options.firstMatchOnly;
    var firstMatchOnly = firstMatchOption === void 0 ? false : firstMatchOption;
    var overrideData = options.overrideData;
    var whitelistMap = processedWordLists.whitelistMap;
    var badWordList = processedWordLists.badWordData;
    // The reduced input only depends on inputString, so it is computed at most once per call
    // (on first use) instead of once per bad word.
    // NOTE(review): assumes reduceInputString is pure and that reconstructLocations does not
    // modify the reducedLocations it is given - confirm against ./reduce_input_string.
    var reducedData;
    var getReducedData = function () {
        if (reducedData === undefined) {
            reducedData = (0, reduce_input_string_1.reduceInputString)(inputString);
        }
        return reducedData;
    };
    var asMatchInfo = function (matchData) { return toBadWordMatchInfo(matchData); };
    var asMatchData = function (matchInfo) { return toBadWordMatchData(matchInfo); };
    var isSameLocation = function (a, b) { return a.startIndex === b.startIndex && a.length === b.length; };
    var allBadWordLocations = [];
    for (var i = 0; i < badWordList.length; i++) {
        var badwordData = badWordList[i];
        // check if we ignore the word, in that case we can skip it
        if (overrideData && overrideData.badWordDisables.includes(badwordData.word)) {
            continue;
        }
        // Step 1: match the word against the unmodified input, special characters included,
        // as they could form word boundaries, such as in "test-kitty-word".
        var locations = findBadWordMatchData(inputString, badwordData, whitelistMap, false, overrideData);
        if (firstMatchOnly && locations.length > 0) {
            return [locations[0]];
        }
        // Steps 2 and 3 only apply if circumventions are checked for this word at all
        if (badwordData.strictRegexp !== undefined) {
            // Step 2: remove all special characters from the input string
            // and match it against the word itself, with word boundaries.
            var reduced = getReducedData();
            if (reduced.reducedInput !== inputString) {
                var reducedMatches = findBadWordMatchData(reduced.reducedInput, badwordData, whitelistMap, false, overrideData);
                if (reducedMatches.length > 0) {
                    // map the hits back onto the input string and drop the ones step 1 already found
                    var knownLocations = locations;
                    var reconstructed = (0, reduce_input_string_1.reconstructLocations)(reduced.reducedLocations, reducedMatches)
                        .filter(function (loc) {
                            return !knownLocations.some(function (known) { return isSameLocation(known, loc); });
                        });
                    // check if we whitelisted any specific terms that were matched
                    var reducedInfos = removeWhitelistedMatchesFromMatchInfos(inputString, badwordData, reconstructed.map(asMatchInfo), whitelistMap, WhitelistCheckType.Both, overrideData, wordlist_preprocessor_1.WhitelistWordType.Reduced);
                    var reducedLocations = reducedInfos.map(asMatchData);
                    if (firstMatchOnly && reducedLocations.length > 0) {
                        return [reducedLocations[0]];
                    }
                    locations = locations.concat(reducedLocations);
                }
            }
            // Step 3: match the word with common circumventions, such as
            // "bad k i t t y" while ensuring words such as "k i t t y c a t" are considered bad.
            var circumventions = findBadWordMatchData(inputString, badwordData, whitelistMap, true, overrideData);
            if (circumventions.length > 0) {
                var circumventionInfos = removeWhitelistedMatchesFromMatchInfos(inputString, badwordData, circumventions.map(asMatchInfo), whitelistMap, WhitelistCheckType.Both, overrideData, wordlist_preprocessor_1.WhitelistWordType.Circumvention);
                circumventions = circumventionInfos.map(asMatchData);
                if (firstMatchOnly && circumventions.length > 0) {
                    return [circumventions[0]];
                }
                locations = locations.concat(circumventions);
            }
        }
        // collect all the matches for this bad word
        allBadWordLocations = allBadWordLocations.concat(locations);
    }
    return allBadWordLocations;
};
exports.findBadWordLocations = findBadWordLocations;
/**
 * Looks for a bad word in a text that is not allowed by the whitelist and returns the first
 * one found. Runs `findBadWordLocations(...)` with `firstMatchOnly: true`.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns The first bad word that was found in the input, or undefined if no bad word was found.
 */
var findAnyBadWord = function (inputString, processedWordLists, overrideData) {
    var searchOptions = { firstMatchOnly: true, overrideData: overrideData };
    var matches = (0, exports.findBadWordLocations)(inputString, processedWordLists, searchOptions);
    return matches.length > 0 ? matches[0].word : undefined;
};
exports.findAnyBadWord = findAnyBadWord;
/**
 * Extracts the bad words from the bad word locations found by `findBadWordLocations(...)`.
 * This function is useful if you need both the bad words as well as the input string
 * with all bad words replaced (check out `replaceBadWords(...)` for the latter).
 *
 * If the bad words are all you need, consider using `findAllBadWords(...)` instead.
 * If you only need one bad word, consider using `findAnyBadWord(...)`, and if you
 * only need to know whether there is a bad word, consider using `doesContainBadWords(...)`.
 *
 * @param badWordLocations - The locations of bad words in your input string, as returned
 * by `findBadWordLocations(...)`
 * @returns An array of strings of all bad words found in the text, in order of first
 * appearance. Each bad word is contained only once, even if it repeats.
 */
var getBadWords = function (badWordLocations) {
    // a Set keeps insertion order and drops repeats
    var uniqueWords = new Set(Array.from(badWordLocations, function (location) { return location.word; }));
    return Array.from(uniqueWords);
};
exports.getBadWords = getBadWords;
/**
 * Finds all bad words in a given input text that haven't been allowed by the whitelist.
 * Runs `findBadWordLocations(...)` on the input and condenses the result with `getBadWords(...)`.
 *
 * IMPORTANT: Make sure that any backslash in the inputString is escaped correctly.
 * If you are trying to see whether the string `¯\_(ツ)_/¯` is a bad word, you have to
 * enter it as `¯\\_(ツ)_/¯` to match it correctly.
 *
 * @param inputString - The text you wish to check for bad words.
 * @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
 * @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
 * terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
 * @returns An array of strings of all bad words that were found in the input
 * (whatever `getBadWords(...)` produces for the found locations).
 */
var findAllBadWords = function (inputString, processedWordLists, overrideData) {
    var searchOptions = { overrideData: overrideData };
    var foundLocations = (0, exports.findBadWordLocations)(inputString, processedWordLists, searchOptions);
    return (0, exports.getBadWords)(foundLocations);
};
exports.findAllBadWords = findAllBadWords;
/**
 * Sanitise any text by replacing bad words in it with Grawlix (`$!#@&%`) or a single repeated character.
 * This function is useful if you need both the bad words as well as the input string
 * with all bad words replaced (check out `getBadWords(...)` for the former).
 *
 * If you only need a "censored" input string but are not interested in identifying which bad words were
 * censored, consider using `censorText(...)` instead.
 *
 * Matches that are too short to censor under the chosen replacement method (e.g. a one-character
 * match with `KeepFirstCharacter` or `KeepFirstAndLastCharacter`) are left unchanged.
 *
 * @param inputString - The text that got checked for bad words in `findBadWordLocations(...)`
 * @param badWordLocations - The information on all bad word matches found with `findBadWordLocations(...)`
 * @param _a - (Default: `{}`) Options object with the following optional fields:
 * - `replacementMethod` (Default: `WordReplacementMethod.ReplaceAll`) Used to select whether to replace the
 *   whole word, or keep the first (and last) characters from the bad word intact.
 * - `replacementType` (Default: `WordReplacementType.Grawlix`) Used to select whether to replace the
 *   word with a jumbled mess of Grawlix (`$!#@&%`) characters, or with a selected repeatable character defined
 *   in the next field.
 * - `replacementRepeatCharacter` (Default: `-`) The character to repeat in order to replace the bad word.
 *   (If several characters are entered, only the first one will be used.)
 * @returns the input string, with all bad words replaced by either Grawlix or a repeated character.
 */
var replaceBadWords = function (inputString, badWordLocations, _a) {
    var options = _a === void 0 ? {} : _a;
    var replacementMethod = options.replacementMethod;
    if (replacementMethod === void 0) {
        replacementMethod = WordReplacementMethod.ReplaceAll;
    }
    var replacementType = options.replacementType;
    if (replacementType === void 0) {
        replacementType = WordReplacementType.Grawlix;
    }
    var replacementRepeatCharacter = options.replacementRepeatCharacter;
    if (replacementRepeatCharacter === void 0) {
        replacementRepeatCharacter = '-';
    }
    // number of characters left untouched at the start of each match / in total
    var keptAtStart = replacementMethod === WordReplacementMethod.ReplaceAll ? 0 : 1;
    var keptInTotal = 0;
    if (replacementMethod === WordReplacementMethod.KeepFirstAndLastCharacter) {
        keptInTotal = 2;
    }
    else if (replacementMethod === WordReplacementMethod.KeepFirstCharacter) {
        keptInTotal = 1;
    }
    var locations = Array.from(badWordLocations);
    var outString = inputString;
    for (var i = 0; i < locations.length; i++) {
        var badWordLocation = locations[i];
        var length = badWordLocation.length - keptInTotal;
        if (length <= 0) {
            // Nothing left to censor. A negative length must never reach `substring`:
            // it swaps its arguments, which used to duplicate characters in the output
            // and shift the indices of all later locations.
            continue;
        }
        var startIndex = badWordLocation.startIndex + keptAtStart;
        var endIndex = startIndex + length;
        var wordToReplace = outString.substring(startIndex, endIndex);
        // NOTE(review): later locations index into outString, so this relies on the
        // replacement having the same length as wordToReplace - confirm for grawlix/replaceChars.
        var newBadWord = replacementType === WordReplacementType.Grawlix
            ? (0, replace_input_1.grawlix)(wordToReplace)
            : (0, replace_input_1.replaceChars)(wordToReplace, replacementRepeatCharacter);
        outString = outString.substring(0, startIndex) + newBadWord + outString.substring(endIndex);
    }
    return outString;
};
exports.replaceBadWords = replaceBadWords;
/**
* Sanitise any text by replacing bad words in it with Grawlix (`$!#@&%`) or a single repeated character.
*
* @param inputString - The text that got checked for bad words in `findBadWordLocations(...)`
* @param processedWordLists - The preprocessed bad word list and whitelist, generated by `preprocessWordLists(...)`
* @param inputPreprocessMethod - (Default: `InputPreprocessMethod.CaseInsensitive`) Used to preprocess the input
* string before identifying bad words. `CaseInsensitive`: transforms the input to lower case and then matches it against
* the bad word list.
* `Thorough` uses the `textToLatin()` function to remove text accents, translate letter emojis and
* any other fancy unicode fonts to latin before testing for bad words. Note: If non-latin characters are found,
* the censored text will be returned all in lower case and in latin letters.
* `ExactMatch` matches the input string against the bad word list exactly.
* @param reduceRepeatCharactersTo - (Default: `undefined` meaning repeat characters are not modified.)
* Otherwise required to be a number >= 1. `Will throw an error if this number is <= 0.`
* The amount of characters a repeating sequence of characters (e.g. "aaaabcc") is reduced to in _*every*_ input string.
* (e.g. "abc" if the number is 1, "aabcc" if the number is 2, "aaabcc" if the number is 3, etc.)
* This reduction will be applied after any other input preprocess method. (Uses function `reduceRepeatCharacters(...)`)
*
* `Important:` keep in mind that reducing to 1
* repeat character will likely result in mismatches/false positives ("loot" -> "lot"). Depending on the language of the
* input, this number should be around 2 or 3.
*
* `Note:` when setting this number to 1 or larger, keep in mind that all words in your filter need to adhere to this,
* so if you set the number to 2, putting "princessship" on the bad word list won't take effect, as any such input would
* get reduced to "princesship".
* @param replacementMethod - (Default: `WordReplacementMethod.ReplaceAll`) Used to select whether to replace the
* whole word, or keep the first (and last) characters from the bad word intact.
* @param replacementType - (Default: `WordReplacementType.Grawlix`) Used to select whether to replace the
* word with a jumbled mess of Grawlix (`$!#@&%`) characters, or with a selected repeatable character defined
* in the next parameter.
* @param replacementRepeatCharacter - (Default: `-`) The character to repeat in order to replace the bad word.
* (If several characters are entered, only the first one will be used.)
* @param overrideData - (Default: `undefined`) Data used to modify a list by removing words or whitelisted
* terms or by adding new whitelisted terms. Created with `preprocessWordListOverrideData`
* @returns the input string, with all bad words replaced by either Grawlix or a repeated character.
*/
var censorText = function (inputString, processedWordLists, _a, overrideData) {
var _b = _a === void 0 ? {} : _a, _c = _b.inputPreprocessMethod, inputPreprocessMethod = _c === void 0 ? InputPreprocessMethod.CaseInsensitive : _c, _d = _b.reduceRepeatCharactersTo, reduceRepeatCharactersTo = _d === void 0 ? undefined : _d, _e = _b.replacementMethod, replacementMethod = _e === void 0 ? WordReplacementMethod.ReplaceAll : _e, _f = _b.replacementType, replacementType = _f === void 0 ? WordReplacementType.Grawlix : _f, _g = _b.replacementRepeatCharacter, replacementRepeatCharacter = _g === void 0 ? '-' : _g;
var stringToScan = inputString;
if (inputPreprocessMethod === InputPreprocessMethod.CaseInsensitive) {
stringToScan = stringToScan.toLowerCase();
}
else if (inputPreprocessMethod === InputPreprocessMethod.Thorough) {
stringToScan = (0, input_preprocessor_1.textToLatin)(inputString);
}
if (reduceRepeatCharactersTo !== undefined) {
stringToScan = (0, input_preprocessor_1.reduceRepeatCharacters)(stringToScan, reduceRepeatCharactersTo);
}
var locations = (0, exports.findBadWordLocations)(stringToScan, processedWordLists, { overrideData: overrideData });
if (locations.length === 0) {
return inputString;
}
var stringToReplace = stringToScan;
if (stringToScan === inputString.toLowerCase()) {
stringToReplace = inputString;
}
return (0, exports.replaceBadWords)(stringToReplace, locations, {
replacementMethod: replacementMethod,
replacementType: replacementType,
replacementRepeatCharacter: replacementRepeatCharacter,
});
};
exports.censorText = censorText;
//# sourceMappingURL=word_filter.js.map