// @2toad/profanity
// Version:
// A multi-language profanity filter with full TypeScript support
// 233 lines • 10.1 kB
// JavaScript
"use strict";
// Flag this CommonJS module with the __esModule marker property.
Object.defineProperty(exports, "__esModule", { value: true });
// Initialise both exports to undefined; they receive their real values after the class definition below.
exports.profanity = exports.Profanity = void 0;
const profanity_options_1 = require("./profanity-options");
const models_1 = require("./models");
const utils_1 = require("./utils");
const data_1 = require("./data");
/**
 * Profanity filter that detects and censors profane words in text.
 *
 * Words come from the per-language lists in `data_1.profaneWords`, extended by
 * `blacklist`, reduced by `removed`, and exempted per match by `whitelist`.
 * Compiled regular expressions are cached per language combination.
 */
class Profanity {
    /**
     * Creates a profanity filter.
     * @param options - Optional settings. When provided they are shallow-merged over a
     * fresh ProfanityOptions instance; otherwise the defaults are used as-is.
     */
    constructor(options) {
        this.options = options ? { ...new profanity_options_1.ProfanityOptions(), ...options } : new profanity_options_1.ProfanityOptions();
        // Every list receives a callback that clears the regex cache
        // (presumably invoked by List whenever its words change - confirm in ./models).
        this.whitelist = new models_1.List(() => this.clearRegexes());
        this.blacklist = new models_1.List(() => this.clearRegexes());
        this.removed = new models_1.List(() => this.clearRegexes());
        this.regexes = new Map();
    }
    /**
     * Checks if the given text contains any profanity.
     * @param text - The text to check for profanity.
     * @param languages - Optional array of language codes to use for profanity detection.
     * If not provided, uses the languages specified in the options.
     * @returns True if a non-whitelisted profane match is found, false otherwise
     * (including when `text` is not a string).
     */
    exists(text, languages) {
        if (typeof text !== "string") {
            return false;
        }
        const regex = this.getRegex(this.resolveLanguages(languages));
        // The cached regex is global (stateful); always start scanning from the beginning.
        regex.lastIndex = 0;
        const lowercaseText = text.toLowerCase();
        let match;
        while ((match = regex.exec(lowercaseText)) !== null) {
            if (match[0].length === 0) {
                // A zero-length match is never profanity and does not advance lastIndex by itself.
                regex.lastIndex += 1;
                continue;
            }
            const matchStart = match.index;
            const matchEnd = matchStart + match[0].length;
            if (!this.isWhitelisted(matchStart, matchEnd, lowercaseText)) {
                return true;
            }
        }
        return false;
    }
    /**
     * Censors profanity in the given text.
     * @param text - The text to censor. Non-string values are returned unchanged.
     * @param censorType - The type of censoring to apply. Defaults to CensorType.Word.
     * @param languages - Optional array of language codes to use for profanity detection.
     * If not provided, uses the languages specified in the options.
     * @throws {Error} If a profane match is found and `censorType` is not a known CensorType.
     * @returns The censored text.
     */
    censor(text, censorType = models_1.CensorType.Word, languages) {
        if (typeof text !== "string") {
            return text;
        }
        const regex = this.getRegex(this.resolveLanguages(languages));
        // The cached regex is global (stateful); always start scanning from the beginning.
        regex.lastIndex = 0;
        const lowercaseText = text.toLowerCase();
        return this.replaceProfanity(text, lowercaseText, (word, start, end) => {
            if (this.isWhitelisted(start, end, lowercaseText)) {
                return word;
            }
            switch (censorType) {
                case models_1.CensorType.Word: {
                    // In whole-word mode the match may include a delimiting underscore;
                    // a single underscore is appended after the grawlix in that case.
                    const underscore = word.includes("_") ? "_" : "";
                    return this.options.grawlix + underscore;
                }
                case models_1.CensorType.FirstChar:
                    return this.options.grawlixChar + word.slice(1);
                case models_1.CensorType.FirstVowel:
                case models_1.CensorType.AllVowels: {
                    // Without the "g" flag only the first vowel is replaced.
                    const vowelRegex = new RegExp("[aeiou]", censorType === models_1.CensorType.FirstVowel ? "i" : "ig");
                    return word.replace(vowelRegex, this.options.grawlixChar);
                }
                default:
                    throw new Error(`Invalid replacement type: "${censorType}"`);
            }
        }, regex);
    }
    /**
     * Adds words to the profanity blacklist.
     * A word that was previously removed via `removeWords` is taken off the removed list
     * instead of being added to the blacklist.
     * @param words - An array of words to add to the blacklist.
     */
    addWords(words) {
        const removedWords = [];
        const blacklistWords = [];
        words.forEach((word) => {
            const lowerCaseWord = word.toLowerCase();
            if (this.removed.words.has(lowerCaseWord)) {
                removedWords.push(lowerCaseWord);
            }
            else {
                blacklistWords.push(lowerCaseWord);
            }
        });
        if (removedWords.length) {
            this.removed.removeWords(removedWords);
        }
        if (blacklistWords.length) {
            this.blacklist.addWords(blacklistWords);
        }
    }
    /**
     * Removes words from the profanity blacklist.
     * A word that is not on the blacklist is put on the removed list instead, which
     * excludes it from the built-in language word lists.
     * @param words - An array of words to remove from the blacklist.
     */
    removeWords(words) {
        const blacklistedWords = [];
        const removeWords = [];
        words.forEach((word) => {
            const lowerCaseWord = word.toLowerCase();
            if (this.blacklist.words.has(lowerCaseWord)) {
                blacklistedWords.push(lowerCaseWord);
            }
            else {
                removeWords.push(lowerCaseWord);
            }
        });
        if (blacklistedWords.length) {
            this.blacklist.removeWords(blacklistedWords);
        }
        if (removeWords.length) {
            this.removed.addWords(removeWords);
        }
    }
    /**
     * Checks if a given match is whitelisted.
     * In whole-word mode the match must coincide exactly with a whitelisted word that is
     * delimited on both sides; otherwise any overlap with a whitelisted word is enough.
     * @param matchStart - The starting index of the match in the text.
     * @param matchEnd - The ending index of the match in the text.
     * @param text - The lowercase text being checked.
     * @returns True if the match is whitelisted, false otherwise.
     */
    isWhitelisted(matchStart, matchEnd, text) {
        // Characters that count as part of a word: \w (which already includes "_") and "-".
        const wordChar = /[\w-]/;
        for (const whitelistedWord of this.whitelist.words) {
            // Start searching at the earliest position from which the whitelisted word
            // could still overlap the first character of the match.
            const whitelistedIndex = text.indexOf(whitelistedWord, Math.max(0, matchStart - whitelistedWord.length + 1));
            if (whitelistedIndex !== -1) {
                const whitelistedEnd = whitelistedIndex + whitelistedWord.length;
                if (this.options.wholeWord) {
                    if (matchStart === whitelistedIndex &&
                        matchEnd === whitelistedEnd &&
                        (matchStart === 0 || !wordChar.test(text[matchStart - 1])) &&
                        (matchEnd === text.length || !wordChar.test(text[matchEnd]))) {
                        return true;
                    }
                }
                else {
                    if ((matchStart >= whitelistedIndex && matchStart < whitelistedEnd) ||
                        (matchEnd > whitelistedIndex && matchEnd <= whitelistedEnd) ||
                        (whitelistedIndex >= matchStart && whitelistedEnd <= matchEnd)) {
                        return true;
                    }
                }
            }
        }
        return false;
    }
    /**
     * Replaces profanity in the text using the provided replacer function.
     * @param text - The original text.
     * @param lowercaseText - The lowercase version of the text.
     * @param replacer - A function that determines how to replace profane words. It receives
     * the matched word as it appears in `text` and the match's start/end indices.
     * @param regex - The regular expression used to find profane words.
     * @returns The text with profanity replaced.
     */
    replaceProfanity(text, lowercaseText, replacer, regex) {
        let result = text;
        // Running difference between positions in `result` and positions in `text`,
        // caused by replacements whose length differs from the matched word.
        let offset = 0;
        let match;
        while ((match = regex.exec(lowercaseText)) !== null) {
            const matchLength = match[0].length;
            if (matchLength === 0) {
                // A zero-length match does not advance lastIndex; step forward to avoid looping forever.
                regex.lastIndex += 1;
                continue;
            }
            const matchStart = match.index;
            const matchEnd = matchStart + matchLength;
            // Match indices refer to the unmodified text, so the original word is read
            // from `text` without the offset; the offset only applies to `result`.
            const originalWord = text.slice(matchStart, matchEnd);
            const censoredWord = replacer(originalWord, matchStart, matchEnd);
            result = result.slice(0, matchStart + offset) + censoredWord + result.slice(matchEnd + offset);
            offset += censoredWord.length - matchLength;
        }
        return result;
    }
    /**
     * Determines the list of languages to use, either from the provided list or falling back to default languages.
     * @param languages - An optional list of languages to use.
     * @returns `languages` when it is non-empty, otherwise `this.options.languages`.
     */
    resolveLanguages(languages) {
        return languages && languages.length ? languages : this.options.languages;
    }
    /**
     * Retrieves or constructs a regular expression for detecting profanity in the specified languages.
     * This method first checks if a regex for the given combination of languages already exists in the cache.
     *
     * @param languages - An array of languages to include in the regex.
     * @throws {Error} If no languages are provided, or (on a cache miss) a language has no word list.
     * @returns A RegExp object for detecting profanity in the specified languages.
     */
    getRegex(languages) {
        if (!languages.length) {
            throw new Error("At least one language must be provided");
        }
        const uniqueLanguages = [...new Set(languages.map((language) => language.trim().toLowerCase()))];
        // Sorting makes the cache key independent of the order the languages were given in.
        const regexKey = uniqueLanguages.sort().join(",");
        if (this.regexes.has(regexKey)) {
            return this.regexes.get(regexKey);
        }
        const allWords = uniqueLanguages.flatMap((language) => {
            const words = data_1.profaneWords.get(language);
            if (!words) {
                throw new Error(`Invalid language: "${language}"`);
            }
            return words.filter((word) => !this.removed.words.has(word));
        });
        const regex = this.buildRegex(allWords);
        this.regexes.set(regexKey, regex);
        return regex;
    }
    /**
     * Constructs a regular expression for detecting profane words.
     *
     * @param words - An array of profane words to be included in the regex.
     * @returns A RegExp that matches any of the profane or blacklisted words. When there are
     * no non-empty words at all, the returned RegExp never matches.
     */
    buildRegex(words) {
        // Empty entries would add an empty alternative that matches everywhere.
        const allProfaneWords = [...words, ...this.blacklist.words].filter((word) => word !== "");
        if (!allProfaneWords.length) {
            // "(?!)" is an empty negative lookahead: it fails at every position.
            return new RegExp("(?!)", "gi");
        }
        const escapedProfaneWords = allProfaneWords.map(utils_1.escapeRegExp);
        const boundary = this.options.wholeWord ? "(?:\\b|_)" : "";
        const profanityPattern = `${boundary}(${escapedProfaneWords.join("|")})${boundary}`;
        // eslint-disable-next-line security/detect-non-literal-regexp
        return new RegExp(profanityPattern, "gi");
    }
    /**
     * Clear the cached regexes.
     */
    clearRegexes() {
        this.regexes.clear();
    }
}
// Export the class so callers can construct their own instances.
exports.Profanity = Profanity;
// Export an instance constructed without an options argument.
exports.profanity = new Profanity();
//# sourceMappingURL=profanity.js.map