allprofanity
Version:
A blazing-fast, multi-language profanity filter with advanced algorithms (Aho-Corasick, Bloom Filters) delivering 664% faster performance on large texts, intelligent leet-speak detection, and pattern-based context analysis
1,288 lines (1,287 loc) • 64.9 kB
JavaScript
// Language dictionaries imports
import englishBadWords from "./languages/english-words.js";
import hindiBadWords from "./languages/hindi-words.js";
import frenchBadWords from "./languages/french-words.js";
import germanBadWords from "./languages/german-words.js";
import spanishBadWords from "./languages/spanish-words.js";
import bengaliBadWords from "./languages/bengali-words.js";
import tamilBadWords from "./languages/tamil-words.js";
import teluguBadWords from "./languages/telugu-words.js";
import brazilianBadWords from "./languages/brazilian-words.js";
// Advanced algorithm imports
import { AhoCorasick } from "./algos/aho-corasick.js";
import { BloomFilter } from "./algos/bloom-filter.js";
import { ContextAnalyzer } from "./algos/context-patterns.js";
// Export language dictionaries for direct access
export { default as englishBadWords } from "./languages/english-words.js";
export { default as hindiBadWords } from "./languages/hindi-words.js";
export { default as frenchBadWords } from "./languages/french-words.js";
export { default as germanBadWords } from "./languages/german-words.js";
export { default as spanishBadWords } from "./languages/spanish-words.js";
export { default as bengaliBadWords } from "./languages/bengali-words.js";
export { default as tamilBadWords } from "./languages/tamil-words.js";
export { default as teluguBadWords } from "./languages/telugu-words.js";
export { default as brazilianBadWords } from "./languages/brazilian-words.js";
/**
 * Default console logger implementation for AllProfanity.
 *
 * @class ConsoleLogger
 * @implements {Logger}
 * @description Writes messages to the console with an "[AllProfanity]" prefix.
 * Used whenever no custom logger is supplied and silent mode is off.
 *
 * @internal
 */
class ConsoleLogger {
    /**
     * Route a message to the given console sink with the library prefix.
     *
     * @param sink - Console function to invoke (console.log/warn/error)
     * @param message - The message to emit
     * @returns void
     */
    emit(sink, message) {
        sink(`[AllProfanity] ${message}`);
    }
    /** Log an informational message via console.log. */
    info(message) {
        this.emit(console.log, message);
    }
    /** Log a warning message via console.warn. */
    warn(message) {
        this.emit(console.warn, message);
    }
    /** Log an error message via console.error. */
    error(message) {
        this.emit(console.error, message);
    }
}
/**
 * Silent logger implementation that suppresses all log output.
 *
 * @class SilentLogger
 * @implements {Logger}
 * @description A no-op logger that discards every message. Selected when
 * `silent: true` is set in AllProfanityOptions, or wherever logging must be
 * fully disabled.
 *
 * @internal
 */
class SilentLogger {
    /** Discard informational messages (no-op). */
    info(_message) { }
    /** Discard warning messages (no-op). */
    warn(_message) { }
    /** Discard error messages (no-op). */
    error(_message) { }
}
/**
 * Severity levels for profanity detection results.
 *
 * @enum {number}
 * @description Categorizes detected profanity by the number of unique words
 * and total matches found in the text. Exposes both name->value and
 * value->name lookups, mirroring TypeScript's reverse-mapped enum layout.
 *
 * @readonly
 * @example
 * ```typescript
 * const result = filter.detect("some text");
 * if (result.severity === ProfanitySeverity.EXTREME) {
 *   // Handle extreme profanity
 * }
 * ```
 */
export var ProfanitySeverity;
(function (ProfanitySeverity) {
    // Install each level under its name and its numeric value.
    const register = (name, value) => {
        ProfanitySeverity[name] = value;
        ProfanitySeverity[value] = name;
    };
    /** Mild profanity: 1 unique word or 1 total match */
    register("MILD", 1);
    /** Moderate profanity: 2 unique words or 2 total matches */
    register("MODERATE", 2);
    /** Severe profanity: 3 unique words or 3 total matches */
    register("SEVERE", 3);
    /** Extreme profanity: 4+ unique words or 5+ total matches */
    register("EXTREME", 4);
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));
/**
 * Validates that an input is a string.
 *
 * @function validateString
 * @param {unknown} input - The value to validate
 * @param {string} paramName - Name of the parameter being validated (used in error messages)
 * @returns {string} The validated string. May be empty: emptiness is NOT
 * checked here (callers such as detect() handle the empty-string case
 * themselves). The previous doc incorrectly claimed "non-empty" validation.
 * @throws {TypeError} If input is not a string
 *
 * @internal
 *
 * @example
 * ```typescript
 * const text = validateString(userInput, 'text');
 * // Returns userInput if it's a string, throws TypeError otherwise
 * ```
 */
function validateString(input, paramName) {
    if (typeof input !== "string") {
        throw new TypeError(`${paramName} must be a string, got ${typeof input}`);
    }
    return input;
}
/**
 * Validates and filters a string array, dropping non-string and blank items.
 *
 * @function validateStringArray
 * @param {unknown} input - The value to validate (expected to be an array)
 * @param {string} paramName - Name of the parameter being validated (used in error/warning messages)
 * @returns {string[]} Array of valid, non-blank strings
 * @throws {TypeError} If input is not an array
 *
 * @internal
 *
 * @example
 * ```typescript
 * const words = validateStringArray(['word1', '', 123, 'word2'], 'words');
 * // Returns: ['word1', 'word2']
 * // Logs warning: "Skipping non-string item in words: 123"
 * ```
 */
function validateStringArray(input, paramName) {
    if (!Array.isArray(input)) {
        throw new TypeError(`${paramName} must be an array`);
    }
    const valid = [];
    for (const item of input) {
        if (typeof item !== "string") {
            // Non-string entries are reported; blank strings are dropped silently.
            console.warn(`Skipping non-string item in ${paramName}: ${item}`);
            continue;
        }
        if (item.trim().length > 0) {
            valid.push(item);
        }
    }
    return valid;
}
/**
 * Trie (prefix tree) node for efficient pattern matching and word storage.
 *
 * @class TrieNode
 * @description Implements a trie for O(m) word matching, where m is the length
 * of the searched word. Each node represents one character; a path from the
 * root to a node with isEndOfWord=true spells a complete stored word.
 *
 * NOTE(review): addWord iterates by code point (for...of) while removeHelper
 * and findMatches index by UTF-16 code unit; words containing astral-plane
 * characters may not round-trip — confirm whether any dictionary needs them.
 *
 * @internal
 *
 * @example
 * ```typescript
 * const trie = new TrieNode();
 * trie.addWord('bad');
 * trie.addWord('badword');
 * const matches = trie.findMatches('badwords here', 0, false);
 * // Returns matches for 'bad' and 'badword'
 * ```
 */
class TrieNode {
    constructor() {
        /** Map of characters to child nodes for fast lookups */
        this.children = new Map();
        /** True when this node terminates a complete stored word */
        this.isEndOfWord = false;
        /** The complete word ending at this node (set only when isEndOfWord is true) */
        this.word = "";
    }
    /**
     * Adds a word to the trie.
     *
     * @param {string} word - The word to add
     * @returns {void}
     *
     * @remarks
     * - Time Complexity: O(m) where m is the length of the word
     * - Supports any Unicode characters
     */
    addWord(word) {
        let node = this;
        for (const char of word) {
            let child = node.children.get(char);
            if (!child) {
                // Create the missing child up front; the original re-fetched and
                // null-checked the node it had just inserted.
                child = new TrieNode();
                node.children.set(char, child);
            }
            node = child;
        }
        node.isEndOfWord = true;
        node.word = word;
    }
    /**
     * Tests whether a complete word is stored in the trie.
     *
     * @param {string} word - The word to look up
     * @returns {boolean} True if the exact word was previously added
     */
    hasWord(word) {
        let node = this;
        for (const char of word) {
            const child = node.children.get(char);
            if (!child)
                return false;
            node = child;
        }
        return node.isEndOfWord;
    }
    /**
     * Removes a word from the trie.
     *
     * @param {string} word - The word to remove
     * @returns {boolean} True if the word existed and was removed, false if not found
     *
     * @remarks
     * Fix: the previous implementation returned removeHelper's internal
     * "prune this node" flag, which is false whenever the removed word shares
     * a prefix with another stored word (e.g. removing "bad" while "badge" is
     * stored) — contradicting this documented contract. Existence is now
     * checked explicitly so the return value matches the documentation.
     * Shared prefixes with other words are still preserved.
     */
    removeWord(word) {
        if (!this.hasWord(word))
            return false;
        this.removeHelper(word, 0);
        return true;
    }
    /**
     * Recursive helper: unmarks the word's terminal node and prunes nodes that
     * become childless and do not terminate another word.
     *
     * @param {string} word - The word being removed
     * @param {number} index - Current character index in the word
     * @returns {boolean} True if the caller should delete this child node
     *
     * @internal
     */
    removeHelper(word, index) {
        if (index === word.length) {
            if (!this.isEndOfWord)
                return false;
            this.isEndOfWord = false;
            return this.children.size === 0;
        }
        const char = word[index];
        const node = this.children.get(char);
        if (!node)
            return false;
        const shouldDeleteChild = node.removeHelper(word, index + 1);
        if (shouldDeleteChild) {
            this.children.delete(char);
            return this.children.size === 0 && !this.isEndOfWord;
        }
        return false;
    }
    /**
     * Finds every stored word that begins at startPos in text.
     *
     * @param {string} text - The text to search
     * @param {number} startPos - 0-based starting index in text
     * @param {boolean} allowPartial - Retained for interface compatibility; the
     * original's two branches pushed identical objects either way (its
     * "wordStart - startPos" was always 0), so the offsets no longer depend on
     * this flag. Whole-word filtering is performed by the caller.
     * @returns {Array<{ word: string; start: number; end: number }>} Matches
     * with start/end offsets relative to startPos (start is always 0)
     *
     * @remarks
     * - Time Complexity: O(k) where k is the longest match from startPos
     */
    findMatches(text, startPos, allowPartial) {
        const matches = [];
        let node = this;
        let pos = startPos;
        while (pos < text.length) {
            const child = node.children.get(text[pos]);
            if (!child)
                break;
            node = child;
            pos++;
            if (child.isEndOfWord) {
                matches.push({
                    word: child.word,
                    start: 0,
                    end: pos - startPos,
                });
            }
        }
        return matches;
    }
    /**
     * Clears all words, resetting the trie to its initial empty state.
     *
     * @returns {void}
     *
     * @remarks
     * O(1) on the root; detached children are reclaimed by the garbage collector.
     */
    clear() {
        this.children.clear();
        this.isEndOfWord = false;
        this.word = "";
    }
}
/**
* AllProfanity - Professional-grade multilingual profanity detection and filtering library.
*
* @class AllProfanity
* @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
* with advanced features including leet speak detection, context analysis, multiple matching algorithms,
* and customizable filtering options.
*
* @remarks
* ### Features:
* - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
* - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
* - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
* - **Context Analysis**: Reduces false positives using surrounding word context
* - **Performance**: Built-in caching and optimized data structures
* - **Flexible**: Custom dictionaries, whitelisting, severity levels
*
* ### Default Behavior:
* - Loads English and Hindi dictionaries by default
* - Case-insensitive matching
* - Leet speak detection enabled
* - Uses Trie algorithm (fastest for most cases)
*
* @example
* ```typescript
* // Basic usage with default instance
* import allProfanity from 'allprofanity';
*
* const result = allProfanity.detect("This is some bad text");
* console.log(result.hasProfanity); // true
* console.log(result.cleanedText); // "This is some *** text"
* console.log(result.severity); // ProfanitySeverity.MILD
* ```
*
* @example
* ```typescript
* // Advanced usage with custom configuration
* import { AllProfanity, ProfanitySeverity } from 'allprofanity';
*
* const filter = new AllProfanity({
* languages: ['english', 'french', 'spanish'],
* enableLeetSpeak: true,
* strictMode: true,
* algorithm: {
* matching: 'hybrid',
* useBloomFilter: true
* },
* performance: {
* enableCaching: true,
* cacheSize: 500
* },
* whitelistWords: ['class', 'assignment']
* });
*
* const text = "This text has some b@d w0rds";
* const result = filter.detect(text);
*
* if (result.hasProfanity) {
* console.log(`Found ${result.detectedWords.length} profane words`);
* console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
* console.log(`Cleaned: ${result.cleanedText}`);
* }
* ```
*
* @example
* ```typescript
* // Using individual methods
* const filter = new AllProfanity();
*
* // Simple check
* if (filter.check("some text")) {
* console.log("Contains profanity!");
* }
*
* // Clean with custom placeholder
* const cleaned = filter.clean("bad words here", "#");
*
* // Load additional languages
* filter.loadLanguage('german');
* filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
*
* // Add custom words
* filter.add(['customword1', 'customword2']);
*
* // Remove words
* filter.remove(['someword']);
*
* // Whitelist words
* filter.addToWhitelist(['class', 'assignment']);
* ```
*
* @see {@link AllProfanityOptions} for all configuration options
* @see {@link ProfanityDetectionResult} for detection result format
* @see {@link ProfanitySeverity} for severity levels
*/
export class AllProfanity {
/**
* Creates a new AllProfanity instance with the specified configuration.
*
* @constructor
* @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
*
* @remarks
* ### Default Initialization:
* - Loads English and Hindi dictionaries automatically
* - Enables leet speak detection
* - Case-insensitive matching
* - Uses Trie algorithm for pattern matching
*
* ### Performance Considerations:
* - Initial load time depends on number of languages loaded
* - Aho-Corasick automaton (if enabled) is built during construction
* - Bloom Filter (if enabled) is populated during construction
*
* @throws {TypeError} If invalid options are provided
*
* @example
* ```typescript
* // Default instance
* const filter = new AllProfanity();
*
* // Custom configuration
* const filter = new AllProfanity({
* languages: ['english', 'french'],
* strictMode: true,
* defaultPlaceholder: '#',
* algorithm: { matching: 'hybrid' }
* });
*
* // Silent mode (no logging)
* const filter = new AllProfanity({ silent: true });
* ```
*
* @see {@link AllProfanityOptions} for all available configuration options
*/
constructor(options) {
var _a, _b, _c, _d, _e;
this.profanityTrie = new TrieNode();
this.whitelistSet = new Set();
this.loadedLanguages = new Set();
this.defaultPlaceholder = "*";
this.enableLeetSpeak = true;
this.caseSensitive = false;
this.strictMode = false;
this.detectPartialWords = false;
this.availableLanguages = {
english: englishBadWords || [],
hindi: hindiBadWords || [],
french: frenchBadWords || [],
german: germanBadWords || [],
spanish: spanishBadWords || [],
bengali: bengaliBadWords || [],
tamil: tamilBadWords || [],
telugu: teluguBadWords || [],
brazilian: brazilianBadWords || [],
};
this.leetMappings = new Map([
["@", "a"],
["^", "a"],
["4", "a"],
["8", "b"],
["6", "b"],
["|3", "b"],
["(", "c"],
["<", "c"],
["©", "c"],
["|)", "d"],
["0", "o"],
["3", "e"],
["€", "e"],
["|=", "f"],
["ph", "f"],
["9", "g"],
["#", "h"],
["|-|", "h"],
["1", "i"],
["!", "i"],
["|", "i"],
["_|", "j"],
["¿", "j"],
["|<", "k"],
["1<", "k"],
["7", "l"],
["|\\/|", "m"],
["/\\/\\", "m"],
["|\\|", "n"],
["//", "n"],
["()", "o"],
["|*", "p"],
["|o", "p"],
["(_,)", "q"],
["()_", "q"],
["|2", "r"],
["12", "r"],
["5", "s"],
["$", "s"],
["z", "s"],
["7", "t"],
["+", "t"],
["†", "t"],
["|_|", "u"],
["(_)", "u"],
["v", "u"],
["\\/", "v"],
["|/", "v"],
["\\/\\/", "w"],
["vv", "w"],
["><", "x"],
["}{", "x"],
["`/", "y"],
["j", "y"],
["2", "z"],
["7_", "z"],
]);
this.dynamicWords = new Set();
// Advanced algorithms
this.ahoCorasickAutomaton = null;
this.bloomFilter = null;
this.contextAnalyzer = null;
this.matchingAlgorithm = "trie";
this.resultCache = null;
// Use silent logger if silent mode is enabled, otherwise use provided logger or console logger
this.logger = (options === null || options === void 0 ? void 0 : options.logger) || ((options === null || options === void 0 ? void 0 : options.silent) ? new SilentLogger() : new ConsoleLogger());
if ((options === null || options === void 0 ? void 0 : options.defaultPlaceholder) !== undefined) {
this.setPlaceholder(options.defaultPlaceholder);
}
this.enableLeetSpeak = (_a = options === null || options === void 0 ? void 0 : options.enableLeetSpeak) !== null && _a !== void 0 ? _a : true;
this.caseSensitive = (_b = options === null || options === void 0 ? void 0 : options.caseSensitive) !== null && _b !== void 0 ? _b : false;
this.strictMode = (_c = options === null || options === void 0 ? void 0 : options.strictMode) !== null && _c !== void 0 ? _c : false;
this.detectPartialWords = (_d = options === null || options === void 0 ? void 0 : options.detectPartialWords) !== null && _d !== void 0 ? _d : false;
if (options === null || options === void 0 ? void 0 : options.whitelistWords) {
this.addToWhitelist(options.whitelistWords);
}
// Initialize advanced algorithms BEFORE loading dictionaries
// so that words can be added to all data structures
this.initializeAdvancedAlgorithms(options);
this.loadLanguage("english");
this.loadLanguage("hindi");
if ((_e = options === null || options === void 0 ? void 0 : options.languages) === null || _e === void 0 ? void 0 : _e.length) {
options.languages.forEach((lang) => this.loadLanguage(lang));
}
if (options === null || options === void 0 ? void 0 : options.customDictionaries) {
Object.entries(options.customDictionaries).forEach(([name, words]) => {
this.loadCustomDictionary(name, words);
});
}
}
/**
* Initialize advanced algorithms based on configuration
*/
initializeAdvancedAlgorithms(options) {
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
// Set matching algorithm
if ((_a = options === null || options === void 0 ? void 0 : options.algorithm) === null || _a === void 0 ? void 0 : _a.matching) {
this.matchingAlgorithm = options.algorithm.matching;
}
// Initialize Bloom Filter if enabled
const bloomEnabled = ((_b = options === null || options === void 0 ? void 0 : options.algorithm) === null || _b === void 0 ? void 0 : _b.useBloomFilter) ||
((_c = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _c === void 0 ? void 0 : _c.enabled) ||
this.matchingAlgorithm === "hybrid";
if (bloomEnabled) {
const expectedItems = ((_d = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _d === void 0 ? void 0 : _d.expectedItems) || 10000;
const falsePositiveRate = ((_e = options === null || options === void 0 ? void 0 : options.bloomFilter) === null || _e === void 0 ? void 0 : _e.falsePositiveRate) || 0.01;
this.bloomFilter = new BloomFilter(expectedItems, falsePositiveRate);
this.logger.info(`Bloom Filter initialized with ${expectedItems} expected items and ${(falsePositiveRate * 100).toFixed(2)}% false positive rate`);
}
// Initialize Aho-Corasick if enabled
const ahoEnabled = ((_f = options === null || options === void 0 ? void 0 : options.algorithm) === null || _f === void 0 ? void 0 : _f.useAhoCorasick) ||
((_g = options === null || options === void 0 ? void 0 : options.ahoCorasick) === null || _g === void 0 ? void 0 : _g.enabled) ||
this.matchingAlgorithm === "aho-corasick" ||
this.matchingAlgorithm === "hybrid";
if (ahoEnabled) {
this.ahoCorasickAutomaton = new AhoCorasick([]);
this.logger.info("Aho-Corasick automaton initialized");
}
// Initialize Context Analyzer if enabled
const contextEnabled = ((_h = options === null || options === void 0 ? void 0 : options.algorithm) === null || _h === void 0 ? void 0 : _h.useContextAnalysis) ||
((_j = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _j === void 0 ? void 0 : _j.enabled);
if (contextEnabled) {
const contextLanguages = ((_k = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _k === void 0 ? void 0 : _k.languages) || ["en"];
this.contextAnalyzer = new ContextAnalyzer(contextLanguages);
if ((_l = options === null || options === void 0 ? void 0 : options.contextAnalysis) === null || _l === void 0 ? void 0 : _l.contextWindow) {
this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
}
this.logger.info(`Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`);
}
// Initialize result cache if enabled
if ((_m = options === null || options === void 0 ? void 0 : options.performance) === null || _m === void 0 ? void 0 : _m.enableCaching) {
const cacheSize = options.performance.cacheSize || 1000;
this.resultCache = new Map();
this.logger.info(`Result caching enabled with size limit: ${cacheSize}`);
}
}
/**
* Normalize leet speak to regular characters.
* @param text - The input text.
* @returns Normalized text.
*/
normalizeLeetSpeak(text) {
if (!this.enableLeetSpeak)
return text;
let normalized = text.toLowerCase();
const sortedMappings = Array.from(this.leetMappings.entries()).sort(([leetA], [leetB]) => leetB.length - leetA.length);
for (const [leet, normal] of sortedMappings) {
const regex = new RegExp(this.escapeRegex(leet), "g");
normalized = normalized.replace(regex, normal);
}
return normalized;
}
/**
* Escape regex special characters in a string.
* @param str - The string to escape.
* @returns The escaped string.
*/
escapeRegex(str) {
return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
}
/**
* Check if a match is bounded by word boundaries (strict mode).
* @param text - The text.
* @param start - Start index.
* @param end - End index.
* @returns True if match is at word boundaries, false otherwise.
*/
hasWordBoundaries(text, start, end) {
if (!this.strictMode)
return true;
const beforeChar = start > 0 ? text[start - 1] : " ";
const afterChar = end < text.length ? text[end] : " ";
const wordBoundaryRegex = /[\s\p{P}\p{S}]/u;
return (wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar));
}
/**
* Determine if a match is a whole word.
* @param text - The text.
* @param start - Start index.
* @param end - End index.
* @returns True if whole word, false otherwise.
*/
isWholeWord(text, start, end) {
if (start !== 0 && /\w/.test(text[start - 1]))
return false;
if (end !== text.length && /\w/.test(text[end]))
return false;
return true;
}
/**
* Check if a match is whitelisted.
* @param word - Word from dictionary.
* @param matchedText - Actual matched text.
* @returns True if whitelisted, false otherwise.
*/
isWhitelistedMatch(word, matchedText) {
if (this.caseSensitive) {
return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
}
else {
return (this.whitelistSet.has(word.toLowerCase()) ||
this.whitelistSet.has(matchedText.toLowerCase()));
}
}
/**
* Remove overlapping matches, keeping only the longest at each start position.
* @param matches - Array of match results.
* @returns Deduplicated matches.
*/
deduplicateMatches(matches) {
const sorted = [...matches].sort((a, b) => {
if (a.start !== b.start)
return a.start - b.start;
return b.end - a.end;
});
const result = [];
let lastEnd = -1;
for (const match of sorted) {
if (match.start >= lastEnd) {
result.push(match);
lastEnd = match.end;
}
}
return result;
}
/**
* Use Aho-Corasick algorithm for pattern matching
*/
findMatchesWithAhoCorasick(searchText, originalText) {
if (!this.ahoCorasickAutomaton) {
return [];
}
const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
const results = [];
for (const match of ahoMatches) {
if (!this.detectPartialWords &&
!this.isWholeWord(originalText, match.start, match.end)) {
continue;
}
const matchedText = originalText.substring(match.start, match.end);
if (this.isWhitelistedMatch(match.pattern, matchedText)) {
continue;
}
if (this.hasWordBoundaries(originalText, match.start, match.end)) {
results.push({
word: match.pattern,
start: match.start,
end: match.end,
originalWord: matchedText,
});
}
}
return results;
}
/**
* Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
*/
findMatchesHybrid(searchText, originalText) {
// Use Aho-Corasick for primary matching if available
if (this.ahoCorasickAutomaton) {
const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
// If Bloom Filter is enabled, validate matches
if (this.bloomFilter) {
return matches.filter((match) => this.bloomFilter.mightContain(match.word));
}
return matches;
}
// Fallback to Trie if Aho-Corasick not available
const matches = [];
this.findMatches(searchText, originalText, matches);
// Validate with Bloom Filter if enabled
if (this.bloomFilter) {
return matches.filter((match) => this.bloomFilter.mightContain(match.word));
}
return matches;
}
/**
* Apply context analysis to filter false positives
*/
applyContextAnalysis(text, matches, scoreThreshold = 0.5) {
if (!this.contextAnalyzer) {
return matches;
}
return matches.filter((match) => {
const analysis = this.contextAnalyzer.analyzeContext(text, match.start, match.end, match.word);
// If score is above threshold, it's likely profanity
return analysis.score >= scoreThreshold;
});
}
/**
* Detects profanity in the provided text and returns comprehensive analysis.
*
* @param {string} text - The text to analyze for profanity
* @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
*
* @throws {TypeError} If text is not a string
*
* @remarks
* ### Performance:
* - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
* - With Bloom Filter: O(n) average case (faster early rejection)
* - With Caching: O(1) for repeated identical text
*
* ### Features:
* - Detects leet speak variations (if enabled): "h3ll0" → "hello"
* - Respects word boundaries (strict mode) or detects partial matches
* - Returns exact positions for highlighting/masking
* - Calculates severity based on match count and uniqueness
*
* ### Caching:
* - Results are cached if `performance.enableCaching` is true
* - Cache uses LRU eviction when size limit is reached
*
* @example
* ```typescript
* const filter = new AllProfanity();
* const result = filter.detect("This has bad words");
*
* console.log(result.hasProfanity); // true
* console.log(result.detectedWords); // ['bad']
* console.log(result.cleanedText); // 'This has *** words'
* console.log(result.severity); // ProfanitySeverity.MILD
* console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
* ```
*
* @example
* ```typescript
* // With leet speak detection
* const filter = new AllProfanity({ enableLeetSpeak: true });
* const result = filter.detect("st0p b3ing b@d");
*
* if (result.hasProfanity) {
* result.positions.forEach(pos => {
* console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
* });
* }
* ```
*
* @see {@link ProfanityDetectionResult} for result structure
* @see {@link ProfanitySeverity} for severity levels
*/
detect(text) {
var _a;
const validatedText = validateString(text, "text");
if (validatedText.length === 0) {
return {
hasProfanity: false,
detectedWords: [],
cleanedText: validatedText,
severity: ProfanitySeverity.MILD,
positions: [],
};
}
// Check cache first if enabled
if ((_a = this.resultCache) === null || _a === void 0 ? void 0 : _a.has(validatedText)) {
return this.resultCache.get(validatedText);
}
let matches = [];
const normalizedText = this.caseSensitive
? validatedText
: validatedText.toLowerCase();
// Choose matching algorithm based on configuration
switch (this.matchingAlgorithm) {
case "aho-corasick":
matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
if (this.enableLeetSpeak) {
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
if (leetNormalized !== normalizedText) {
const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
matches.push(...leetMatches);
}
}
break;
case "hybrid":
matches = this.findMatchesHybrid(normalizedText, validatedText);
if (this.enableLeetSpeak) {
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
if (leetNormalized !== normalizedText) {
const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
matches.push(...leetMatches);
}
}
break;
case "trie":
default:
this.findMatches(normalizedText, validatedText, matches);
if (this.enableLeetSpeak) {
const leetNormalized = this.normalizeLeetSpeak(normalizedText);
if (leetNormalized !== normalizedText) {
this.findMatches(leetNormalized, validatedText, matches);
}
}
break;
}
// Apply context analysis if enabled
if (this.contextAnalyzer) {
matches = this.applyContextAnalysis(validatedText, matches);
}
const uniqueMatches = this.deduplicateMatches(matches);
const detectedWords = uniqueMatches.map((m) => m.originalWord);
const severity = this.calculateSeverity(uniqueMatches);
const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
const result = {
hasProfanity: uniqueMatches.length > 0,
detectedWords,
cleanedText,
severity,
positions: uniqueMatches.map((m) => ({
word: m.originalWord,
start: m.start,
end: m.end,
})),
};
// Cache result if caching is enabled
if (this.resultCache) {
this.resultCache.set(validatedText, result);
// Implement simple LRU by clearing cache when it gets too large
if (this.resultCache.size > 1000) {
const firstKey = this.resultCache.keys().next().value;
if (firstKey !== undefined) {
this.resultCache.delete(firstKey);
}
}
}
return result;
}
/**
* Main matching function, with whole-word logic.
* @param searchText - The normalized text to search.
* @param originalText - The original text.
* @param matches - Array to collect matches.
*/
findMatches(searchText, originalText, matches) {
for (let i = 0; i < searchText.length; i++) {
const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
for (const match of matchResults) {
const start = i + match.start;
const end = i + match.end;
if (!this.detectPartialWords &&
!this.isWholeWord(originalText, start, end)) {
continue;
}
const matchedText = originalText.substring(start, end);
if (this.isWhitelistedMatch(match.word, matchedText)) {
continue;
}
if (this.hasWordBoundaries(originalText, start, end)) {
matches.push({
word: match.word,
start,
end,
originalWord: matchedText,
});
}
}
}
}
/**
* Generate cleaned text by replacing profane words.
* @param originalText - The original text.
* @param matches - Array of matches.
* @returns Cleaned text.
*/
generateCleanedText(originalText, matches) {
if (matches.length === 0)
return originalText;
let result = originalText;
const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
for (const match of sortedMatches) {
const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
result =
result.substring(0, match.start) +
replacement +
result.substring(match.end);
}
return result;
}
/**
* Quick boolean check for profanity presence in text.
*
* @param {string} text - The text to check for profanity
* @returns {boolean} True if profanity is detected, false otherwise
*
* @throws {TypeError} If text is not a string
*
* @remarks
* - Convenience method that internally calls `detect()` and returns only the boolean result
* - For detailed information about matches, use `detect()` instead
* - Results are cached if caching is enabled (same cache as `detect()`)
*
* @example
* ```typescript
* const filter = new AllProfanity();
*
* if (filter.check("This has bad words")) {
* console.log("Profanity detected!");
* }
*
* // Quick validation
* const isClean = !filter.check(userInput);
* ```
*
* @see {@link detect} for detailed profanity analysis
*/
check(text) {
return this.detect(text).hasProfanity;
}
/**
* Cleans text by replacing profanity with a placeholder character.
*
* @param {string} text - The text to clean
* @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
* @returns {string} The cleaned text with profanity replaced
*
* @throws {TypeError} If text is not a string
*
* @remarks
* ### Character-level Replacement:
* - Each profane character is replaced individually
* - "bad" with placeholder "*" becomes "***"
* - Preserves text length and structure
*
* ### Placeholder Behavior:
* - If no placeholder provided, uses the instance's default placeholder
* - If placeholder provided, uses only the first character
* - Empty placeholder throws error
*
* @example
* ```typescript
* const filter = new AllProfanity();
*
* // Using default placeholder (*)
* const cleaned = filter.clean("This has bad words");
* console.log(cleaned); // "This has *** *****"
*
* // Using custom placeholder
* const cleaned = filter.clean("This has bad words", "#");
* console.log(cleaned); // "This has ### #####"
* ```
*
* @example
* ```typescript
* // Clean user-generated content for display
* const userComment = "Some inappropriate words here";
* const safeComment = filter.clean(userComment);
* displayComment(safeComment);
* ```
*
* @see {@link cleanWithPlaceholder} for word-level replacement
* @see {@link setPlaceholder} to change default placeholder
*/
clean(text, placeholder) {
const detection = this.detect(text);
if (!placeholder || placeholder === this.defaultPlaceholder) {
return detection.cleanedText;
}
let result = text;
const sortedPositions = [
...this.deduplicateMatches(detection.positions.map((p) => ({
word: p.word,
start: p.start,
end: p.end,
originalWord: text.substring(p.start, p.end),
}))),
].sort((a, b) => b.start - a.start);
for (const pos of sortedPositions) {
const originalWord = text.substring(pos.start, pos.end);
const replacement = placeholder.repeat(originalWord.length);
result =
result.substring(0, pos.start) +
replacement +
result.substring(pos.end);
}
return result;
}
/**
* Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
*
* @param {string} text - The text to clean
* @param {string} [placeholder="***"] - The placeholder string to use for each profane word
* @returns {string} The cleaned text with each profane word replaced by the placeholder
*
* @throws {TypeError} If text is not a string
*
* @remarks
* ### Word-level Replacement:
* - Each profane word is replaced with the entire placeholder string (not character-by-character)
* - "bad words" with placeholder "***" becomes "*** ***"
* - Does NOT preserve original text length
*
* ### Difference from `clean()`:
* - `clean()`: Character-level replacement - "bad" becomes "***" (preserves length)
* - `cleanWithPlaceholder()`: Word-level replacement - "bad" becomes "***" (fixed placeholder)
*
* @example
* ```typescript
* const filter = new AllProfanity();
*
* // Default placeholder (***) const text = "This has bad words";
* const cleaned = filter.cleanWithPlaceholder(text);
* console.log(cleaned); // "This has *** ***"
*
* // Custom placeholder
* const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
* console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
* ```
*
* @example
* ```typescript
* // Censoring chat messages
* const message = "You are a badword and stupid";
* const censored = filter.cleanWithPlaceholder(message, "[***]");
* // Result: "You are a [***] and [***]"
* ```
*
* @see {@link clean} for character-level replacement
*/
cleanWithPlaceholder(text, placeholder = "***") {
const detection = this.detect(text);
if (detection.positions.length === 0)
return text;
let result = text;
const sortedPositions = [
...this.deduplicateMatches(detection.positions.map((p) => ({
word: p.word,
start: p.start,
end: p.end,
originalWord: text.substring(p.start, p.end),
}))),
].sort((a, b) => b.start - a.start);
for (const pos of sortedPositions) {
if (!this.isWholeWord(result, pos.start, pos.end))
continue;
result =
result.substring(0, pos.start) +
placeholder +
result.substring(pos.end);
}
return result;
}
/**
* Dynamically adds one or more words to the profanity filter at runtime.
*
* @param {string | string[]} word - A single word or array of words to add to the filter
* @returns {void}
*
* @remarks
* ### Behavior:
* - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
* - Automatically normalizes words based on caseSensitive setting
* - Skips whitelisted words
* - Validates and filters out non-string or empty values
* - Changes take effect immediately for subsequent detect/check/clean calls
*
* ### Use Cases:
* - Adding context-specific profanity
* - Building dynamic word lists from user reports
* - Customizing filters for specific communities/applications
*
* @example
* ```typescript
* const filter = new AllProfanity();
*
* // Add single word
* filter.add('newbadword');
*
* // Add multiple words
* filter.add(['word1', 'word2', 'word3']);
*
* // Now these words will be detected
* filter.check('newbadword'); // true
* ```
*
* @example
* ```typescript
* // Add game-specific slang dynamically
* const filter = new AllProfanity();
* const gamingSlang = ['noob', 'trash', 'tryhard'];
* filter.add(gamingSlang);
*
* const message = "You're such a noob";
* console.log(filter.check(message)); // true
* ```
*
* @see {@link remove} to remove words
* @see {@link loadCustomDictionary} for loading named dictionaries
*/
add(word) {
const words = Array.isArray(word) ? word : [word];
const validatedWords = validateStringArray(words, "words to add");
for (const w of validatedWords) {
this.dynamicWords.add(w);
this.addWordToTrie(w);
}
}
/**
* Dynamically removes one or more words from the profanity filter at runtime.
*
* @param {string | string[]} word - A single word or array of words to remove from the filter
* @returns {void}
*
* @remarks
* ### Behavior:
* - Removes words from all active data structures (Trie, dynamic words set)
* - Normalizes words based on caseSensitive setting before removal
* - Only removes dynamically added words, not words from loaded language dictionaries
* - Changes take effect immediately for subsequent detect/check/clean calls
*
* ### Important Notes:
* - Cannot remove words from built-in language dictionaries
* - To exclude dictionary words, use `addToWhitelist()` instead
* - Validates and filters out non-string or empty values
*
* @example
* ```typescript
* const filter = new AllProfanity();
*
* // Add then remove a word
* filter.add('tempword');
* filter.check('tempword'); // true
*
* filter.remove('tempword');
* filter.check('tempword'); // false
*
* // Remove multiple words
* filter.remove(['word1', 'word2']);
* ```
*
* @example
* ```typescript
* // Managing custom word list
* const filter = new AllProfanity();
* filter.add(['custom1', 'custom2', 'custom3']);
*
* // Later, remove one that's no longer needed
* filter.remove('custom2');
* ```
*
* @see {@link add} to add words
* @see {@link addToWhitelist} to exclude dictionary words without removing them
*/
remove(word) {
const words = Array.isArray(word) ? word : [word];
const validatedWords = validateStringArray(words, "words to remove");
for (const w of validatedWords) {
const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
this.profanityTrie.removeWord(normalizedWord);
this.dynamicWords.delete(w);
}
}
/**
* Add words to the whitelist.
* @param words - Words to whitelist.
*/
addToWhitelist(words) {
const validatedWords = validateStringArray(words, "whitelist words");
for (const word of validatedWords) {
const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
this.whitelistSet.add(normalizedWord);
}
}
/**
* Remove words from the whitelist.