allprofanity

A blazing-fast, multi-language profanity filter with advanced algorithms (Aho-Corasick, Bloom Filters) delivering 664% faster performance on large texts, intelligent leet-speak detection, and pattern-based context analysis.

// Language dictionary imports
import englishBadWords from "./languages/english-words.js";
import hindiBadWords from "./languages/hindi-words.js";
import frenchBadWords from "./languages/french-words.js";
import germanBadWords from "./languages/german-words.js";
import spanishBadWords from "./languages/spanish-words.js";
import bengaliBadWords from "./languages/bengali-words.js";
import tamilBadWords from "./languages/tamil-words.js";
import teluguBadWords from "./languages/telugu-words.js";
import brazilianBadWords from "./languages/brazilian-words.js";

// Advanced algorithm imports
import { AhoCorasick } from "./algos/aho-corasick.js";
import { BloomFilter } from "./algos/bloom-filter.js";
import { ContextAnalyzer } from "./algos/context-patterns.js";

// Export language dictionaries for direct access
export { default as englishBadWords } from "./languages/english-words.js";
export { default as hindiBadWords } from "./languages/hindi-words.js";
export { default as frenchBadWords } from "./languages/french-words.js";
export { default as germanBadWords } from "./languages/german-words.js";
export { default as spanishBadWords } from "./languages/spanish-words.js";
export { default as bengaliBadWords } from "./languages/bengali-words.js";
export { default as tamilBadWords } from "./languages/tamil-words.js";
export { default as teluguBadWords } from "./languages/telugu-words.js";
export { default as brazilianBadWords } from "./languages/brazilian-words.js";

/**
 * Default console logger implementation for AllProfanity.
 *
 * @class ConsoleLogger
 * @implements {Logger}
 * @description Logs messages to the browser or Node.js console with an "[AllProfanity]" prefix.
 * This is the default logger used when no custom logger is provided.
 *
 * @internal
 */
class ConsoleLogger {
  /**
   * Log informational messages to console.log with [AllProfanity] prefix.
   *
   * @param message - The message to log
   * @returns void
   */
  info(message) {
    console.log(`[AllProfanity] ${message}`);
  }
  /**
   * Log warning messages to console.warn with [AllProfanity] prefix.
   *
   * @param message - The warning message to log
   * @returns void
   */
  warn(message) {
    console.warn(`[AllProfanity] ${message}`);
  }
  /**
   * Log error messages to console.error with [AllProfanity] prefix.
   *
   * @param message - The error message to log
   * @returns void
   */
  error(message) {
    console.error(`[AllProfanity] ${message}`);
  }
}

/**
 * Silent logger implementation that suppresses all log output.
 *
 * @class SilentLogger
 * @implements {Logger}
 * @description A no-op logger that discards all log messages. Used when `silent: true` is set
 * in AllProfanityOptions, or when you want to completely disable logging.
 *
 * @internal
 */
class SilentLogger {
  /**
   * No-op implementation - messages are discarded.
   *
   * @param _message - The message (unused)
   * @returns void
   */
  info(_message) {
    // Silent mode - no logging
  }
  /**
   * No-op implementation - warnings are discarded.
   *
   * @param _message - The warning message (unused)
   * @returns void
   */
  warn(_message) {
    // Silent mode - no logging
  }
  /**
   * No-op implementation - errors are discarded.
   *
   * @param _message - The error message (unused)
   * @returns void
   */
  error(_message) {
    // Silent mode - no logging
  }
}
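/*
 * Example (sketch, not part of the library): any object exposing
 * info/warn/error(message) methods can be supplied as `options.logger`,
 * e.g. to collect log lines in memory instead of printing them:
 *
 *   const lines = [];
 *   const filter = new AllProfanity({
 *     logger: {
 *       info: (m) => lines.push(`INFO ${m}`),
 *       warn: (m) => lines.push(`WARN ${m}`),
 *       error: (m) => lines.push(`ERROR ${m}`),
 *     },
 *   });
 */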
/**
 * Severity levels for profanity detection results.
 *
 * @enum {number}
 * @description Categorizes the severity of detected profanity based on the number
 * of unique words and total matches found in the text.
 *
 * @readonly
 * @example
 * ```typescript
 * const result = filter.detect("some text");
 * if (result.severity === ProfanitySeverity.EXTREME) {
 *   // Handle extreme profanity
 * }
 * ```
 */
export var ProfanitySeverity;
(function (ProfanitySeverity) {
  /** Mild profanity: 1 unique word or 1 total match */
  ProfanitySeverity[ProfanitySeverity["MILD"] = 1] = "MILD";
  /** Moderate profanity: 2 unique words or 2 total matches */
  ProfanitySeverity[ProfanitySeverity["MODERATE"] = 2] = "MODERATE";
  /** Severe profanity: 3 unique words or 3 total matches */
  ProfanitySeverity[ProfanitySeverity["SEVERE"] = 3] = "SEVERE";
  /** Extreme profanity: 4+ unique words or 5+ total matches */
  ProfanitySeverity[ProfanitySeverity["EXTREME"] = 4] = "EXTREME";
})(ProfanitySeverity = ProfanitySeverity || (ProfanitySeverity = {}));

/**
 * Validates that an input is a string.
 *
 * @function validateString
 * @param {unknown} input - The value to validate
 * @param {string} paramName - Name of the parameter being validated (used in error messages)
 * @returns {string} The validated string
 * @throws {TypeError} If input is not a string
 *
 * @internal
 *
 * @example
 * ```typescript
 * const text = validateString(userInput, 'text');
 * // Returns userInput if it's a string, throws TypeError otherwise
 * ```
 */
function validateString(input, paramName) {
  if (typeof input !== "string") {
    throw new TypeError(`${paramName} must be a string, got ${typeof input}`);
  }
  return input;
}

/**
 * Validates and filters a string array, removing non-string and empty items.
 *
 * @function validateStringArray
 * @param {unknown} input - The value to validate (expected to be an array)
 * @param {string} paramName - Name of the parameter being validated (used in error/warning messages)
 * @returns {string[]} Array of valid, non-empty strings
 * @throws {TypeError} If input is not an array
 *
 * @internal
 *
 * @example
 * ```typescript
 * const words = validateStringArray(['word1', '', 123, 'word2'], 'words');
 * // Returns: ['word1', 'word2']
 * // Logs warning: "Skipping non-string item in words: 123"
 * ```
 */
function validateStringArray(input, paramName) {
  if (!Array.isArray(input)) {
    throw new TypeError(`${paramName} must be an array`);
  }
  return input.filter((item) => {
    if (typeof item !== "string") {
      console.warn(`Skipping non-string item in ${paramName}: ${item}`);
      return false;
    }
    return item.trim().length > 0;
  });
}

/**
 * Trie (prefix tree) node for efficient pattern matching and word storage.
 *
 * @class TrieNode
 * @description Implements a trie data structure for O(m) time complexity word matching,
 * where m is the length of the word being searched. Each node represents a character
 * in the word, and paths from root to nodes with isEndOfWord=true represent complete words.
 *
 * @internal
 *
 * @example
 * ```typescript
 * const trie = new TrieNode();
 * trie.addWord('bad');
 * trie.addWord('badword');
 * const matches = trie.findMatches('badwords here', 0, false);
 * // Returns matches for 'bad' and 'badword'
 * ```
 */
class TrieNode {
  constructor() {
    /** Map of characters to child nodes for fast lookups */
    this.children = new Map();
    /** Flag indicating if this node represents the end of a complete word */
    this.isEndOfWord = false;
    /** The complete word ending at this node (only set when isEndOfWord is true) */
    this.word = "";
  }
  /**
   * Adds a word to the trie structure.
   *
   * @param {string} word - The word to add to the trie
   * @returns {void}
   *
   * @remarks
   * - Time Complexity: O(m) where m is the length of the word
   * - Space Complexity: O(m) in worst case when all characters are new
   * - Supports any Unicode characters
   *
   * @example
   * ```typescript
   * const trie = new TrieNode();
   * trie.addWord('hello');
   * trie.addWord('world');
   * ```
   */
  addWord(word) {
    let current = this;
    for (const char of word) {
      if (!current.children.has(char)) {
        current.children.set(char, new TrieNode());
      }
      const nextNode = current.children.get(char);
      if (nextNode) {
        current = nextNode;
      }
    }
    current.isEndOfWord = true;
    current.word = word;
  }
  /**
   * Removes a word from the trie structure.
   *
   * @param {string} word - The word to remove from the trie
   * @returns {boolean} True if the word existed and was removed, false if word was not found
   *
   * @remarks
   * - Time Complexity: O(m) where m is the length of the word
   * - Also removes unnecessary nodes to keep the trie optimized
   * - Only removes the word marking; shared prefixes with other words are preserved
   *
   * @example
   * ```typescript
   * const trie = new TrieNode();
   * trie.addWord('hello');
   * trie.removeWord('hello'); // Returns: true
   * trie.removeWord('world'); // Returns: false (word not in trie)
   * ```
   */
  removeWord(word) {
    return this.removeHelper(word, 0);
  }
  /**
   * Recursive helper method for removing a word from the trie.
   *
   * @param {string} word - The word being removed
   * @param {number} index - Current character index in the word
   * @returns {boolean} True if this node should be deleted (has no children and is not end of another word)
   *
   * @internal
   */
  removeHelper(word, index) {
    if (index === word.length) {
      if (!this.isEndOfWord) return false;
      this.isEndOfWord = false;
      return this.children.size === 0;
    }
    const char = word[index];
    const node = this.children.get(char);
    if (!node) return false;
    const shouldDeleteChild = node.removeHelper(word, index + 1);
    if (shouldDeleteChild) {
      this.children.delete(char);
      return this.children.size === 0 && !this.isEndOfWord;
    }
    return false;
  }
  /**
   * Finds all word matches in text starting at a specific position.
   *
   * @param {string} text - The text to search for profanity
   * @param {number} startPos - The starting position (0-based index) in the text
   * @param {boolean} allowPartial - If true, finds partial matches within larger words
   * @returns {Array<{ word: string; start: number; end: number }>} Array of match objects with word and position info
   *
   * @remarks
   * - Time Complexity: O(k) where k is the length of the longest match from startPos
   * - Returns all valid words that can be formed starting from startPos
   * - Returned offsets are relative to startPos; whole-word filtering is applied by the caller
   *
   * @example
   * ```typescript
   * const trie = new TrieNode();
   * trie.addWord('bad');
   * const matches = trie.findMatches('badword', 0, false);
   * // Returns: [{ word: 'bad', start: 0, end: 3 }]
   * ```
   */
  findMatches(text, startPos, allowPartial) {
    const matches = [];
    let current = this;
    let pos = startPos;
    while (pos < text.length) {
      const nextNode = current.children.get(text[pos]);
      if (!nextNode) break;
      current = nextNode;
      pos++;
      if (current.isEndOfWord) {
        if (!allowPartial) {
          const wordStart = startPos;
          const wordEnd = pos;
          matches.push({
            word: current.word,
            start: wordStart - startPos,
            end: wordEnd - startPos,
          });
        } else {
          matches.push({
            word: current.word,
            start: 0,
            end: pos - startPos,
          });
        }
      }
    }
    return matches;
  }
  /**
   * Clears all words from the trie, resetting it to empty state.
   *
   * @returns {void}
   *
   * @remarks
   * - Time Complexity: O(1) - clears the root node only (JavaScript GC handles children)
   * - Removes all stored words and resets the trie to initial state
   *
   * @example
   * ```typescript
   * const trie = new TrieNode();
   * trie.addWord('hello');
   * trie.addWord('world');
   * trie.clear();
   * // Trie is now empty
   * ```
   */
  clear() {
    this.children.clear();
    this.isEndOfWord = false;
    this.word = "";
  }
}

/**
 * AllProfanity - Professional-grade multilingual profanity detection and filtering library.
 *
 * @class AllProfanity
 * @description A comprehensive, high-performance profanity filtering system supporting 9+ languages
 * with advanced features including leet speak detection, context analysis, multiple matching algorithms,
 * and customizable filtering options.
 *
 * @remarks
 * ### Features:
 * - **Multi-language Support**: English, Hindi, French, German, Spanish, Bengali, Tamil, Telugu, Brazilian Portuguese
 * - **Advanced Algorithms**: Trie, Aho-Corasick, Bloom Filter, and hybrid approaches
 * - **Leet Speak Detection**: Automatically normalizes and detects variations like "h3ll0"
 * - **Context Analysis**: Reduces false positives using surrounding word context
 * - **Performance**: Built-in caching and optimized data structures
 * - **Flexible**: Custom dictionaries, whitelisting, severity levels
 *
 * ### Default Behavior:
 * - Loads English and Hindi dictionaries by default
 * - Case-insensitive matching
 * - Leet speak detection enabled
 * - Uses Trie algorithm (fastest for most cases)
 *
 * @example
 * ```typescript
 * // Basic usage with default instance
 * import allProfanity from 'allprofanity';
 *
 * const result = allProfanity.detect("This is some bad text");
 * console.log(result.hasProfanity); // true
 * console.log(result.cleanedText); // "This is some *** text"
 * console.log(result.severity); // ProfanitySeverity.MILD
 * ```
 *
 * @example
 * ```typescript
 * // Advanced usage with custom configuration
 * import { AllProfanity, ProfanitySeverity } from 'allprofanity';
 *
 * const filter = new AllProfanity({
 *   languages: ['english', 'french', 'spanish'],
 *   enableLeetSpeak: true,
 *   strictMode: true,
 *   algorithm: {
 *     matching: 'hybrid',
 *     useBloomFilter: true
 *   },
 *   performance: {
 *     enableCaching: true,
 *     cacheSize: 500
 *   },
 *   whitelistWords: ['class', 'assignment']
 * });
 *
 * const text = "This text has some b@d w0rds";
 * const result = filter.detect(text);
 *
 * if (result.hasProfanity) {
 *   console.log(`Found ${result.detectedWords.length} profane words`);
 *   console.log(`Severity: ${ProfanitySeverity[result.severity]}`);
 *   console.log(`Cleaned: ${result.cleanedText}`);
 * }
 * ```
 *
 * @example
 * ```typescript
 * // Using individual methods
 * const filter = new AllProfanity();
 *
 * // Simple check
 * if (filter.check("some text")) {
 *   console.log("Contains profanity!");
 * }
 *
 * // Clean with custom placeholder
 * const cleaned = filter.clean("bad words here", "#");
 *
 * // Load additional languages
 * filter.loadLanguage('german');
 * filter.loadIndianLanguages(); // Loads hindi, bengali, tamil, telugu
 *
 * // Add custom words
 * filter.add(['customword1', 'customword2']);
 *
 * // Remove words
 * filter.remove(['someword']);
 *
 * // Whitelist words
 * filter.addToWhitelist(['class', 'assignment']);
 * ```
 *
 * @see {@link AllProfanityOptions} for all configuration options
 * @see {@link ProfanityDetectionResult} for detection result format
 * @see {@link ProfanitySeverity} for severity levels
 */
export class AllProfanity {
  /**
   * Creates a new AllProfanity instance with the specified configuration.
   *
   * @constructor
   * @param {AllProfanityOptions} [options] - Configuration options for profanity detection behavior
   *
   * @remarks
   * ### Default Initialization:
   * - Loads English and Hindi dictionaries automatically
   * - Enables leet speak detection
   * - Case-insensitive matching
   * - Uses Trie algorithm for pattern matching
   *
   * ### Performance Considerations:
   * - Initial load time depends on number of languages loaded
   * - Aho-Corasick automaton (if enabled) is built during construction
   * - Bloom Filter (if enabled) is populated during construction
   *
   * @throws {TypeError} If invalid options are provided
   *
   * @example
   * ```typescript
   * // Default instance
   * const filter = new AllProfanity();
   *
   * // Custom configuration
   * const filter = new AllProfanity({
   *   languages: ['english', 'french'],
   *   strictMode: true,
   *   defaultPlaceholder: '#',
   *   algorithm: { matching: 'hybrid' }
   * });
   *
   * // Silent mode (no logging)
   * const filter = new AllProfanity({ silent: true });
   * ```
   *
   * @see {@link AllProfanityOptions} for all available configuration options
   */
  constructor(options) {
    this.profanityTrie = new TrieNode();
    this.whitelistSet = new Set();
    this.loadedLanguages = new Set();
    this.defaultPlaceholder = "*";
    this.enableLeetSpeak = true;
    this.caseSensitive = false;
    this.strictMode = false;
    this.detectPartialWords = false;
    this.availableLanguages = {
      english: englishBadWords || [],
      hindi: hindiBadWords || [],
      french: frenchBadWords || [],
      german: germanBadWords || [],
      spanish: spanishBadWords || [],
      bengali: bengaliBadWords || [],
      tamil: tamilBadWords || [],
      telugu: teluguBadWords || [],
      brazilian: brazilianBadWords || [],
    };
    this.leetMappings = new Map([
      ["@", "a"], ["^", "a"], ["4", "a"],
      ["8", "b"], ["6", "b"], ["|3", "b"],
      ["(", "c"], ["<", "c"], ["©", "c"],
      ["|)", "d"],
      ["0", "o"],
      ["3", "e"], ["€", "e"],
      ["|=", "f"], ["ph", "f"],
      ["9", "g"],
      ["#", "h"], ["|-|", "h"],
      ["1", "i"], ["!", "i"], ["|", "i"],
      ["_|", "j"], ["¿", "j"],
      ["|<", "k"], ["1<", "k"],
      ["7", "l"],
      ["|\\/|", "m"], ["/\\/\\", "m"],
      ["|\\|", "n"], ["//", "n"],
      ["()", "o"],
      ["|*", "p"], ["|o", "p"],
      ["(_,)", "q"], ["()_", "q"],
      ["|2", "r"], ["12", "r"],
      ["5", "s"], ["$", "s"], ["z", "s"],
      // Note: "7" also appears above for "l"; in a Map the later entry
      // wins, so "7" ultimately normalizes to "t".
      ["7", "t"], ["+", "t"], ["†", "t"],
      ["|_|", "u"], ["(_)", "u"], ["v", "u"],
      ["\\/", "v"], ["|/", "v"],
      ["\\/\\/", "w"], ["vv", "w"],
      ["><", "x"], ["}{", "x"],
      ["`/", "y"], ["j", "y"],
      ["2", "z"], ["7_", "z"],
    ]);
    this.dynamicWords = new Set();
    // Advanced algorithms
    this.ahoCorasickAutomaton = null;
    this.bloomFilter = null;
    this.contextAnalyzer = null;
    this.matchingAlgorithm = "trie";
    this.resultCache = null;
    this.cacheSize = 1000;
    // Use silent logger if silent mode is enabled, otherwise use provided logger or console logger
    this.logger =
      options?.logger ||
      (options?.silent ? new SilentLogger() : new ConsoleLogger());
    if (options?.defaultPlaceholder !== undefined) {
      this.setPlaceholder(options.defaultPlaceholder);
    }
    this.enableLeetSpeak = options?.enableLeetSpeak ?? true;
    this.caseSensitive = options?.caseSensitive ?? false;
    this.strictMode = options?.strictMode ?? false;
    this.detectPartialWords = options?.detectPartialWords ?? false;
    if (options?.whitelistWords) {
      this.addToWhitelist(options.whitelistWords);
    }
    // Initialize advanced algorithms BEFORE loading dictionaries
    // so that words can be added to all data structures
    this.initializeAdvancedAlgorithms(options);
    this.loadLanguage("english");
    this.loadLanguage("hindi");
    if (options?.languages?.length) {
      options.languages.forEach((lang) => this.loadLanguage(lang));
    }
    if (options?.customDictionaries) {
      Object.entries(options.customDictionaries).forEach(([name, words]) => {
        this.loadCustomDictionary(name, words);
      });
    }
  }
  /**
   * Initialize advanced algorithms based on configuration
   */
  initializeAdvancedAlgorithms(options) {
    // Set matching algorithm
    if (options?.algorithm?.matching) {
      this.matchingAlgorithm = options.algorithm.matching;
    }
    // Initialize Bloom Filter if enabled
    const bloomEnabled =
      options?.algorithm?.useBloomFilter ||
      options?.bloomFilter?.enabled ||
      this.matchingAlgorithm === "hybrid";
    if (bloomEnabled) {
      const expectedItems = options?.bloomFilter?.expectedItems || 10000;
      const falsePositiveRate = options?.bloomFilter?.falsePositiveRate || 0.01;
      this.bloomFilter = new BloomFilter(expectedItems, falsePositiveRate);
      this.logger.info(
        `Bloom Filter initialized with ${expectedItems} expected items and ${(falsePositiveRate * 100).toFixed(2)}% false positive rate`
      );
    }
    // Initialize Aho-Corasick if enabled
    const ahoEnabled =
      options?.algorithm?.useAhoCorasick ||
      options?.ahoCorasick?.enabled ||
      this.matchingAlgorithm === "aho-corasick" ||
      this.matchingAlgorithm === "hybrid";
    if (ahoEnabled) {
      this.ahoCorasickAutomaton = new AhoCorasick([]);
      this.logger.info("Aho-Corasick automaton initialized");
    }
    // Initialize Context Analyzer if enabled
    const contextEnabled =
      options?.algorithm?.useContextAnalysis || options?.contextAnalysis?.enabled;
    if (contextEnabled) {
      const contextLanguages = options?.contextAnalysis?.languages || ["en"];
      this.contextAnalyzer = new ContextAnalyzer(contextLanguages);
      if (options?.contextAnalysis?.contextWindow) {
        this.contextAnalyzer.setContextWindow(options.contextAnalysis.contextWindow);
      }
      this.logger.info(
        `Context Analyzer initialized for languages: ${contextLanguages.join(", ")}`
      );
    }
    // Initialize result cache if enabled
    if (options?.performance?.enableCaching) {
      this.cacheSize = options.performance.cacheSize || 1000;
      this.resultCache = new Map();
      this.logger.info(`Result caching enabled with size limit: ${this.cacheSize}`);
    }
  }
  /**
   * Normalize leet speak to regular characters.
   * @param text - The input text.
   * @returns Normalized text.
   */
  normalizeLeetSpeak(text) {
    if (!this.enableLeetSpeak) return text;
    let normalized = text.toLowerCase();
    // Apply longer mappings first so multi-character sequences are not
    // broken up by their single-character components.
    const sortedMappings = Array.from(this.leetMappings.entries()).sort(
      ([leetA], [leetB]) => leetB.length - leetA.length
    );
    for (const [leet, normal] of sortedMappings) {
      const regex = new RegExp(this.escapeRegex(leet), "g");
      normalized = normalized.replace(regex, normal);
    }
    return normalized;
  }
  /**
   * Escape regex special characters in a string.
   * @param str - The string to escape.
   * @returns The escaped string.
   */
  escapeRegex(str) {
    return str.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
  }
  /**
   * Check if a match is bounded by word boundaries (strict mode).
   * @param text - The text.
   * @param start - Start index.
   * @param end - End index.
   * @returns True if match is at word boundaries, false otherwise.
   */
  hasWordBoundaries(text, start, end) {
    if (!this.strictMode) return true;
    const beforeChar = start > 0 ? text[start - 1] : " ";
    const afterChar = end < text.length ? text[end] : " ";
    const wordBoundaryRegex = /[\s\p{P}\p{S}]/u;
    return wordBoundaryRegex.test(beforeChar) && wordBoundaryRegex.test(afterChar);
  }
  /**
   * Determine if a match is a whole word.
   * @param text - The text.
   * @param start - Start index.
   * @param end - End index.
   * @returns True if whole word, false otherwise.
   */
  isWholeWord(text, start, end) {
    if (start !== 0 && /\w/.test(text[start - 1])) return false;
    if (end !== text.length && /\w/.test(text[end])) return false;
    return true;
  }
  /**
   * Check if a match is whitelisted.
   * @param word - Word from dictionary.
   * @param matchedText - Actual matched text.
   * @returns True if whitelisted, false otherwise.
   */
  isWhitelistedMatch(word, matchedText) {
    if (this.caseSensitive) {
      return this.whitelistSet.has(word) || this.whitelistSet.has(matchedText);
    }
    return (
      this.whitelistSet.has(word.toLowerCase()) ||
      this.whitelistSet.has(matchedText.toLowerCase())
    );
  }
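  /*
   * Example (sketch): how leet normalization feeds matching. Because the
   * mapping table is applied longest-key-first, "|\/|" resolves to "m"
   * before "|" resolves to "i". Single-character mappings keep indices
   * aligned with the original text:
   *
   *   const filter = new AllProfanity();
   *   // "b@d" normalizes to "bad" before the trie lookup runs,
   *   // so leet variants of dictionary words are still detected.
   *   filter.check("b@d"); // true, assuming "bad" is in a loaded dictionary
   */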
  /**
   * Remove overlapping matches, keeping only the longest at each start position.
   * @param matches - Array of match results.
   * @returns Deduplicated matches.
   */
  deduplicateMatches(matches) {
    // Sort by start position ascending; on ties, longer match first.
    const sorted = [...matches].sort((a, b) => {
      if (a.start !== b.start) return a.start - b.start;
      return b.end - a.end;
    });
    // Greedy sweep: e.g. spans (0,6), (0,3), (4,6) keep only (0,6).
    const result = [];
    let lastEnd = -1;
    for (const match of sorted) {
      if (match.start >= lastEnd) {
        result.push(match);
        lastEnd = match.end;
      }
    }
    return result;
  }
  /**
   * Use Aho-Corasick algorithm for pattern matching
   */
  findMatchesWithAhoCorasick(searchText, originalText) {
    if (!this.ahoCorasickAutomaton) {
      return [];
    }
    const ahoMatches = this.ahoCorasickAutomaton.findAll(searchText);
    const results = [];
    for (const match of ahoMatches) {
      if (!this.detectPartialWords && !this.isWholeWord(originalText, match.start, match.end)) {
        continue;
      }
      const matchedText = originalText.substring(match.start, match.end);
      if (this.isWhitelistedMatch(match.pattern, matchedText)) {
        continue;
      }
      if (this.hasWordBoundaries(originalText, match.start, match.end)) {
        results.push({
          word: match.pattern,
          start: match.start,
          end: match.end,
          originalWord: matchedText,
        });
      }
    }
    return results;
  }
  /**
   * Hybrid approach: Aho-Corasick for fast matching, Bloom Filter for validation
   */
  findMatchesHybrid(searchText, originalText) {
    // Use Aho-Corasick for primary matching if available
    if (this.ahoCorasickAutomaton) {
      const matches = this.findMatchesWithAhoCorasick(searchText, originalText);
      // If Bloom Filter is enabled, validate matches
      if (this.bloomFilter) {
        return matches.filter((match) => this.bloomFilter.mightContain(match.word));
      }
      return matches;
    }
    // Fall back to the Trie if Aho-Corasick is not available
    const matches = [];
    this.findMatches(searchText, originalText, matches);
    // Validate with Bloom Filter if enabled
    if (this.bloomFilter) {
      return matches.filter((match) => this.bloomFilter.mightContain(match.word));
    }
    return matches;
  }
  /**
   * Apply context analysis to filter false positives
   */
  applyContextAnalysis(text, matches, scoreThreshold = 0.5) {
    if (!this.contextAnalyzer) {
      return matches;
    }
    return matches.filter((match) => {
      const analysis = this.contextAnalyzer.analyzeContext(text, match.start, match.end, match.word);
      // A score at or above the threshold means the match is likely profanity
      return analysis.score >= scoreThreshold;
    });
  }
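  /*
   * Example (sketch): how the matching helpers above compose. With the
   * "hybrid" algorithm, Aho-Corasick proposes candidate spans, the Bloom
   * Filter cheaply re-validates the matched words, and (when configured)
   * the ContextAnalyzer drops candidates whose surrounding context scores
   * below the 0.5 default threshold:
   *
   *   const filter = new AllProfanity({
   *     algorithm: { matching: "hybrid", useContextAnalysis: true },
   *   });
   *   const result = filter.detect("some user text"); // runs the pipeline above
   */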
  /**
   * Detects profanity in the provided text and returns comprehensive analysis.
   *
   * @param {string} text - The text to analyze for profanity
   * @returns {ProfanityDetectionResult} Detailed detection result including matches, positions, severity, and cleaned text
   *
   * @throws {TypeError} If text is not a string
   *
   * @remarks
   * ### Performance:
   * - Time Complexity: O(n*m) where n is text length, m is average word length in dictionary
   * - With Bloom Filter: O(n) average case (faster early rejection)
   * - With Caching: O(1) for repeated identical text
   *
   * ### Features:
   * - Detects leet speak variations (if enabled): "h3ll0" → "hello"
   * - Respects word boundaries (strict mode) or detects partial matches
   * - Returns exact positions for highlighting/masking
   * - Calculates severity based on match count and uniqueness
   *
   * ### Caching:
   * - Results are cached if `performance.enableCaching` is true
   * - The oldest entry is evicted once the cache exceeds its size limit
   *
   * @example
   * ```typescript
   * const filter = new AllProfanity();
   * const result = filter.detect("This has bad words");
   *
   * console.log(result.hasProfanity); // true
   * console.log(result.detectedWords); // ['bad']
   * console.log(result.cleanedText); // 'This has *** words'
   * console.log(result.severity); // ProfanitySeverity.MILD
   * console.log(result.positions); // [{ word: 'bad', start: 9, end: 12 }]
   * ```
   *
   * @example
   * ```typescript
   * // With leet speak detection
   * const filter = new AllProfanity({ enableLeetSpeak: true });
   * const result = filter.detect("st0p b3ing b@d");
   *
   * if (result.hasProfanity) {
   *   result.positions.forEach(pos => {
   *     console.log(`Found "${pos.word}" at position ${pos.start}-${pos.end}`);
   *   });
   * }
   * ```
   *
   * @see {@link ProfanityDetectionResult} for result structure
   * @see {@link ProfanitySeverity} for severity levels
   */
  detect(text) {
    const validatedText = validateString(text, "text");
    if (validatedText.length === 0) {
      return {
        hasProfanity: false,
        detectedWords: [],
        cleanedText: validatedText,
        severity: ProfanitySeverity.MILD,
        positions: [],
      };
    }
    // Check cache first if enabled
    if (this.resultCache?.has(validatedText)) {
      return this.resultCache.get(validatedText);
    }
    let matches = [];
    const normalizedText = this.caseSensitive
      ? validatedText
      : validatedText.toLowerCase();
    // Choose matching algorithm based on configuration
    switch (this.matchingAlgorithm) {
      case "aho-corasick":
        matches = this.findMatchesWithAhoCorasick(normalizedText, validatedText);
        if (this.enableLeetSpeak) {
          const leetNormalized = this.normalizeLeetSpeak(normalizedText);
          if (leetNormalized !== normalizedText) {
            const leetMatches = this.findMatchesWithAhoCorasick(leetNormalized, validatedText);
            matches.push(...leetMatches);
          }
        }
        break;
      case "hybrid":
        matches = this.findMatchesHybrid(normalizedText, validatedText);
        if (this.enableLeetSpeak) {
          const leetNormalized = this.normalizeLeetSpeak(normalizedText);
          if (leetNormalized !== normalizedText) {
            const leetMatches = this.findMatchesHybrid(leetNormalized, validatedText);
            matches.push(...leetMatches);
          }
        }
        break;
      case "trie":
      default:
        this.findMatches(normalizedText, validatedText, matches);
        if (this.enableLeetSpeak) {
          const leetNormalized = this.normalizeLeetSpeak(normalizedText);
          if (leetNormalized !== normalizedText) {
            this.findMatches(leetNormalized, validatedText, matches);
          }
        }
        break;
    }
    // Apply context analysis if enabled
    if (this.contextAnalyzer) {
      matches = this.applyContextAnalysis(validatedText, matches);
    }
    const uniqueMatches = this.deduplicateMatches(matches);
    const detectedWords = uniqueMatches.map((m) => m.originalWord);
    const severity = this.calculateSeverity(uniqueMatches);
    const cleanedText = this.generateCleanedText(validatedText, uniqueMatches);
    const result = {
      hasProfanity: uniqueMatches.length > 0,
      detectedWords,
      cleanedText,
      severity,
      positions: uniqueMatches.map((m) => ({
        word: m.originalWord,
        start: m.start,
        end: m.end,
      })),
    };
    // Cache the result if caching is enabled; evict the oldest entry
    // (insertion order) once the configured size limit is exceeded.
    if (this.resultCache) {
      this.resultCache.set(validatedText, result);
      if (this.resultCache.size > this.cacheSize) {
        const firstKey = this.resultCache.keys().next().value;
        if (firstKey !== undefined) {
          this.resultCache.delete(firstKey);
        }
      }
    }
    return result;
  }
  /**
   * Main matching function, with whole-word logic.
   * @param searchText - The normalized text to search.
   * @param originalText - The original text.
   * @param matches - Array to collect matches.
   */
  findMatches(searchText, originalText, matches) {
    for (let i = 0; i < searchText.length; i++) {
      const matchResults = this.profanityTrie.findMatches(searchText, i, this.detectPartialWords);
      for (const match of matchResults) {
        const start = i + match.start;
        const end = i + match.end;
        if (!this.detectPartialWords && !this.isWholeWord(originalText, start, end)) {
          continue;
        }
        const matchedText = originalText.substring(start, end);
        if (this.isWhitelistedMatch(match.word, matchedText)) {
          continue;
        }
        if (this.hasWordBoundaries(originalText, start, end)) {
          matches.push({
            word: match.word,
            start,
            end,
            originalWord: matchedText,
          });
        }
      }
    }
  }
  /**
   * Generate cleaned text by replacing profane words.
   * @param originalText - The original text.
   * @param matches - Array of matches.
   * @returns Cleaned text.
   */
  generateCleanedText(originalText, matches) {
    if (matches.length === 0) return originalText;
    let result = originalText;
    // Replace from the end of the string so earlier offsets stay valid.
    const sortedMatches = [...this.deduplicateMatches(matches)].sort((a, b) => b.start - a.start);
    for (const match of sortedMatches) {
      const replacement = this.defaultPlaceholder.repeat(match.originalWord.length);
      result = result.substring(0, match.start) + replacement + result.substring(match.end);
    }
    return result;
  }
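  /*
   * Example (sketch): character-level masking as performed above.
   *
   *   originalText: "so bad here"   match: { start: 3, end: 6, originalWord: "bad" }
   *   result:       "so *** here"   // defaultPlaceholder "*" repeated 3 times
   */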
  /**
   * Quick boolean check for profanity presence in text.
   *
   * @param {string} text - The text to check for profanity
   * @returns {boolean} True if profanity is detected, false otherwise
   *
   * @throws {TypeError} If text is not a string
   *
   * @remarks
   * - Convenience method that internally calls `detect()` and returns only the boolean result
   * - For detailed information about matches, use `detect()` instead
   * - Results are cached if caching is enabled (same cache as `detect()`)
   *
   * @example
   * ```typescript
   * const filter = new AllProfanity();
   *
   * if (filter.check("This has bad words")) {
   *   console.log("Profanity detected!");
   * }
   *
   * // Quick validation
   * const isClean = !filter.check(userInput);
   * ```
   *
   * @see {@link detect} for detailed profanity analysis
   */
  check(text) {
    return this.detect(text).hasProfanity;
  }
  /**
   * Cleans text by replacing profanity with a placeholder character.
   *
   * @param {string} text - The text to clean
   * @param {string} [placeholder] - Optional custom placeholder character (uses default if not provided)
   * @returns {string} The cleaned text with profanity replaced
   *
   * @throws {TypeError} If text is not a string
   *
   * @remarks
   * ### Character-level Replacement:
   * - Each profane character is replaced individually
   * - "bad" with placeholder "*" becomes "***"
   * - Preserves text length and structure
   *
   * ### Placeholder Behavior:
   * - If no placeholder is provided (or it equals the default), the instance's default placeholder is used
   * - Otherwise the placeholder string is repeated once per character of each match
   * - An empty placeholder falls back to the default
   *
   * @example
   * ```typescript
   * const filter = new AllProfanity();
   *
   * // Using default placeholder (*)
   * const cleaned = filter.clean("This has bad words");
   * console.log(cleaned); // "This has *** *****"
   *
   * // Using custom placeholder
   * const cleaned = filter.clean("This has bad words", "#");
   * console.log(cleaned); // "This has ### #####"
   * ```
   *
   * @example
   * ```typescript
   * // Clean user-generated content for display
   * const userComment = "Some inappropriate words here";
   * const safeComment = filter.clean(userComment);
   * displayComment(safeComment);
   * ```
   *
   * @see {@link cleanWithPlaceholder} for word-level replacement
   * @see {@link setPlaceholder} to change default placeholder
   */
  clean(text, placeholder) {
    const detection = this.detect(text);
    if (!placeholder || placeholder === this.defaultPlaceholder) {
      return detection.cleanedText;
    }
    let result = text;
    const sortedPositions = [
      ...this.deduplicateMatches(
        detection.positions.map((p) => ({
          word: p.word,
          start: p.start,
          end: p.end,
          originalWord: text.substring(p.start, p.end),
        }))
      ),
    ].sort((a, b) => b.start - a.start);
    for (const pos of sortedPositions) {
      const originalWord = text.substring(pos.start, pos.end);
      const replacement = placeholder.repeat(originalWord.length);
      result = result.substring(0, pos.start) + replacement + result.substring(pos.end);
    }
    return result;
  }
  /**
   * Cleans text by replacing each profane word with a single placeholder string (word-level replacement).
   *
   * @param {string} text - The text to clean
   * @param {string} [placeholder="***"] - The placeholder string to use for each profane word
   * @returns {string} The cleaned text with each profane word replaced by the placeholder
   *
   * @throws {TypeError} If text is not a string
   *
   * @remarks
   * ### Word-level Replacement:
   * - Each profane word is replaced with the entire placeholder string (not character-by-character)
   * - "bad words" with placeholder "***" becomes "*** ***"
   * - Does NOT preserve original text length
   *
   * ### Difference from `clean()`:
   * - `clean()`: Character-level replacement - "bad" becomes "***" (preserves length)
   * - `cleanWithPlaceholder()`: Word-level replacement - "bad" becomes "***" (fixed placeholder)
   *
   * @example
   * ```typescript
   * const filter = new AllProfanity();
   *
   * // Default placeholder (***)
   * const text = "This has bad words";
   * const cleaned = filter.cleanWithPlaceholder(text);
   * console.log(cleaned); // "This has *** ***"
   *
   * // Custom placeholder
   * const cleaned2 = filter.cleanWithPlaceholder(text, "[CENSORED]");
   * console.log(cleaned2); // "This has [CENSORED] [CENSORED]"
   * ```
   *
   * @example
   * ```typescript
   * // Censoring chat messages
   * const message = "You are a badword and stupid";
   * const censored = filter.cleanWithPlaceholder(message, "[***]");
   * // Result: "You are a [***] and [***]"
   * ```
   *
   * @see {@link clean} for character-level replacement
   */
  cleanWithPlaceholder(text, placeholder = "***") {
    const detection = this.detect(text);
    if (detection.positions.length === 0) return text;
    let result = text;
    const sortedPositions = [
      ...this.deduplicateMatches(
        detection.positions.map((p) => ({
          word: p.word,
          start: p.start,
          end: p.end,
          originalWord: text.substring(p.start, p.end),
        }))
      ),
    ].sort((a, b) => b.start - a.start);
    for (const pos of sortedPositions) {
      if (!this.isWholeWord(result, pos.start, pos.end)) continue;
      result = result.substring(0, pos.start) + placeholder + result.substring(pos.end);
    }
    return result;
  }
  /**
   * Dynamically adds one or more words to the profanity filter at runtime.
   *
   * @param {string | string[]} word - A single word or array of words to add to the filter
   * @returns {void}
   *
   * @remarks
   * ### Behavior:
   * - Words are added to all active data structures (Trie, Aho-Corasick, Bloom Filter)
   * - Automatically normalizes words based on caseSensitive setting
   * - Skips whitelisted words
   * - Validates and filters out non-string or empty values
   * - Changes take effect immediately for subsequent detect/check/clean calls
   *
   * ### Use Cases:
   * - Adding context-specific profanity
   * - Building dynamic word lists from user reports
   * - Customizing filters for specific communities/applications
   *
   * @example
   * ```typescript
   * const filter = new AllProfanity();
   *
   * // Add single word
   * filter.add('newbadword');
   *
   * // Add multiple words
   * filter.add(['word1', 'word2', 'word3']);
   *
   * // Now these words will be detected
   * filter.check('newbadword'); // true
   * ```
   *
   * @example
   * ```typescript
   * // Add game-specific slang dynamically
   * const filter = new AllProfanity();
   * const gamingSlang = ['noob', 'trash', 'tryhard'];
   * filter.add(gamingSlang);
   *
   * const message = "You're such a noob";
   * console.log(filter.check(message)); // true
   * ```
   *
   * @see {@link remove} to remove words
   * @see {@link loadCustomDictionary} for loading named dictionaries
   */
  add(word) {
    const words = Array.isArray(word) ? word : [word];
    const validatedWords = validateStringArray(words, "words to add");
    for (const w of validatedWords) {
      this.dynamicWords.add(w);
      this.addWordToTrie(w);
    }
  }
  /**
   * Dynamically removes one or more words from the profanity filter at runtime.
   *
   * @param {string | string[]} word - A single word or array of words to remove from the filter
   * @returns {void}
   *
   * @remarks
   * ### Behavior:
   * - Removes words from all active data structures (Trie, dynamic words set)
   * - Normalizes words based on caseSensitive setting before removal
   * - Only removes dynamically added words, not words from loaded language dictionaries
   * - Changes take effect immediately for subsequent detect/check/clean calls
   *
   * ### Important Notes:
   * - Cannot remove words from built-in language dictionaries
   * - To exclude dictionary words, use `addToWhitelist()` instead
   * - Validates and filters out non-string or empty values
   *
   * @example
   * ```typescript
   * const filter = new AllProfanity();
   *
   * // Add then remove a word
   * filter.add('tempword');
   * filter.check('tempword'); // true
   *
   * filter.remove('tempword');
   * filter.check('tempword'); // false
   *
   * // Remove multiple words
   * filter.remove(['word1', 'word2']);
   * ```
   *
   * @example
   * ```typescript
   * // Managing custom word list
   * const filter = new AllProfanity();
   * filter.add(['custom1', 'custom2', 'custom3']);
   *
   * // Later, remove one that's no longer needed
   * filter.remove('custom2');
   * ```
   *
   * @see {@link add} to add words
   * @see {@link addToWhitelist} to exclude dictionary words without removing them
   */
  remove(word) {
    const words = Array.isArray(word) ? word : [word];
    const validatedWords = validateStringArray(words, "words to remove");
    for (const w of validatedWords) {
      const normalizedWord = this.caseSensitive ? w : w.toLowerCase();
      this.profanityTrie.removeWord(normalizedWord);
      this.dynamicWords.delete(w);
    }
  }
  /**
   * Add words to the whitelist.
   * @param words - Words to whitelist.
   */
  addToWhitelist(words) {
    const validatedWords = validateStringArray(words, "whitelist words");
    for (const word of validatedWords) {
      const normalizedWord = this.caseSensitive ? word : word.toLowerCase();
      this.whitelistSet.add(normalizedWord);
    }
  }
  /**
   * Remove words from the whitelist.