UNPKG

word-sensor

Version:

A powerful and flexible word filtering library for JavaScript/TypeScript with advanced features like regex patterns, statistics, and batch processing

438 lines 12.9 kB
// src/index.ts var WordSensor = class { forbiddenWords; regexPatterns; maskChar; caseInsensitive; logDetections; enableRegex; wordBoundary; customReplacer; detectionLogs; detectionStats; constructor(config = {}) { const { words = [], maskChar = "*", caseInsensitive = true, logDetections = false, enableRegex = false, wordBoundary = true, customReplacer } = config; this.forbiddenWords = /* @__PURE__ */ new Map(); this.regexPatterns = /* @__PURE__ */ new Map(); this.maskChar = maskChar; this.caseInsensitive = caseInsensitive; this.logDetections = logDetections; this.enableRegex = enableRegex; this.wordBoundary = wordBoundary; this.customReplacer = customReplacer; this.detectionLogs = []; this.detectionStats = { totalDetections: 0, uniqueWords: [], detectionCounts: {} }; words.forEach((word) => this.addWord(word)); } addWord(word, mask) { const key = this.caseInsensitive ? word.toLowerCase() : word; this.forbiddenWords.set(key, mask ?? null); if (this.enableRegex) { try { const pattern = this.wordBoundary ? `\\b${word.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")}\\b` : word; const flags = this.caseInsensitive ? "gi" : "g"; this.regexPatterns.set(key, new RegExp(pattern, flags)); } catch (error) { console.warn(`Invalid regex pattern for word "${word}":`, error); } } } addWords(words) { words.forEach((word) => this.addWord(word)); } addRegexPattern(pattern, mask) { if (!this.enableRegex) { throw new Error("Regex patterns are not enabled. Set enableRegex: true in config."); } try { const key = this.caseInsensitive ? pattern.toLowerCase() : pattern; this.forbiddenWords.set(key, mask ?? null); const flags = this.caseInsensitive ? "gi" : "g"; this.regexPatterns.set(key, new RegExp(pattern, flags)); } catch (error) { throw new Error(`Invalid regex pattern: ${error}`); } } removeWord(word) { const key = this.caseInsensitive ? word.toLowerCase() : word; this.forbiddenWords.delete(key); this.regexPatterns.delete(key); } removeWords(words) { words.forEach((word) => this.removeWord(word)); } clearWords() { this.forbiddenWords.clear(); this.regexPatterns.clear(); this.resetStats(); } getWords() { return Array.from(this.forbiddenWords.keys()); } hasWord(word) { const key = this.caseInsensitive ? word.toLowerCase() : word; return this.forbiddenWords.has(key); } applyMask(word, maskType) { if (maskType === "partial" && word.length > 2) { return word[0] + this.maskChar.repeat(word.length - 2) + word[word.length - 1]; } else if (maskType === "smart") { if (word.length <= 2) return this.maskChar.repeat(word.length); return word[0] + this.maskChar.repeat(Math.max(1, Math.floor(word.length * 0.6))) + word[word.length - 1]; } return this.maskChar.repeat(word.length); } updateStats(word) { this.detectionStats.totalDetections++; this.detectionStats.lastDetectionTime = /* @__PURE__ */ new Date(); const key = this.caseInsensitive ? word.toLowerCase() : word; if (!this.detectionStats.uniqueWords.includes(key)) { this.detectionStats.uniqueWords.push(key); } this.detectionStats.detectionCounts[key] = (this.detectionStats.detectionCounts[key] || 0) + 1; } filter(text, mode = "replace", maskType = "full") { if (this.forbiddenWords.size === 0) return text; if (this.enableRegex) { return this.filterWithRegex(text, mode, maskType); } else { return this.filterWithWords(text, mode, maskType); } } filterWithWords(text, mode, maskType) { const pattern = this.wordBoundary ? `\\b(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})\\b` : `(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})`; const regex = new RegExp(pattern, this.caseInsensitive ? "gi" : "g"); return text.replace(regex, (match) => { const key = this.caseInsensitive ? match.toLowerCase() : match; if (this.logDetections) { this.detectionLogs.push(match); } this.updateStats(match); if (mode === "remove") return ""; if (mode === "highlight") return `[FILTERED: ${match}]`; if (this.customReplacer) { return this.customReplacer(match, text); } return this.forbiddenWords.get(key) ?? this.applyMask(match, maskType); }); } filterWithRegex(text, mode, maskType) { let result = text; for (const [key, regex] of this.regexPatterns) { result = result.replace(regex, (match) => { if (this.logDetections) { this.detectionLogs.push(match); } this.updateStats(match); if (mode === "remove") return ""; if (mode === "highlight") return `[FILTERED: ${match}]`; if (this.customReplacer) { return this.customReplacer(match, text); } return this.forbiddenWords.get(key) ?? this.applyMask(match, maskType); }); } return result; } detect(text) { if (this.forbiddenWords.size === 0) return []; if (this.enableRegex) { return this.detectWithRegex(text); } else { return this.detectWithWords(text); } } detectWithWords(text) { const pattern = this.wordBoundary ? `\\b(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})\\b` : `(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})`; const regex = new RegExp(pattern, this.caseInsensitive ? "gi" : "g"); const matches = []; let match; while ((match = regex.exec(text)) !== null) { matches.push(match[0]); } return matches; } detectWithRegex(text) { const matches = []; for (const regex of this.regexPatterns.values()) { let match; while ((match = regex.exec(text)) !== null) { matches.push(match[0]); } } return matches; } detectWithPositions(text) { if (this.forbiddenWords.size === 0) return []; const positions = []; if (this.enableRegex) { for (const [key, regex] of this.regexPatterns) { let match; while ((match = regex.exec(text)) !== null) { positions.push({ word: match[0], start: match.index, end: match.index + match[0].length }); } } } else { const pattern = this.wordBoundary ? `\\b(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})\\b` : `(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})`; const regex = new RegExp(pattern, this.caseInsensitive ? "gi" : "g"); let match; while ((match = regex.exec(text)) !== null) { positions.push({ word: match[0], start: match.index, end: match.index + match[0].length }); } } return positions.sort((a, b) => a.start - b.start); } getDetectionLogs() { return [...this.detectionLogs]; } getStats() { return { ...this.detectionStats }; } resetStats() { this.detectionStats = { totalDetections: 0, uniqueWords: [], detectionCounts: {} }; this.detectionLogs = []; } setMaskChar(char) { this.maskChar = char; } setCaseInsensitive(value) { this.caseInsensitive = value; } setLogDetections(value) { this.logDetections = value; } setCustomReplacer(replacer) { this.customReplacer = replacer; } // Utility methods sanitizeText(text) { return this.filter(text, "replace", "full"); } isClean(text) { return this.detect(text).length === 0; } getCleanPercentage(text) { const detected = this.detect(text); const totalWords = text.split(/\s+/).length; return totalWords > 0 ? (totalWords - detected.length) / totalWords * 100 : 100; } }; var PRESET_WORDS = { profanity: [ "badword", "offensive", "rude", "vulgar", "inappropriate", "curse", "swear", "expletive", "obscene", "lewd" ], spam: [ "buy now", "click here", "free money", "make money fast", "weight loss", "viagra", "casino", "lottery", "winner" ], phishing: [ "verify account", "update password", "security alert", "suspended account", "unusual activity", "login attempt" ] }; function createWordSensor(config = {}) { return new WordSensor(config); } function createProfanityFilter(maskChar = "*") { return new WordSensor({ words: PRESET_WORDS.profanity, maskChar, caseInsensitive: true, logDetections: true }); } function createSpamFilter(maskChar = "#") { return new WordSensor({ words: PRESET_WORDS.spam, maskChar, caseInsensitive: true, logDetections: true, wordBoundary: false }); } function createPhishingFilter(maskChar = "!") { return new WordSensor({ words: PRESET_WORDS.phishing, maskChar, caseInsensitive: true, logDetections: true, wordBoundary: false }); } function getNestedValue(obj, path) { return path.split(".").reduce((acc, key) => acc && acc[key] !== void 0 ? acc[key] : void 0, obj); } async function loadForbiddenWordsFromAPI(url, path, sensor) { try { const response = await fetch(url); if (!response.ok) { throw new Error(`Failed to fetch: ${response.statusText}`); } const data = await response.json(); let words = []; if (Array.isArray(data)) { words = data; } else if (path) { words = getNestedValue(data, path) ?? []; } if (!Array.isArray(words)) { throw new Error("Invalid words format from API"); } sensor.addWords(words); console.log("Forbidden words added from API:", words); return true; } catch (error) { console.error("Error loading forbidden words:", error); return false; } } async function loadWordsFromFile(file) { return new Promise((resolve, reject) => { const reader = new FileReader(); reader.onload = (e) => { try { const content = e.target?.result; const words = content.split("\n").map((line) => line.trim()).filter((line) => line.length > 0 && !line.startsWith("#")); resolve(words); } catch (error) { reject(error); } }; reader.onerror = () => reject(new Error("Failed to read file")); reader.readAsText(file); }); } function validateRegexPattern(pattern) { try { new RegExp(pattern); return true; } catch { return false; } } function escapeRegexSpecialChars(str) { return str.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&"); } function createCustomReplacer(replacementMap) { return (word) => { const key = word.toLowerCase(); return replacementMap[key] || word; }; } function createEmojiReplacer() { const emojiMap = { "badword": "\u{1F92C}", "offensive": "\u{1F624}", "rude": "\u{1F612}", "vulgar": "\u{1F922}", "inappropriate": "\u{1F633}", "curse": "\u{1F4A2}", "swear": "\u{1F620}", "expletive": "\u{1F92F}", "obscene": "\u{1F631}", "lewd": "\u{1F635}" }; return (word) => { const key = word.toLowerCase(); return emojiMap[key] || "\u{1F910}"; }; } function batchFilter(texts, sensor, mode = "replace", maskType = "full") { return texts.map((text) => sensor.filter(text, mode, maskType)); } function batchDetect(texts, sensor) { return texts.map((text) => ({ text, detected: sensor.detect(text) })); } function getBatchStats(texts, sensor) { const results = texts.map((text) => ({ text, detected: sensor.detect(text), cleanPercentage: sensor.getCleanPercentage(text) })); const totalTexts = results.length; const cleanTexts = results.filter((r) => r.detected.length === 0).length; const dirtyTexts = totalTexts - cleanTexts; const totalDetections = results.reduce((sum, r) => sum + r.detected.length, 0); const averageCleanPercentage = results.reduce((sum, r) => sum + r.cleanPercentage, 0) / totalTexts; return { totalTexts, cleanTexts, dirtyTexts, totalDetections, averageCleanPercentage }; } export { PRESET_WORDS, WordSensor, batchDetect, batchFilter, createCustomReplacer, createEmojiReplacer, createPhishingFilter, createProfanityFilter, createSpamFilter, createWordSensor, escapeRegexSpecialChars, getBatchStats, getNestedValue, loadForbiddenWordsFromAPI, loadWordsFromFile, validateRegexPattern }; //# sourceMappingURL=index.mjs.map