word-sensor
Version:
A powerful and flexible word-filtering library for JavaScript/TypeScript, with advanced features such as regex patterns, detection statistics, and batch processing.
438 lines • 12.9 kB
JavaScript
// src/index.ts
var WordSensor = class {
forbiddenWords;
regexPatterns;
maskChar;
caseInsensitive;
logDetections;
enableRegex;
wordBoundary;
customReplacer;
detectionLogs;
detectionStats;
constructor(config = {}) {
const {
words = [],
maskChar = "*",
caseInsensitive = true,
logDetections = false,
enableRegex = false,
wordBoundary = true,
customReplacer
} = config;
this.forbiddenWords = /* @__PURE__ */ new Map();
this.regexPatterns = /* @__PURE__ */ new Map();
this.maskChar = maskChar;
this.caseInsensitive = caseInsensitive;
this.logDetections = logDetections;
this.enableRegex = enableRegex;
this.wordBoundary = wordBoundary;
this.customReplacer = customReplacer;
this.detectionLogs = [];
this.detectionStats = {
totalDetections: 0,
uniqueWords: [],
detectionCounts: {}
};
words.forEach((word) => this.addWord(word));
}
addWord(word, mask) {
const key = this.caseInsensitive ? word.toLowerCase() : word;
this.forbiddenWords.set(key, mask ?? null);
if (this.enableRegex) {
try {
const pattern = this.wordBoundary ? `\\b${word.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")}\\b` : word;
const flags = this.caseInsensitive ? "gi" : "g";
this.regexPatterns.set(key, new RegExp(pattern, flags));
} catch (error) {
console.warn(`Invalid regex pattern for word "${word}":`, error);
}
}
}
addWords(words) {
words.forEach((word) => this.addWord(word));
}
addRegexPattern(pattern, mask) {
if (!this.enableRegex) {
throw new Error("Regex patterns are not enabled. Set enableRegex: true in config.");
}
try {
const key = this.caseInsensitive ? pattern.toLowerCase() : pattern;
this.forbiddenWords.set(key, mask ?? null);
const flags = this.caseInsensitive ? "gi" : "g";
this.regexPatterns.set(key, new RegExp(pattern, flags));
} catch (error) {
throw new Error(`Invalid regex pattern: ${error}`);
}
}
removeWord(word) {
const key = this.caseInsensitive ? word.toLowerCase() : word;
this.forbiddenWords.delete(key);
this.regexPatterns.delete(key);
}
removeWords(words) {
words.forEach((word) => this.removeWord(word));
}
clearWords() {
this.forbiddenWords.clear();
this.regexPatterns.clear();
this.resetStats();
}
getWords() {
return Array.from(this.forbiddenWords.keys());
}
hasWord(word) {
const key = this.caseInsensitive ? word.toLowerCase() : word;
return this.forbiddenWords.has(key);
}
applyMask(word, maskType) {
if (maskType === "partial" && word.length > 2) {
return word[0] + this.maskChar.repeat(word.length - 2) + word[word.length - 1];
} else if (maskType === "smart") {
if (word.length <= 2)
return this.maskChar.repeat(word.length);
return word[0] + this.maskChar.repeat(Math.max(1, Math.floor(word.length * 0.6))) + word[word.length - 1];
}
return this.maskChar.repeat(word.length);
}
updateStats(word) {
this.detectionStats.totalDetections++;
this.detectionStats.lastDetectionTime = /* @__PURE__ */ new Date();
const key = this.caseInsensitive ? word.toLowerCase() : word;
if (!this.detectionStats.uniqueWords.includes(key)) {
this.detectionStats.uniqueWords.push(key);
}
this.detectionStats.detectionCounts[key] = (this.detectionStats.detectionCounts[key] || 0) + 1;
}
filter(text, mode = "replace", maskType = "full") {
if (this.forbiddenWords.size === 0)
return text;
if (this.enableRegex) {
return this.filterWithRegex(text, mode, maskType);
} else {
return this.filterWithWords(text, mode, maskType);
}
}
filterWithWords(text, mode, maskType) {
const pattern = this.wordBoundary ? `\\b(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})\\b` : `(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})`;
const regex = new RegExp(pattern, this.caseInsensitive ? "gi" : "g");
return text.replace(regex, (match) => {
const key = this.caseInsensitive ? match.toLowerCase() : match;
if (this.logDetections) {
this.detectionLogs.push(match);
}
this.updateStats(match);
if (mode === "remove")
return "";
if (mode === "highlight")
return `[FILTERED: ${match}]`;
if (this.customReplacer) {
return this.customReplacer(match, text);
}
return this.forbiddenWords.get(key) ?? this.applyMask(match, maskType);
});
}
filterWithRegex(text, mode, maskType) {
let result = text;
for (const [key, regex] of this.regexPatterns) {
result = result.replace(regex, (match) => {
if (this.logDetections) {
this.detectionLogs.push(match);
}
this.updateStats(match);
if (mode === "remove")
return "";
if (mode === "highlight")
return `[FILTERED: ${match}]`;
if (this.customReplacer) {
return this.customReplacer(match, text);
}
return this.forbiddenWords.get(key) ?? this.applyMask(match, maskType);
});
}
return result;
}
detect(text) {
if (this.forbiddenWords.size === 0)
return [];
if (this.enableRegex) {
return this.detectWithRegex(text);
} else {
return this.detectWithWords(text);
}
}
detectWithWords(text) {
const pattern = this.wordBoundary ? `\\b(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})\\b` : `(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})`;
const regex = new RegExp(pattern, this.caseInsensitive ? "gi" : "g");
const matches = [];
let match;
while ((match = regex.exec(text)) !== null) {
matches.push(match[0]);
}
return matches;
}
detectWithRegex(text) {
const matches = [];
for (const regex of this.regexPatterns.values()) {
let match;
while ((match = regex.exec(text)) !== null) {
matches.push(match[0]);
}
}
return matches;
}
detectWithPositions(text) {
if (this.forbiddenWords.size === 0)
return [];
const positions = [];
if (this.enableRegex) {
for (const [key, regex] of this.regexPatterns) {
let match;
while ((match = regex.exec(text)) !== null) {
positions.push({
word: match[0],
start: match.index,
end: match.index + match[0].length
});
}
}
} else {
const pattern = this.wordBoundary ? `\\b(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})\\b` : `(${[...this.forbiddenWords.keys()].map((w) => w.replace(/[-\/\\^$*+?.()|[\]{}]/g, "\\$&")).join("|")})`;
const regex = new RegExp(pattern, this.caseInsensitive ? "gi" : "g");
let match;
while ((match = regex.exec(text)) !== null) {
positions.push({
word: match[0],
start: match.index,
end: match.index + match[0].length
});
}
}
return positions.sort((a, b) => a.start - b.start);
}
getDetectionLogs() {
return [...this.detectionLogs];
}
getStats() {
return { ...this.detectionStats };
}
resetStats() {
this.detectionStats = {
totalDetections: 0,
uniqueWords: [],
detectionCounts: {}
};
this.detectionLogs = [];
}
setMaskChar(char) {
this.maskChar = char;
}
setCaseInsensitive(value) {
this.caseInsensitive = value;
}
setLogDetections(value) {
this.logDetections = value;
}
setCustomReplacer(replacer) {
this.customReplacer = replacer;
}
// Utility methods
sanitizeText(text) {
return this.filter(text, "replace", "full");
}
isClean(text) {
return this.detect(text).length === 0;
}
getCleanPercentage(text) {
const detected = this.detect(text);
const totalWords = text.split(/\s+/).length;
return totalWords > 0 ? (totalWords - detected.length) / totalWords * 100 : 100;
}
};
/**
 * Built-in word lists, grouped by category.
 * The spam and phishing lists contain multi-word phrases.
 */
var PRESET_WORDS = {
  // Placeholder profanity terms.
  profanity: [
    "badword", "offensive", "rude", "vulgar", "inappropriate",
    "curse", "swear", "expletive", "obscene", "lewd"
  ],
  // Phrases and terms listed under the spam category.
  spam: [
    "buy now", "click here", "free money", "make money fast", "weight loss",
    "viagra", "casino", "lottery", "winner"
  ],
  // Phrases listed under the phishing category.
  phishing: [
    "verify account", "update password", "security alert",
    "suspended account", "unusual activity", "login attempt"
  ]
};
/**
 * Factory wrapper around the WordSensor constructor.
 *
 * @param {object} [config] Options forwarded unchanged to `new WordSensor(...)`.
 * @returns {WordSensor} The newly constructed sensor.
 */
function createWordSensor(config = {}) {
  const sensor = new WordSensor(config);
  return sensor;
}
/**
 * Build a case-insensitive, logging sensor preloaded with PRESET_WORDS.profanity.
 *
 * @param {string} [maskChar] Mask character passed to the sensor (default "*").
 * @returns {WordSensor}
 */
function createProfanityFilter(maskChar = "*") {
  const options = {
    words: PRESET_WORDS.profanity,
    maskChar,
    caseInsensitive: true,
    logDetections: true
  };
  return new WordSensor(options);
}
/**
 * Build a case-insensitive, logging sensor preloaded with PRESET_WORDS.spam.
 * Word-boundary matching is turned off for this preset.
 *
 * @param {string} [maskChar] Mask character passed to the sensor (default "#").
 * @returns {WordSensor}
 */
function createSpamFilter(maskChar = "#") {
  const options = {
    words: PRESET_WORDS.spam,
    maskChar,
    caseInsensitive: true,
    logDetections: true,
    wordBoundary: false
  };
  return new WordSensor(options);
}
/**
 * Build a case-insensitive, logging sensor preloaded with PRESET_WORDS.phishing.
 * Word-boundary matching is turned off for this preset.
 *
 * @param {string} [maskChar] Mask character passed to the sensor (default "!").
 * @returns {WordSensor}
 */
function createPhishingFilter(maskChar = "!") {
  const options = {
    words: PRESET_WORDS.phishing,
    maskChar,
    caseInsensitive: true,
    logDetections: true,
    wordBoundary: false
  };
  return new WordSensor(options);
}
/**
 * Follow a dot-separated path ("a.b.c") through nested values.
 *
 * @param {*} obj Value to start from.
 * @param {string} path Dot-separated property names.
 * @returns {*} The value at the path, or undefined as soon as a step is
 *   missing or the value being descended into is falsy.
 */
function getNestedValue(obj, path) {
  let current = obj;
  for (const segment of path.split(".")) {
    if (!current) {
      // Nothing to descend into; every remaining step would also be undefined.
      return void 0;
    }
    current = current[segment];
  }
  return current;
}
/**
 * Fetch a word list from `url` and add it to `sensor`.
 *
 * The JSON response is used directly when it is an array; otherwise, when
 * `path` is given, the list is looked up with getNestedValue(data, path).
 * A non-array response without a usable path yields an empty list.
 *
 * The list is validated before the sensor is touched: if it is not an array
 * of strings nothing is added, so a malformed response cannot leave the
 * sensor partially updated or holding non-string entries.
 *
 * @param {string} url Endpoint returning JSON.
 * @param {string} [path] Dot-separated path to the array inside the response.
 * @param {{ addWords: (words: string[]) => void }} sensor Target sensor.
 * @returns {Promise<boolean>} true when the words were added; false on any
 *   failure (the error is logged with console.error, never thrown).
 */
async function loadForbiddenWordsFromAPI(url, path, sensor) {
  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Failed to fetch: ${response.statusText}`);
    }
    const data = await response.json();
    let words = [];
    if (Array.isArray(data)) {
      words = data;
    } else if (path) {
      words = getNestedValue(data, path) ?? [];
    }
    const isStringList = Array.isArray(words) && words.every((entry) => typeof entry === "string");
    if (!isStringList) {
      throw new Error("Invalid words format from API");
    }
    sensor.addWords(words);
    console.log("Forbidden words added from API:", words);
    return true;
  } catch (error) {
    console.error("Error loading forbidden words:", error);
    return false;
  }
}
/**
 * Read a text file with FileReader and return its word list.
 *
 * Each line is trimmed; blank lines and lines starting with "#" are dropped.
 *
 * @param {Blob} file File (or Blob) handed to FileReader.readAsText.
 * @returns {Promise<string[]>} Resolves with the remaining lines; rejects if
 *   reading fails or the content cannot be split into lines.
 */
async function loadWordsFromFile(file) {
  const isWordLine = (line) => line.length > 0 && !line.startsWith("#");
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (event) => {
      try {
        const content = event.target?.result;
        const lines = content.split("\n");
        const words = [];
        for (const rawLine of lines) {
          const line = rawLine.trim();
          if (isWordLine(line)) {
            words.push(line);
          }
        }
        resolve(words);
      } catch (error) {
        reject(error);
      }
    };
    reader.onerror = () => reject(new Error("Failed to read file"));
    reader.readAsText(file);
  });
}
/**
 * Check whether `pattern` can be compiled by the RegExp constructor.
 *
 * @param {string} pattern Candidate regular-expression source.
 * @returns {boolean} true if `new RegExp(pattern)` succeeds, false if it throws.
 */
function validateRegexPattern(pattern) {
  let compiles = true;
  try {
    new RegExp(pattern);
  } catch {
    compiles = false;
  }
  return compiles;
}
/**
 * Prefix every regex metacharacter in `str` with a backslash so the result
 * matches the original text literally when used as a pattern.
 *
 * @param {string} str Text to escape.
 * @returns {string} Escaped text.
 */
function escapeRegexSpecialChars(str) {
  const metaChars = /[-\/\\^$*+?.()|[\]{}]/g;
  return str.replace(metaChars, (char) => "\\" + char);
}
/**
 * Build a replacer that maps matched words to fixed replacements.
 *
 * The matched word is lower-cased before the lookup, so the keys of
 * `replacementMap` need to be lower-case to be found. Only the map's own
 * entries are consulted, and an empty-string replacement is honoured (it
 * removes the word) rather than being treated as "no replacement".
 *
 * @param {Record<string, string>} replacementMap Lower-case word -> replacement.
 * @returns {(word: string) => string} Replacer returning the mapped value, or
 *   the word unchanged when the map has no entry for it.
 */
function createCustomReplacer(replacementMap) {
  return (word) => {
    const key = word.toLowerCase();
    // Own-property check keeps words like "constructor" from resolving to inherited members.
    if (!Object.prototype.hasOwnProperty.call(replacementMap, key)) {
      return word;
    }
    return replacementMap[key] ?? word;
  };
}
/**
 * Build a replacer that swaps known words for an emoji.
 *
 * The lookup is case-insensitive. Words without an entry get the fallback
 * emoji. A Map is used so that words such as "constructor" cannot resolve to
 * inherited object members and are given the fallback like any other
 * unknown word.
 *
 * @returns {(word: string) => string} Replacer returning an emoji string.
 */
function createEmojiReplacer() {
  const fallbackEmoji = "\u{1F910}";
  const emojiByWord = new Map([
    ["badword", "\u{1F92C}"],
    ["offensive", "\u{1F624}"],
    ["rude", "\u{1F612}"],
    ["vulgar", "\u{1F922}"],
    ["inappropriate", "\u{1F633}"],
    ["curse", "\u{1F4A2}"],
    ["swear", "\u{1F620}"],
    ["expletive", "\u{1F92F}"],
    ["obscene", "\u{1F631}"],
    ["lewd", "\u{1F635}"]
  ]);
  return (word) => emojiByWord.get(word.toLowerCase()) ?? fallbackEmoji;
}
function batchFilter(texts, sensor, mode = "replace", maskType = "full") {
return texts.map((text) => sensor.filter(text, mode, maskType));
}
function batchDetect(texts, sensor) {
return texts.map((text) => ({
text,
detected: sensor.detect(text)
}));
}
function getBatchStats(texts, sensor) {
const results = texts.map((text) => ({
text,
detected: sensor.detect(text),
cleanPercentage: sensor.getCleanPercentage(text)
}));
const totalTexts = results.length;
const cleanTexts = results.filter((r) => r.detected.length === 0).length;
const dirtyTexts = totalTexts - cleanTexts;
const totalDetections = results.reduce((sum, r) => sum + r.detected.length, 0);
const averageCleanPercentage = results.reduce((sum, r) => sum + r.cleanPercentage, 0) / totalTexts;
return {
totalTexts,
cleanTexts,
dirtyTexts,
totalDetections,
averageCleanPercentage
};
}
export {
PRESET_WORDS,
WordSensor,
batchDetect,
batchFilter,
createCustomReplacer,
createEmojiReplacer,
createPhishingFilter,
createProfanityFilter,
createSpamFilter,
createWordSensor,
escapeRegexSpecialChars,
getBatchStats,
getNestedValue,
loadForbiddenWordsFromAPI,
loadWordsFromFile,
validateRegexPattern
};
//# sourceMappingURL=index.mjs.map