glin-profanity
Version:
Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content is involved, Glin-Profanity helps you maintain a clean and respectful environment.
137 lines • 6.22 kB
JavaScript
;
// Interop helper emitted by the TypeScript compiler: reuses a helper already
// present on `this` when there is one, otherwise defines it locally.
// A value flagged with `__esModule` is returned untouched; anything else is
// wrapped so that it can be read through a `default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module with `__esModule` so that interop helpers such as
// `__importDefault` return it as-is instead of wrapping it in `{ default }`.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the `Filter` export up front; the real value is assigned after the
// class definition at the bottom of the file.
exports.Filter = void 0;
// Word dictionary, read below as `dictionary_1.default[language]`.
// NOTE(review): presumably each entry is an array of words — confirm against
// ../data/dictionary.
const dictionary_1 = __importDefault(require("../data/dictionary"));
/**
 * Profanity filter that checks text against a lower-cased word list built
 * from the language dictionaries and/or caller-supplied custom words.
 *
 * Recognised `config` fields (all optional; `config` itself may be omitted):
 *  - `caseSensitive`        match case-sensitively (default `false`). The word
 *                           list is always stored lower-cased, so in this mode
 *                           only lower-case occurrences match.
 *  - `allowObfuscatedMatch` run `normalizeObfuscated` on the text before
 *                           matching (default `false`).
 *  - `wordBoundaries`       wrap each word in `\b` anchors (default: the
 *                           inverse of `allowObfuscatedMatch`).
 *  - `replaceWith`          text substituted for each match in
 *                           `checkProfanity`; an empty string removes matches.
 *  - `severityLevels`       include `severityMap` in results (default `false`).
 *  - `ignoreWords`          words exempt from detection (compared lower-cased).
 *  - `logProfanity`         stored on the instance (default `false`); no method
 *                           of this class reads it.
 *  - `fuzzyToleranceLevel`  minimum score for a fuzzy match (default `0.8`).
 *  - `allLanguages`         load every language in the dictionary.
 *  - `languages`            languages to load (default `['english']`).
 *  - `customWords`          extra words appended to the list.
 */
class Filter {
    /**
     * @param {object} [config] See the class description for the fields read.
     * @throws {TypeError} If a requested language has no dictionary entry.
     */
    constructor(config) {
        const options = config === null || config === undefined ? {} : config;
        // Fall back only on null/undefined so explicit `false`/`0` are honoured.
        const orDefault = (value, fallback) => (value === null || value === undefined ? fallback : value);

        this.caseSensitive = orDefault(options.caseSensitive, false);
        this.allowObfuscatedMatch = orDefault(options.allowObfuscatedMatch, false);
        // Word boundaries are off by default when obfuscation matching is on.
        this.wordBoundaries = orDefault(options.wordBoundaries, !this.allowObfuscatedMatch);
        this.replaceWith = options.replaceWith;
        this.severityLevels = orDefault(options.severityLevels, false);
        const ignored = options.ignoreWords;
        this.ignoreWords = new Set(ignored === null || ignored === undefined
            ? []
            : ignored.map((word) => word.toLowerCase()));
        this.logProfanity = orDefault(options.logProfanity, false);
        this.fuzzyToleranceLevel = orDefault(options.fuzzyToleranceLevel, 0.8);

        // Collect into one array with push instead of re-spreading the whole
        // list for every language.
        const words = [];
        const addAll = (list) => {
            for (const word of list) {
                words.push(word);
            }
        };

        if (options.allLanguages) {
            for (const lang of Object.keys(dictionary_1.default)) {
                addAll(dictionary_1.default[lang]);
            }
        }
        else {
            const languages = new Set(options.languages || ['english']);
            for (const lang of languages) {
                const entries = Object.prototype.hasOwnProperty.call(dictionary_1.default, lang)
                    ? dictionary_1.default[lang]
                    : undefined;
                if (entries === undefined || entries === null) {
                    // Fail loudly: silently skipping a misspelled language
                    // would leave the filter with an empty word list.
                    throw new TypeError(`Unsupported language: "${lang}"`);
                }
                addAll(entries);
            }
        }
        if (options.customWords) {
            addAll(options.customWords);
        }

        this.words = new Map();
        for (const word of words) {
            // An empty word would compile to a regex that matches everywhere.
            if (word === '') {
                continue;
            }
            this.words.set(word.toLowerCase(), 1);
        }
    }

    /**
     * Undo common obfuscation: runs of a repeated ASCII letter are collapsed
     * to two, then `@`→a, `$`→s, `!`→i, `1`→i and `*` is removed.
     *
     * @param {string} text
     * @returns {string} The normalised text.
     */
    normalizeObfuscated(text) {
        const collapsed = text.replace(/([a-zA-Z])\1{1,}/g, '$1$1'); // allow max 2 consecutive
        const charMap = {
            '@': 'a',
            $: 's',
            '!': 'i',
            '1': 'i',
            '*': '',
        };
        // Every character the class below can match is a key of charMap, so a
        // direct lookup is safe; a `|| m` fallback would put `*` back because
        // its replacement is the (falsy) empty string.
        return collapsed.replace(/[@$!1*]/g, (m) => charMap[m]);
    }

    /**
     * Build a fresh global regex matching `word` literally.
     *
     * A new object is returned on every call, so the stateful `lastIndex` of a
     * `g` regex is never shared between callers.
     * NOTE: `\b` is defined in terms of ASCII word characters, so boundaries
     * around words made of other scripts may not behave as expected.
     *
     * @param {string} word
     * @returns {RegExp}
     */
    getRegex(word) {
        const flags = this.caseSensitive ? 'g' : 'gi';
        const escapedWord = word.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        const boundary = this.wordBoundaries ? '\\b' : '';
        return new RegExp(`${boundary}${escapedWord}${boundary}`, flags);
    }

    /**
     * Loose match: strips everything except a-z from the lower-cased text and
     * scores how many leading characters of `word` appear in it, in order (not
     * necessarily adjacent), across the whole text.
     *
     * @param {string} word
     * @param {string} text
     * @returns {boolean} True when matched/word-length >= `fuzzyToleranceLevel`.
     */
    isFuzzyToleranceMatch(word, text) {
        const simplifiedText = text.toLowerCase().replace(/[^a-z]/g, '');
        const simplifiedWord = word.toLowerCase();
        if (simplifiedWord.length === 0) {
            return false; // avoids a 0/0 score
        }
        let matchCount = 0;
        let index = 0;
        for (let i = 0; i < simplifiedText.length; i++) {
            if (simplifiedText[i] === simplifiedWord[index]) {
                matchCount++;
                index++;
                if (index === simplifiedWord.length) {
                    break;
                }
            }
        }
        const score = matchCount / simplifiedWord.length;
        return score >= this.fuzzyToleranceLevel;
    }

    /**
     * Classify how `word` occurs in `text`.
     *
     * @param {string} word
     * @param {string} text
     * @returns {1|2|undefined} 1 for a regex match, 2 for a fuzzy-only match
     *   (attempted only when word boundaries are off), undefined for no match.
     */
    evaluateSeverity(word, text) {
        if (this.getRegex(word).test(text)) {
            return 1;
        }
        if (!this.wordBoundaries && this.isFuzzyToleranceMatch(word, text)) {
            return 2;
        }
        return undefined;
    }

    /**
     * Whether any non-ignored word matches `value` by regex or fuzzy match.
     *
     * NOTE: a fuzzy-only match makes this return true even though
     * `checkProfanity` reports only regex matches for the same text.
     *
     * @param {string} value
     * @returns {boolean}
     */
    isProfane(value) {
        const input = this.allowObfuscatedMatch ? this.normalizeObfuscated(value) : value;
        for (const word of this.words.keys()) {
            if (this.ignoreWords.has(word.toLowerCase())) {
                continue;
            }
            if (this.evaluateSeverity(word, input) !== undefined) {
                return true;
            }
        }
        return false;
    }

    /**
     * Find every regex occurrence of a listed word and optionally mask it.
     *
     * Detection runs on the normalised text when `allowObfuscatedMatch` is on;
     * masking is applied to the ORIGINAL text, so an occurrence that only
     * exists after normalisation is reported but not masked.
     *
     * @param {string} text
     * @returns {{containsProfanity: boolean, profaneWords: string[],
     *   processedText: (string|undefined), severityMap: (Object|undefined)}}
     *   `processedText` is set only when `replaceWith` is configured;
     *   `severityMap` (matched text -> severity) only when `severityLevels` is
     *   on and something matched.
     */
    checkProfanity(text) {
        const input = this.allowObfuscatedMatch ? this.normalizeObfuscated(text) : text;
        const profaneWords = [];
        const matchedDictWords = [];
        const severityMap = {};
        for (const dictWord of this.words.keys()) {
            if (this.ignoreWords.has(dictWord.toLowerCase())) {
                continue;
            }
            const severity = this.evaluateSeverity(dictWord, input);
            if (severity === undefined) {
                continue;
            }
            const regex = this.getRegex(dictWord);
            let found = false;
            let match;
            while ((match = regex.exec(input)) !== null) {
                if (match[0] === '') {
                    // A zero-length match does not advance lastIndex; step
                    // manually so the loop always terminates.
                    regex.lastIndex++;
                    continue;
                }
                profaneWords.push(match[0]);
                severityMap[match[0]] = severity;
                found = true;
            }
            if (found) {
                matchedDictWords.push(dictWord);
            }
        }

        const uniqueWords = Array.from(new Set(profaneWords));
        // Null check rather than truthiness: '' is a valid "remove it" value.
        const hasReplacement = this.replaceWith !== undefined && this.replaceWith !== null;
        let processedText;
        if (hasReplacement) {
            const replacement = String(this.replaceWith);
            processedText = text;
            // Longest first, so a word is masked whole before any shorter word
            // it contains gets a chance to split it.
            const longestFirst = matchedDictWords.slice().sort((a, b) => b.length - a.length);
            for (const dictWord of longestFirst) {
                // Reuse the detection regex so masking honours the same case
                // and boundary rules; the function replacer keeps `$` sequences
                // in the replacement literal.
                processedText = processedText.replace(this.getRegex(dictWord), () => replacement);
            }
        }
        return {
            containsProfanity: profaneWords.length > 0,
            profaneWords: uniqueWords,
            processedText: hasReplacement ? processedText : undefined,
            severityMap: this.severityLevels && Object.keys(severityMap).length > 0
                ? severityMap
                : undefined,
        };
    }
}
// Publish the class as this module's named `Filter` export.
exports.Filter = Filter;
//# sourceMappingURL=Filter.js.map