prompt-bouncer
Version:
A lightweight, customizable content moderation library for AI applications. Filters profanity, explicit content, and inappropriate prompts for text-to-image generation.
314 lines • 10.8 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AIContentFilter = void 0;
const wordLists_1 = require("./wordLists");
/**
 * Default configuration for the content filter.
 *
 * Every AIContentFilter instance starts from these values and overlays the
 * caller's overrides on top. The object is frozen so the shared defaults
 * cannot be reassigned by accident. The freeze is shallow: the two nested
 * arrays are not frozen.
 */
const DEFAULT_CONFIG = Object.freeze({
    // Each enable* flag switches one detection category on or off.
    enableProfanityFilter: true, // "profanity" category
    enableExplicitFilter: true, // "explicit" category
    enableViolenceFilter: true, // "violence" category
    enableSelfHarmFilter: true, // "self_harm" category
    enableDrugsFilter: false, // "drugs" category
    enableHateSpeechFilter: true, // "hate" category
    enableMildFilter: false, // "mild" category. Disabled by default - allows contextual usage
    // Extra words/phrases to ban in addition to the enabled categories.
    customBannedWords: [],
    // Words that are never flagged, even if they appear in a banned list.
    allowedWords: [],
    // When false, banned words and incoming text are lower-cased before comparison.
    caseSensitive: false,
    // When true, only whole words/phrases match; when false, substrings match too.
    strictWordBoundaries: true,
});
/**
 * Look-alike characters and the letter each one is treated as during
 * normalization. "!" is handled separately (see normalizeText) because it is
 * also ordinary sentence punctuation.
 */
const CHARACTER_SUBSTITUTIONS = {
    "@": "a",
    "3": "e",
    "1": "i",
    "0": "o",
    "5": "s",
    "7": "t",
    $: "s",
};
/**
 * Escape every regex metacharacter in `value` so it can be embedded in a
 * RegExp source and matched literally.
 */
function escapeRegExp(value) {
    return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
/**
 * AI Content Filter - Main class for content moderation.
 *
 * Builds a set of banned words/phrases from the enabled detection categories
 * plus any custom words, then checks text against it. Words on the allow
 * list are never flagged by the per-word check.
 */
class AIContentFilter {
    /**
     * @param {object} [config] Overrides merged on top of DEFAULT_CONFIG.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_CONFIG, ...config };
        this.bannedWords = new Set();
        this.allowedWords = this.buildAllowedWords();
        this.initializeBannedWords();
    }
    /**
     * Apply the configured case policy to a list entry: unchanged when
     * caseSensitive is on, lower-cased otherwise.
     */
    applyCase(word) {
        return this.config.caseSensitive ? word : word.toLowerCase();
    }
    /**
     * Build the allow-list set from config, case-folded the same way
     * addAllowedWords() does so both entry points behave identically.
     */
    buildAllowedWords() {
        return new Set(Array.from(this.config.allowedWords ?? [], (word) => this.applyCase(word)));
    }
    /**
     * Initialize the banned words list based on configuration:
     * keywords of every enabled category, followed by customBannedWords.
     */
    initializeBannedWords() {
        for (const [categoryName, category] of Object.entries(wordLists_1.DETECTION_CATEGORIES)) {
            if (this.shouldIncludeCategory(categoryName)) {
                this.addBannedWords(category.keywords);
            }
        }
        this.addBannedWords(this.config.customBannedWords ?? []);
    }
    /**
     * Check if a category should be included based on config.
     * Unknown category names are never included.
     */
    shouldIncludeCategory(categoryName) {
        switch (categoryName) {
            case "profanity":
                return this.config.enableProfanityFilter;
            case "explicit":
                return this.config.enableExplicitFilter;
            case "violence":
                return this.config.enableViolenceFilter;
            case "self_harm":
                return this.config.enableSelfHarmFilter;
            case "drugs":
                return this.config.enableDrugsFilter;
            case "hate":
                return this.config.enableHateSpeechFilter;
            case "mild":
                return this.config.enableMildFilter;
            default:
                return false; // Don't include unknown categories
        }
    }
    /**
     * Normalize text for processing: lower-case it (unless caseSensitive)
     * and map common look-alike characters back to letters.
     *
     * "!" only becomes "i" when a word character follows it. Otherwise a
     * sentence-ending "!" would be glued onto the previous word
     * ("shut up!" -> "shut upi") and defeat whole-phrase matching.
     *
     * @param {string} text
     * @returns {string} Normalized text, same length as the case-folded input.
     */
    normalizeText(text) {
        const cased = this.config.caseSensitive ? text : text.toLowerCase();
        return cased
            .replace(/[@31057$]/g, (ch) => CHARACTER_SUBSTITUTIONS[ch])
            .replace(/!(?=\w)/g, "i");
    }
    /**
     * Extract words from text: punctuation is replaced by spaces, then the
     * text is split on whitespace. Empty pieces are dropped.
     */
    extractWords(text) {
        const cleanText = text.replace(/[^\w\s]/g, " ");
        return cleanText.split(/\s+/).filter((word) => word.length > 0);
    }
    /**
     * Check if a word matches any banned word.
     *
     * @param {string} word A single token.
     * @returns {{isBanned: boolean, matchedWord?: string, category?: string}}
     */
    isWordBanned(word) {
        const normalizedWord = this.normalizeText(word);
        // The allow list wins. Also try the merely case-folded token so that
        // allow-list entries containing look-alike digits can still match.
        if (this.allowedWords.has(normalizedWord) ||
            this.allowedWords.has(this.applyCase(word))) {
            return { isBanned: false };
        }
        // Check exact matches
        if (this.bannedWords.has(normalizedWord)) {
            return {
                isBanned: true,
                matchedWord: normalizedWord,
                category: this.getCategoryForWord(normalizedWord),
            };
        }
        // Partial matching: the token must CONTAIN a banned word. The reverse
        // test (banned word contains the token) is deliberately not done - it
        // would flag harmless short tokens such as "a" or "up" merely because
        // some banned word or phrase happens to include those letters.
        if (!this.config.strictWordBoundaries) {
            for (const bannedWord of this.bannedWords) {
                if (normalizedWord.includes(bannedWord)) {
                    return {
                        isBanned: true,
                        matchedWord: bannedWord,
                        category: this.getCategoryForWord(bannedWord),
                    };
                }
            }
        }
        return { isBanned: false };
    }
    /**
     * Get category for a specific word. Keywords are compared under the
     * same case policy that was applied when the banned set was built.
     *
     * @returns {string} The first category listing the word, or "custom".
     */
    getCategoryForWord(word) {
        for (const [categoryName, category] of Object.entries(wordLists_1.DETECTION_CATEGORIES)) {
            if (category.keywords.some((keyword) => this.applyCase(keyword) === word)) {
                return categoryName;
            }
        }
        return "custom";
    }
    /**
     * Get severity for categories: "high" if any category is high,
     * otherwise "medium" if any is medium, otherwise "low".
     */
    getSeverityForCategories(categories) {
        if (categories.some((cat) => wordLists_1.DETECTION_CATEGORIES[cat]?.severity === "high")) {
            return "high";
        }
        if (categories.some((cat) => wordLists_1.DETECTION_CATEGORIES[cat]?.severity === "medium")) {
            return "medium";
        }
        return "low";
    }
    /**
     * Clean text by replacing each whole-word occurrence of the given words
     * with asterisks of the same length.
     *
     * Words are regex-escaped, so entries containing characters such as "+"
     * or "(" are matched literally instead of throwing. Lookarounds are used
     * instead of \b so that entries starting or ending with punctuation can
     * still match; for ordinary words the two are equivalent.
     *
     * @param {string} text
     * @param {Iterable<string>} flaggedWords
     * @returns {string}
     */
    cleanText(text, flaggedWords) {
        const flags = this.config.caseSensitive ? "g" : "gi";
        let cleaned = text;
        for (const word of new Set(flaggedWords)) {
            if (!word) {
                continue;
            }
            const pattern = new RegExp(`(?<!\\w)${escapeRegExp(word)}(?!\\w)`, flags);
            cleaned = cleaned.replace(pattern, "*".repeat(word.length));
        }
        return cleaned;
    }
    /**
     * Main moderation function.
     *
     * Multi-word banned phrases are checked against the whole text first;
     * then every token is checked individually, skipping tokens whose match
     * is already covered by a flagged phrase.
     *
     * NOTE: tokens come from extractWords(), which strips punctuation, so
     * the "@", "$" and "!" substitutions only influence phrase matching;
     * digit substitutions apply to both paths.
     *
     * @param {string} text
     * @returns {object} Result with isSafe, reason, flaggedWords, categories,
     *   confidence, originalText, cleanedText and severity. Empty or
     *   non-string input yields a "safe" result without a reason.
     */
    moderate(text) {
        if (!text || typeof text !== "string") {
            return {
                isSafe: true,
                flaggedWords: [],
                categories: [],
                confidence: 0,
                originalText: text || "",
                cleanedText: text || "",
                severity: "low",
            };
        }
        const flaggedWords = [];
        // Everything to mask in cleanedText: the flagged entries plus the raw
        // tokens that triggered them, so obfuscated spellings are masked too.
        const maskTargets = [];
        const categories = new Set();
        const haystack = this.config.caseSensitive
            ? text
            : this.normalizeText(text);
        const phraseFlags = this.config.caseSensitive ? "" : "i";
        // First, check for multi-word phrases (higher priority)
        for (const bannedWord of this.bannedWords) {
            if (!bannedWord.includes(" ")) {
                continue;
            }
            const escaped = escapeRegExp(bannedWord);
            const source = this.config.strictWordBoundaries
                ? `(?<!\\w)${escaped}(?!\\w)`
                : escaped;
            if (new RegExp(source, phraseFlags).test(haystack)) {
                flaggedWords.push(bannedWord);
                maskTargets.push(bannedWord);
                const category = this.getCategoryForWord(bannedWord);
                if (category) {
                    categories.add(category);
                }
            }
        }
        // Then check individual words (only if not part of an already flagged phrase)
        for (const word of this.extractWords(text)) {
            const result = this.isWordBanned(word);
            if (!result.isBanned || !result.matchedWord) {
                continue;
            }
            const isPartOfPhrase = flaggedWords.some((flaggedPhrase) => flaggedPhrase.includes(" ") &&
                flaggedPhrase.includes(result.matchedWord));
            if (isPartOfPhrase) {
                continue;
            }
            flaggedWords.push(result.matchedWord);
            maskTargets.push(result.matchedWord, word);
            if (result.category) {
                categories.add(result.category);
            }
        }
        const categoriesArray = Array.from(categories);
        const isSafe = flaggedWords.length === 0;
        const severity = this.getSeverityForCategories(categoriesArray);
        // Every flagged occurrence (duplicates included) adds 0.25, capped at 1.
        const confidence = flaggedWords.length > 0 ? Math.min(flaggedWords.length * 0.25, 1.0) : 0;
        return {
            isSafe,
            reason: !isSafe
                ? `Content contains ${categoriesArray.join(", ")} violations`
                : undefined,
            flaggedWords: [...new Set(flaggedWords)], // Remove duplicates
            categories: categoriesArray,
            confidence,
            originalText: text,
            cleanedText: this.cleanText(text, maskTargets),
            severity,
        };
    }
    /**
     * Quick boolean check if content is safe
     */
    isSafe(text) {
        return this.moderate(text).isSafe;
    }
    /**
     * Get only flagged words
     */
    getFlaggedWords(text) {
        return this.moderate(text).flaggedWords;
    }
    /**
     * Get cleaned version of text
     */
    clean(text) {
        return this.moderate(text).cleanedText;
    }
    /**
     * Add custom words to banned list (case-folded unless caseSensitive).
     */
    addBannedWords(words) {
        words.forEach((word) => {
            this.bannedWords.add(this.applyCase(word));
        });
    }
    /**
     * Remove words from banned list (case-folded unless caseSensitive).
     */
    removeBannedWords(words) {
        words.forEach((word) => {
            this.bannedWords.delete(this.applyCase(word));
        });
    }
    /**
     * Add words to allowed list (case-folded unless caseSensitive).
     */
    addAllowedWords(words) {
        words.forEach((word) => {
            this.allowedWords.add(this.applyCase(word));
        });
    }
    /**
     * Get current configuration.
     *
     * Returns a copy whose list options are copied as well, so mutating the
     * result cannot alter this filter's config or the shared defaults.
     */
    getConfig() {
        const snapshot = { ...this.config };
        for (const key of ["customBannedWords", "allowedWords"]) {
            if (Array.isArray(snapshot[key])) {
                snapshot[key] = [...snapshot[key]];
            }
        }
        return snapshot;
    }
    /**
     * Update configuration.
     *
     * Both word sets are rebuilt from the merged config, so words added or
     * removed at runtime through addBannedWords / removeBannedWords /
     * addAllowedWords are discarded.
     */
    updateConfig(newConfig) {
        this.config = { ...this.config, ...newConfig };
        this.bannedWords.clear();
        this.allowedWords = this.buildAllowedWords();
        this.initializeBannedWords();
    }
}
exports.AIContentFilter = AIContentFilter;
//# sourceMappingURL=filter.js.map