UNPKG

prompt-bouncer

Version:

A lightweight, customizable content moderation library for AI applications. Filters profanity, explicit content, and inappropriate prompts for text-to-image generation.

314 lines 10.8 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.AIContentFilter = void 0; const wordLists_1 = require("./wordLists"); /** * Default configuration for the content filter */ const DEFAULT_CONFIG = { enableProfanityFilter: true, enableExplicitFilter: true, enableViolenceFilter: true, enableSelfHarmFilter: true, enableDrugsFilter: false, enableHateSpeechFilter: true, enableMildFilter: false, // Disabled by default - allows contextual usage customBannedWords: [], allowedWords: [], caseSensitive: false, strictWordBoundaries: true, }; /** * AI Content Filter - Main class for content moderation */ class AIContentFilter { constructor(config = {}) { this.config = { ...DEFAULT_CONFIG, ...config }; this.bannedWords = new Set(); this.allowedWords = new Set(this.config.allowedWords); this.initializeBannedWords(); } /** * Initialize the banned words list based on configuration */ initializeBannedWords() { // Add words from enabled categories Object.entries(wordLists_1.DETECTION_CATEGORIES).forEach(([categoryName, category]) => { const shouldInclude = this.shouldIncludeCategory(categoryName); if (shouldInclude) { category.keywords.forEach((word) => { const processedWord = this.config.caseSensitive ? word : word.toLowerCase(); this.bannedWords.add(processedWord); }); } }); // Add custom banned words this.config.customBannedWords.forEach((word) => { const processedWord = this.config.caseSensitive ? word : word.toLowerCase(); this.bannedWords.add(processedWord); }); } /** * Check if a category should be included based on config */ shouldIncludeCategory(categoryName) { switch (categoryName) { case "profanity": return this.config.enableProfanityFilter; case "explicit": return this.config.enableExplicitFilter; case "violence": return this.config.enableViolenceFilter; case "self_harm": return this.config.enableSelfHarmFilter; case "drugs": return this.config.enableDrugsFilter; case "hate": return this.config.enableHateSpeechFilter; case "mild": return this.config.enableMildFilter; default: return false; // Don't include unknown categories } } /** * Normalize text for processing */ normalizeText(text) { if (!this.config.caseSensitive) { text = text.toLowerCase(); } // Replace common character substitutions const substitutions = { "@": "a", "3": "e", "1": "i", "0": "o", "5": "s", "7": "t", $: "s", "!": "i", }; let normalized = text; Object.entries(substitutions).forEach(([char, replacement]) => { // Escape special regex characters const escapedChar = char.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); normalized = normalized.replace(new RegExp(escapedChar, "g"), replacement); }); return normalized; } /** * Extract words from text */ extractWords(text) { // Remove punctuation and split by whitespace const cleanText = text.replace(/[^\w\s]/g, " "); return cleanText.split(/\s+/).filter((word) => word.length > 0); } /** * Check if a word matches any banned word */ isWordBanned(word) { const normalizedWord = this.normalizeText(word); // Check if word is in allowed list if (this.allowedWords.has(normalizedWord)) { return { isBanned: false }; } // Check exact matches if (this.bannedWords.has(normalizedWord)) { return { isBanned: true, matchedWord: normalizedWord, category: this.getCategoryForWord(normalizedWord), }; } // Check for partial matches if strict word boundaries is disabled if (!this.config.strictWordBoundaries) { for (const bannedWord of this.bannedWords) { if (normalizedWord.includes(bannedWord) || bannedWord.includes(normalizedWord)) { return { isBanned: true, matchedWord: bannedWord, category: this.getCategoryForWord(bannedWord), }; } } } return { isBanned: false }; } /** * Get category for a specific word */ getCategoryForWord(word) { for (const [categoryName, category] of Object.entries(wordLists_1.DETECTION_CATEGORIES)) { if (category.keywords.includes(word)) { return categoryName; } } return "custom"; } /** * Get severity for categories */ getSeverityForCategories(categories) { if (categories.some((cat) => wordLists_1.DETECTION_CATEGORIES[cat]?.severity === "high")) { return "high"; } if (categories.some((cat) => wordLists_1.DETECTION_CATEGORIES[cat]?.severity === "medium")) { return "medium"; } return "low"; } /** * Clean text by replacing banned words */ cleanText(text, flaggedWords) { let cleaned = text; flaggedWords.forEach((word) => { const replacement = "*".repeat(word.length); const regex = new RegExp(`\\b${word}\\b`, this.config.caseSensitive ? "g" : "gi"); cleaned = cleaned.replace(regex, replacement); }); return cleaned; } /** * Main moderation function */ moderate(text) { if (!text || typeof text !== "string") { return { isSafe: true, flaggedWords: [], categories: [], confidence: 0, originalText: text || "", cleanedText: text || "", severity: "low", }; } const flaggedWords = []; const categories = new Set(); const normalizedText = this.normalizeText(text); // First, check for multi-word phrases (higher priority) this.bannedWords.forEach((bannedWord) => { if (bannedWord.includes(" ")) { // Multi-word phrase const regex = this.config.strictWordBoundaries ? new RegExp(`\\b${bannedWord.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, this.config.caseSensitive ? "g" : "gi") : new RegExp(bannedWord.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), this.config.caseSensitive ? "g" : "gi"); if (regex.test(this.config.caseSensitive ? text : normalizedText)) { flaggedWords.push(bannedWord); const category = this.getCategoryForWord(bannedWord); if (category) { categories.add(category); } } } }); // Then check individual words (only if not part of an already flagged phrase) const words = this.extractWords(text); words.forEach((word) => { const result = this.isWordBanned(word); if (result.isBanned && result.matchedWord) { // Check if this word is part of an already detected phrase const isPartOfPhrase = flaggedWords.some((flaggedPhrase) => flaggedPhrase.includes(" ") && flaggedPhrase.includes(result.matchedWord)); if (!isPartOfPhrase) { flaggedWords.push(result.matchedWord); if (result.category) { categories.add(result.category); } } } }); const categoriesArray = Array.from(categories); const isSafe = flaggedWords.length === 0; const severity = this.getSeverityForCategories(categoriesArray); const confidence = flaggedWords.length > 0 ? Math.min(flaggedWords.length * 0.25, 1.0) : 0; return { isSafe, reason: !isSafe ? `Content contains ${categoriesArray.join(", ")} violations` : undefined, flaggedWords: [...new Set(flaggedWords)], // Remove duplicates categories: categoriesArray, confidence, originalText: text, cleanedText: this.cleanText(text, flaggedWords), severity, }; } /** * Quick boolean check if content is safe */ isSafe(text) { return this.moderate(text).isSafe; } /** * Get only flagged words */ getFlaggedWords(text) { return this.moderate(text).flaggedWords; } /** * Get cleaned version of text */ clean(text) { return this.moderate(text).cleanedText; } /** * Add custom words to banned list */ addBannedWords(words) { words.forEach((word) => { const processedWord = this.config.caseSensitive ? word : word.toLowerCase(); this.bannedWords.add(processedWord); }); } /** * Remove words from banned list */ removeBannedWords(words) { words.forEach((word) => { const processedWord = this.config.caseSensitive ? word : word.toLowerCase(); this.bannedWords.delete(processedWord); }); } /** * Add words to allowed list */ addAllowedWords(words) { words.forEach((word) => { const processedWord = this.config.caseSensitive ? word : word.toLowerCase(); this.allowedWords.add(processedWord); }); } /** * Get current configuration */ getConfig() { return { ...this.config }; } /** * Update configuration */ updateConfig(newConfig) { this.config = { ...this.config, ...newConfig }; this.bannedWords.clear(); this.allowedWords = new Set(this.config.allowedWords); this.initializeBannedWords(); } } exports.AIContentFilter = AIContentFilter; //# sourceMappingURL=filter.js.map