glin-profanity
Version:
Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content
259 lines (254 loc) • 9.46 kB
text/typescript
import { P as ProfanityCheckerConfig, a as ProfanityCheckResult } from './types-DI8nzwWc.cjs';
export { F as Filter, F as ProfanityFilter } from './Filter-D34Wsmrj.cjs';
export { C as CheckProfanityResult, b as ContextAwareConfig, F as FilterConfig, a as FilteredProfanityResult, L as Language, M as Match, S as SeverityLevel } from './types-B9c_ik4k.cjs';
export { H as HybridAnalysisResult, a as MLAnalysisResult, M as MLDetectorConfig, T as ToxicityLabel, b as ToxicityPrediction } from './types-Ct_ueYqw.cjs';
/**
 * Synchronous profanity-check entry point.
 *
 * @param text - The text to check.
 * @param config - Optional checker settings. The available fields and their
 *   defaults are declared on `ProfanityCheckerConfig`, which is imported from
 *   another module and not visible here.
 * @returns A `ProfanityCheckResult` describing the outcome of the check.
 *   NOTE(review): the result shape is declared elsewhere; confirm before
 *   relying on specific fields.
 */
declare function checkProfanity(text: string, config?: ProfanityCheckerConfig): ProfanityCheckResult;
/**
 * Promise-returning profanity check. Accepts the same parameters as
 * `checkProfanity` and resolves to the same result type.
 *
 * @param text - The text to check.
 * @param config - Optional checker settings (see `ProfanityCheckerConfig`).
 * @returns A promise that resolves to a `ProfanityCheckResult`.
 *   NOTE(review): rejection conditions are not visible in this declaration;
 *   callers should still handle rejection.
 */
declare function checkProfanityAsync(text: string, config?: ProfanityCheckerConfig): Promise<ProfanityCheckResult>;
/**
 * Boolean profanity test for a single word.
 *
 * @param word - The word to test.
 * @param config - Optional checker settings (see `ProfanityCheckerConfig`).
 * @returns A boolean verdict for `word`; presumably `true` when the word is
 *   considered profane under `config` — confirm against the implementation.
 */
declare function isWordProfane(word: string, config?: ProfanityCheckerConfig): boolean;
/**
* @fileoverview Leetspeak detection and normalization utilities.
* Converts leetspeak/1337 speak text back to standard characters for profanity detection.
* @module utils/leetspeak
*/
/**
 * How aggressively leetspeak substitutions are recognised.
 * Each level builds on the one before it.
 */
type LeetspeakLevel =
  /** Common digit substitutions only (0→o, 1→i, 3→e, 4→a, 5→s). */
  | 'basic'
  /** Everything in `basic`, plus symbols (@→a, $→s, !→i) and repeated chars. */
  | 'moderate'
  /** All known substitutions, including multi-character patterns. */
  | 'aggressive';
/**
 * Tuning knobs for leetspeak normalization. Every field is optional.
 */
interface LeetspeakOptions {
  /**
   * Which substitution set to apply.
   * @default 'moderate'
   */
  level?: LeetspeakLevel;

  /**
   * Collapse runs of a repeated character (e.g., "fuuuuck" → "fuck").
   * @default true
   */
  collapseRepeated?: boolean;

  /**
   * Longest run of the same character that is left alone; longer runs
   * are collapsed.
   * @default 2
   */
  maxRepeated?: number;

  /**
   * Join single characters separated by spaces (e.g., "f u c k" → "fuck").
   * @default true
   */
  removeSpacedChars?: boolean;
}
/**
 * Normalizes leetspeak text to standard characters.
 *
 * @remarks
 * NOTE(review): `LeetspeakOptions.maxRepeated` documents a default of 2, and
 * `collapseRepeatedCharacters('fuuuuck', 2)` is documented as returning
 * 'fuuck'. The 'fuuuuck' → 'fuck' example below may therefore depend on
 * additional processing — confirm against the implementation.
 *
 * @param text - The input text containing potential leetspeak.
 * @param options - Optional normalization settings; see `LeetspeakOptions`
 *   for the available fields and their documented defaults.
 * @returns The normalized text with leetspeak characters replaced.
 *
 * @example
 * ```typescript
 * import { normalizeLeetspeak } from 'glin-profanity';
 *
 * normalizeLeetspeak('f4ck');    // Returns: 'fack' (4 → a)
 * normalizeLeetspeak('sh!t');    // Returns: 'shit' (! → i)
 * normalizeLeetspeak('b1tch');   // Returns: 'bitch' (1 → i)
 * normalizeLeetspeak('@ss');     // Returns: 'ass' (@ → a)
 * normalizeLeetspeak('f u c k'); // Returns: 'fuck' (spaced characters joined)
 * normalizeLeetspeak('fuuuuck'); // Returns: 'fuck' (repeated characters collapsed)
 * ```
 */
declare function normalizeLeetspeak(text: string, options?: LeetspeakOptions): string;
/**
 * Collapses sequences of space-separated single characters into words.
 * Handles patterns like "f u c k" → "fuck" and "s h i t" → "shit", while
 * ordinary multi-character words around them are left as they are (see the
 * examples below).
 *
 * @param text - The input text.
 * @returns The text with spaced single characters joined together.
 *
 * @example
 * ```typescript
 * collapseSpacedCharacters('f u c k you');     // Returns: 'fuck you'
 * collapseSpacedCharacters('this is s h i t'); // Returns: 'this is shit'
 * ```
 */
declare function collapseSpacedCharacters(text: string): string;
/**
 * Collapses runs of the same consecutive character that exceed a threshold,
 * keeping at most `maxRepeated` characters per run. For example, "fuuuuck"
 * becomes "fuuck" with a threshold of 2 and "fuck" with a threshold of 1.
 *
 * @param text - The input text.
 * @param maxRepeated - Maximum allowed consecutive repeated characters.
 *   NOTE(review): the default used when this is omitted is not visible in
 *   this declaration — confirm against the implementation.
 * @returns The text with over-long character runs shortened.
 *
 * @example
 * ```typescript
 * collapseRepeatedCharacters('fuuuuck', 2); // Returns: 'fuuck'
 * collapseRepeatedCharacters('fuuuuck', 1); // Returns: 'fuck'
 * ```
 */
declare function collapseRepeatedCharacters(text: string, maxRepeated?: number): string;
/**
 * Detects whether text contains potential leetspeak patterns.
 * Useful as a cheap pre-check before applying leetspeak normalization.
 *
 * @param text - The input text to analyze.
 * @returns `true` if leetspeak patterns are detected, otherwise `false`.
 *
 * @example
 * ```typescript
 * containsLeetspeak('hello'); // Returns: false
 * containsLeetspeak('h3ll0'); // Returns: true (digits standing in for letters)
 * containsLeetspeak('f4ck');  // Returns: true
 * containsLeetspeak('@ss');   // Returns: true (symbol standing in for a letter)
 * ```
 */
declare function containsLeetspeak(text: string): boolean;
/**
 * Generates the possible leetspeak variants of a dictionary word and
 * returns them as an array (not a lazy generator).
 *
 * @param word - The base word to generate variants for.
 * @param level - The leetspeak level to use for variant generation.
 *   NOTE(review): the default level is not visible in this declaration.
 * @returns Array of possible leetspeak variants. The example below shows the
 *   original word included in the result.
 *
 * @example
 * ```typescript
 * generateLeetspeakVariants('ass');
 * // Returns: ['ass', '@ss', 'a$$', '@$$', '4ss', '4$$', ...]
 * ```
 */
declare function generateLeetspeakVariants(word: string, level?: LeetspeakLevel): string[];
/**
* @fileoverview Unicode normalization utilities for profanity detection.
* Handles homoglyphs, full-width characters, diacritics, and other Unicode tricks.
* @module utils/unicode
*/
/**
 * Switches controlling each stage of Unicode normalization.
 * Every field is optional.
 */
interface UnicodeNormalizationOptions {
  /**
   * Decompose characters using NFKD normalization.
   * @default true
   */
  nfkd?: boolean;

  /**
   * Map homoglyphs (lookalike characters) to their ASCII counterparts.
   * @default true
   */
  homoglyphs?: boolean;

  /**
   * Map full-width characters to half-width.
   * @default true
   */
  fullWidth?: boolean;

  /**
   * Strip diacritical marks (accents, umlauts, etc.).
   * @default true
   */
  removeDiacritics?: boolean;

  /**
   * Strip zero-width characters (ZWJ, ZWNJ, etc.).
   * @default true
   */
  removeZeroWidth?: boolean;
}
/**
 * Normalizes Unicode text for consistent profanity detection.
 * Handles various Unicode tricks used to evade filters.
 *
 * Non-ASCII and invisible characters in the examples are written as `\u`
 * escapes so they survive copy/paste and re-encoding.
 *
 * @param text - The input text containing potential Unicode obfuscation.
 * @param options - Optional normalization settings; see
 *   `UnicodeNormalizationOptions` for the available stages and their
 *   documented defaults.
 * @returns The normalized text.
 *
 * @example
 * ```typescript
 * import { normalizeUnicode } from 'glin-profanity';
 *
 * normalizeUnicode('f\u03C5ck');  // Returns: 'fuck' (Greek upsilon → u)
 * normalizeUnicode('f\uFF55ck');  // Returns: 'fuck' (full-width u → u)
 * normalizeUnicode('f\u00FCck');  // Returns: 'fuck' (ü → u)
 * normalizeUnicode('f\u00F9ck');  // Returns: 'fuck' (ù → u)
 * normalizeUnicode('fu\u200Bck'); // Returns: 'fuck' (removes zero-width space)
 * ```
 */
declare function normalizeUnicode(text: string, options?: UnicodeNormalizationOptions): string;
/**
 * Removes zero-width and invisible characters from text.
 *
 * NOTE(review): the exact set of code points removed is not visible in this
 * declaration; `UnicodeNormalizationOptions.removeZeroWidth` mentions
 * "ZWJ, ZWNJ, etc." — confirm the full list against the implementation.
 *
 * @param text - The input text.
 * @returns The text with zero-width characters removed.
 */
declare function removeZeroWidthCharacters(text: string): string;
/**
 * Converts full-width ASCII characters to half-width.
 * Full-width characters (U+FF01 to U+FF5E) are used in CJK text
 * but can also be used to evade filters.
 *
 * The example inputs are written as `\u` escapes because full-width
 * characters are easily flattened to plain ASCII by copy/paste or
 * re-encoding.
 *
 * @param text - The input text.
 * @returns The text with full-width characters converted to half-width.
 *
 * @example
 * ```typescript
 * convertFullWidth('\uFF21\uFF22\uFF23');       // Returns: 'ABC'
 * convertFullWidth('\uFF46\uFF55\uFF43\uFF4B'); // Returns: 'fuck'
 * ```
 */
declare function convertFullWidth(text: string): string;
/**
 * Converts homoglyph characters (lookalikes from other scripts) to their
 * ASCII equivalents.
 *
 * NOTE(review): the mapping table is not visible in this declaration. The
 * `normalizeUnicode` examples show Greek upsilon being mapped to `u`, which
 * is presumably handled by this step — confirm against the implementation.
 *
 * @param text - The input text.
 * @returns The text with homoglyphs converted.
 */
declare function convertHomoglyphs(text: string): string;
/**
 * Applies NFKD normalization and optionally removes diacritical marks.
 * NFKD decomposes characters into base characters and combining marks.
 *
 * @param text - The input text.
 * @param removeDiacritics - Whether to remove diacritical marks.
 *   NOTE(review): the default used when this is omitted is not visible in
 *   this declaration — confirm against the implementation.
 * @returns The normalized text.
 *
 * @example
 * ```typescript
 * normalizeNFKD('f\u00FCck', true);  // Returns: 'fuck'  (ü → u)
 * normalizeNFKD('caf\u00E9', true);  // Returns: 'cafe'  (é → e)
 * normalizeNFKD('na\u00EFve', true); // Returns: 'naive' (ï → i)
 * ```
 */
declare function normalizeNFKD(text: string, removeDiacritics?: boolean): string;
/**
 * Detects whether text contains potential Unicode obfuscation.
 * Useful as a pre-check before applying Unicode normalization.
 *
 * Special characters in the examples are written as `\u` escapes so the
 * obfuscated inputs remain distinguishable from plain ASCII.
 *
 * @param text - The input text to analyze.
 * @returns `true` if Unicode obfuscation patterns are detected, otherwise
 *   `false`.
 *
 * @example
 * ```typescript
 * containsUnicodeObfuscation('hello');      // Returns: false
 * containsUnicodeObfuscation('f\u03C5ck');  // Returns: true (Greek letter)
 * containsUnicodeObfuscation('fu\u200Bck'); // Returns: true (zero-width)
 * ```
 */
declare function containsUnicodeObfuscation(text: string): boolean;
/**
 * Reports which character sets appear in the text.
 * Helps identify mixed-script attacks (e.g., mixing Latin and Cyrillic).
 *
 * @param text - The input text.
 * @returns An object of boolean flags: `hasLatin`, `hasCyrillic`, `hasGreek`
 *   and `hasFullWidth` for the individual character sets, plus `hasMixed`.
 *   NOTE(review): `hasMixed` presumably means more than one character set was
 *   detected; the exact rule is not visible here — confirm against the
 *   implementation.
 */
declare function detectCharacterSets(text: string): {
hasLatin: boolean;
hasCyrillic: boolean;
hasGreek: boolean;
hasFullWidth: boolean;
hasMixed: boolean;
};
export { type LeetspeakLevel, type LeetspeakOptions, ProfanityCheckResult, ProfanityCheckerConfig, type UnicodeNormalizationOptions, checkProfanity, checkProfanityAsync, collapseRepeatedCharacters, collapseSpacedCharacters, containsLeetspeak, containsUnicodeObfuscation, convertFullWidth, convertHomoglyphs, detectCharacterSets, generateLeetspeakVariants, isWordProfane, normalizeLeetspeak, normalizeNFKD, normalizeUnicode, removeZeroWidthCharacters };