glin-profanity

Version:

Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content is involved, Glin-Profanity helps you maintain a clean and respectful environment.

www.glincker.com/tools/glin-profanity

GLINCKER/glin-profanity

259 lines (254 loc) • 9.46 kB

text/typescript

// Public type declarations for the package entry point.
// The single-letter names below are the export names used by the hashed chunk
// files (they appear to be bundler-generated); each one is re-bound to its
// public name here. Note that `F` from the Filter chunk is exported under two
// public names: `Filter` and `ProfanityFilter`.
import { P as ProfanityCheckerConfig, a as ProfanityCheckResult } from './types-DI8nzwWc.cjs';
export { F as Filter, F as ProfanityFilter } from './Filter-D34Wsmrj.cjs';
export { C as CheckProfanityResult, b as ContextAwareConfig, F as FilterConfig, a as FilteredProfanityResult, L as Language, M as Match, S as SeverityLevel } from './types-B9c_ik4k.cjs';
export { H as HybridAnalysisResult, a as MLAnalysisResult, M as MLDetectorConfig, T as ToxicityLabel, b as ToxicityPrediction } from './types-Ct_ueYqw.cjs';

/**
 * Checks a piece of text for profanity.
 *
 * NOTE(review): only the signature is visible in this declaration file; the
 * description is inferred from the name and types — confirm against the
 * implementation.
 *
 * @param text - The text to check
 * @param config - Optional checker configuration
 * @returns The result of the check
 */
declare function checkProfanity(text: string, config?: ProfanityCheckerConfig): ProfanityCheckResult;

/**
 * Promise-returning variant with the same parameters as `checkProfanity`.
 *
 * NOTE(review): presumably performs the same check asynchronously — confirm
 * against the implementation.
 *
 * @param text - The text to check
 * @param config - Optional checker configuration
 * @returns A promise that resolves to the result of the check
 */
declare function checkProfanityAsync(text: string, config?: ProfanityCheckerConfig): Promise<ProfanityCheckResult>;

/**
 * Reports whether a single word is considered profane.
 *
 * NOTE(review): inferred from the name and the boolean return type — confirm
 * how `config` influences the decision.
 *
 * @param word - The word to test
 * @param config - Optional checker configuration
 * @returns A boolean verdict for `word`
 */
declare function isWordProfane(word: string, config?: ProfanityCheckerConfig): boolean;

/**
 * @fileoverview Leetspeak detection and normalization utilities.
 * Converts leetspeak/1337 speak text back to standard characters for profanity detection.
 * @module utils/leetspeak
 */

/**
 * Leetspeak detection intensity levels.
 * - `basic`: Common substitutions only (0→o, 1→i, 3→e, 4→a, 5→s)
 * - `moderate`: Basic + symbols (@→a, $→s, !→i) and repeated chars
 * - `aggressive`: All known substitutions including multi-char patterns
 */
type LeetspeakLevel = 'basic' | 'moderate' | 'aggressive';

/**
 * Configuration options for leetspeak normalization.
 * Every field is optional; the documented defaults apply when a field is omitted.
 */
interface LeetspeakOptions {
    /**
     * Detection intensity level.
     * @default 'moderate'
     */
    level?: LeetspeakLevel;
    /**
     * Whether to collapse repeated characters (e.g., "fuuuuck" → "fuck").
     * @default true
     */
    collapseRepeated?: boolean;
    /**
     * Maximum allowed consecutive repeated characters before collapsing.
     * @default 2
     */
    maxRepeated?: number;
    /**
     * Whether to remove spaces between single characters (e.g., "f u c k" → "fuck").
     * @default true
     */
    removeSpacedChars?: boolean;
}

/**
 * Normalizes leetspeak text to standard characters.
 *
 * NOTE(review): the 'fuuuuck' example below shows repeats collapsing to a single
 * character, while the `collapseRepeatedCharacters` examples in this file show
 * 'fuuck' for `maxRepeated = 2` (the documented default) — confirm which
 * result is accurate.
 *
 * @param text - The input text containing potential leetspeak
 * @param options - Configuration options for normalization
 * @returns The normalized text with leetspeak characters replaced
 *
 * @example
 * ```typescript
 * import { normalizeLeetspeak } from 'glin-profanity';
 *
 * normalizeLeetspeak('f4ck'); // Returns: 'fack'
 * normalizeLeetspeak('sh!t'); // Returns: 'shit'
 * normalizeLeetspeak('b1tch'); // Returns: 'bitch'
 * normalizeLeetspeak('@ss'); // Returns: 'ass'
 * normalizeLeetspeak('f u c k'); // Returns: 'fuck'
 * normalizeLeetspeak('fuuuuck'); // Returns: 'fuck'
 * ```
 */
declare function normalizeLeetspeak(text: string, options?: LeetspeakOptions): string;

/**
 * Collapses sequences of spaced single characters into words.
 * Handles patterns like "f u c k" → "fuck" and "s h i t" → "shit".
 *
 * @param text - The input text
 * @returns Text with spaced characters collapsed
 *
 * @example
 * ```typescript
 * collapseSpacedCharacters('f u c k you'); // Returns: 'fuck you'
 * collapseSpacedCharacters('this is s h i t'); // Returns: 'this is shit'
 * ```
 */
declare function collapseSpacedCharacters(text: string): string;

/**
 * Collapses repeated consecutive characters beyond a threshold.
 * Runs longer than `maxRepeated` are shortened to `maxRepeated` characters,
 * as the examples below show.
 *
 * @param text - The input text
 * @param maxRepeated - Maximum allowed consecutive repeated characters
 *   (optional; the default is not visible in this declaration)
 * @returns Text with repeated characters collapsed
 *
 * @example
 * ```typescript
 * collapseRepeatedCharacters('fuuuuck', 2); // Returns: 'fuuck'
 * collapseRepeatedCharacters('fuuuuck', 1); // Returns: 'fuck'
 * ```
 */
declare function collapseRepeatedCharacters(text: string, maxRepeated?: number): string;

/**
 * Detects if text contains potential leetspeak patterns.
 * Useful for deciding whether to apply leetspeak normalization.
 *
 * @param text - The input text to analyze
 * @returns True if leetspeak patterns are detected
 *
 * @example
 * ```typescript
 * containsLeetspeak('hello'); // Returns: false
 * containsLeetspeak('h3ll0'); // Returns: true
 * containsLeetspeak('f4ck'); // Returns: true
 * containsLeetspeak('@ss'); // Returns: true
 * ```
 */
declare function containsLeetspeak(text: string): boolean;

/**
 * Generates leetspeak variants of a dictionary word.
 * The result is a plain array of strings (not a lazy generator).
 *
 * @param word - The base word to generate variants for
 * @param level - The leetspeak level to use for variant generation
 *   (optional; the default is not visible in this declaration)
 * @returns Array of possible leetspeak variants
 *
 * @example
 * ```typescript
 * generateLeetspeakVariants('ass');
 * // Returns: ['ass', '@ss', 'a$$', '@$$', '4ss', '4$$', ...]
 * ```
 */
declare function generateLeetspeakVariants(word: string, level?: LeetspeakLevel): string[];

/**
 * @fileoverview Unicode normalization utilities for profanity detection.
 * Handles homoglyphs, full-width characters, diacritics, and other Unicode tricks.
 * @module utils/unicode
 */

/**
 * Configuration options for Unicode normalization.
 * Every field is optional; each step is documented as enabled by default.
 */
interface UnicodeNormalizationOptions {
    /**
     * Apply NFKD normalization to decompose characters.
     * @default true
     */
    nfkd?: boolean;
    /**
     * Convert homoglyphs (lookalike characters) to ASCII.
     * @default true
     */
    homoglyphs?: boolean;
    /**
     * Convert full-width characters to half-width.
     * @default true
     */
    fullWidth?: boolean;
    /**
     * Remove diacritical marks (accents, umlauts, etc.).
     * @default true
     */
    removeDiacritics?: boolean;
    /**
     * Remove zero-width characters (ZWJ, ZWNJ, etc.).
     * @default true
     */
    removeZeroWidth?: boolean;
}

/**
 * Normalizes Unicode text for consistent profanity detection.
 * Handles various Unicode tricks used to evade filters.
 *
 * @param text - The input text containing potential Unicode obfuscation
 * @param options - Configuration options for normalization
 * @returns The normalized text
 *
 * @example
 * ```typescript
 * import { normalizeUnicode } from 'glin-profanity';
 *
 * normalizeUnicode('fυck'); // Returns: 'fuck' (Greek upsilon → u)
 * normalizeUnicode('fＵck'); // Returns: 'fuck' (full-width U → u)
 * normalizeUnicode('fück'); // Returns: 'fuck' (ü → u)
 * normalizeUnicode('fùck'); // Returns: 'fuck' (ù → u)
 * // The zero-width space is written as an escape so it is visible here.
 * normalizeUnicode('f\u200Buck'); // Returns: 'fuck' (removes zero-width space)
 * ```
 */
declare function normalizeUnicode(text: string, options?: UnicodeNormalizationOptions): string;

/**
 * Removes zero-width and invisible characters from text.
 *
 * @param text - The input text
 * @returns Text with zero-width characters removed
 */
declare function removeZeroWidthCharacters(text: string): string;

/**
 * Converts full-width ASCII characters to half-width.
 * Full-width characters (U+FF01 to U+FF5E) are used in CJK text
 * but can also be used to evade filters.
 *
 * @param text - The input text
 * @returns Text with full-width characters converted
 *
 * @example
 * ```typescript
 * convertFullWidth('ＡＢＣ'); // Returns: 'ABC'
 * convertFullWidth('ｆｕｃｋ'); // Returns: 'fuck'
 * ```
 */
declare function convertFullWidth(text: string): string;

/**
 * Converts homoglyph characters to their ASCII equivalents.
 *
 * @param text - The input text
 * @returns Text with homoglyphs converted
 */
declare function convertHomoglyphs(text: string): string;

/**
 * Applies NFKD normalization and optionally removes diacritical marks.
 * NFKD decomposes characters into base characters and combining marks.
 *
 * @param text - The input text
 * @param removeDiacritics - Whether to remove diacritical marks
 *   (optional; the default is not visible in this declaration)
 * @returns Normalized text
 *
 * @example
 * ```typescript
 * normalizeNFKD('fück', true); // Returns: 'fuck'
 * normalizeNFKD('café', true); // Returns: 'cafe'
 * normalizeNFKD('naïve', true); // Returns: 'naive'
 * ```
 */
declare function normalizeNFKD(text: string, removeDiacritics?: boolean): string;

/**
 * Detects if text contains potential Unicode obfuscation.
 * Useful for deciding whether to apply Unicode normalization.
 *
 * @param text - The input text to analyze
 * @returns True if Unicode obfuscation patterns are detected
 *
 * @example
 * ```typescript
 * containsUnicodeObfuscation('hello'); // Returns: false
 * containsUnicodeObfuscation('fυck'); // Returns: true (Greek letter)
 * // The zero-width space is written as an escape so it is visible here.
 * containsUnicodeObfuscation('f\u200Buck'); // Returns: true (zero-width)
 * ```
 */
declare function containsUnicodeObfuscation(text: string): boolean;

/**
 * Gets the character set being used in text.
 * Helps identify mixed-script attacks (e.g., mixing Latin and Cyrillic).
 *
 * NOTE(review): the result is one boolean flag per character set plus
 * `hasMixed`; presumably `hasMixed` is true when more than one set is
 * present — confirm the exact rule against the implementation.
 *
 * @param text - The input text
 * @returns Object with detected character set information
 */
declare function detectCharacterSets(text: string): {
    hasLatin: boolean;
    hasCyrillic: boolean;
    hasGreek: boolean;
    hasFullWidth: boolean;
    hasMixed: boolean;
};

// Public surface of this entry point: the locally declared types and functions,
// plus the two checker types imported at the top of the file.
export { type LeetspeakLevel, type LeetspeakOptions, ProfanityCheckResult, ProfanityCheckerConfig, type UnicodeNormalizationOptions, checkProfanity, checkProfanityAsync, collapseRepeatedCharacters, collapseSpacedCharacters, containsLeetspeak, containsUnicodeObfuscation, convertFullWidth, convertHomoglyphs, detectCharacterSets, generateLeetspeakVariants, isWordProfane, normalizeLeetspeak, normalizeNFKD, normalizeUnicode, removeZeroWidthCharacters };