glin-profanity

Version:

Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content is involved, Glin-Profanity helps you keep that content clean.

www.glincker.com/tools/glin-profanity

GLINCKER/glin-profanity

233 lines (229 loc) • 6.8 kB

TypeScript

import { F as Filter } from '../Filter-BGcyIAvO.js';
import { F as FilterConfig, C as CheckProfanityResult } from '../types-B9c_ik4k.js';
export { L as Language } from '../types-B9c_ik4k.js';

/**
 * Transformers.js ML Integration for glin-profanity
 *
 * Provides ML-based profanity detection using Hugging Face models
 * via transformers.js. This is an optional enhancement that adds
 * context-aware detection on top of the dictionary-based approach.
 *
 * This file contains type declarations only; the runtime behavior described
 * in the comments below is implemented elsewhere.
 *
 * @example
 * ```typescript
 * import { createMLChecker, createHybridChecker } from 'glin-profanity/ml/transformers';
 *
 * // ML-only checker
 * const mlChecker = await createMLChecker({
 *   model: 'tarekziade/pardonmyai',
 * });
 * const mlResult = await mlChecker.check('Some text to check');
 *
 * // Hybrid: Dictionary + ML (recommended)
 * const hybridChecker = await createHybridChecker({
 *   model: 'tarekziade/pardonmyai',
 *   mlThreshold: 0.7, // Only use ML if dictionary is uncertain
 * });
 * const hybridResult = await hybridChecker.check('Some text to check');
 * ```
 *
 * @packageDocumentation
 * @module glin-profanity/ml/transformers
 */

/**
 * ML checker configuration.
 *
 * Every field is optional. The values applied when a field is omitted are
 * not visible in this declaration file; the resolved values can be read
 * back through `getConfig()` on the checker returned by `createMLChecker`.
 */
interface MLCheckerConfig {
    /** Hugging Face model ID (see `RECOMMENDED_MODELS` for known IDs) */
    model?: string;
    /** Confidence threshold (0-1) for flagging as profane */
    threshold?: number;
    /** Label that indicates profanity (model-specific) */
    profaneLabel?: string;
    /** Use quantized model for smaller size */
    quantized?: boolean;
    /** Device to run on ('cpu', 'webgpu', etc.) */
    device?: string;
}

/**
 * Hybrid checker configuration.
 *
 * Extends `MLCheckerConfig` with the settings for the dictionary filter and
 * for how dictionary and ML results are combined.
 */
interface HybridCheckerConfig extends MLCheckerConfig {
    /** Filter configuration for dictionary-based checking */
    filterConfig?: Partial<FilterConfig>;
    /**
     * ML confidence threshold below which to use ML.
     *
     * NOTE(review): the module-level example describes this as "only use ML
     * if dictionary is uncertain"; the exact trigger condition lives in the
     * implementation and should be confirmed there.
     */
    mlThreshold?: number;
    /** Weight for dictionary score (0-1) */
    dictionaryWeight?: number;
    /**
     * Weight for ML score (0-1).
     *
     * NOTE(review): whether `dictionaryWeight + mlWeight` must equal 1 is not
     * stated here; confirm against the implementation.
     */
    mlWeight?: number;
}

/**
 * ML check result
 */
interface MLCheckResult {
    /** Whether profanity was detected */
    containsProfanity: boolean;
    /** Confidence score (0-1) */
    confidence: number;
    /** Raw model output: one `{ label, score }` entry per element */
    rawOutput: Array<{
        label: string;
        score: number;
    }>;
    /** Processing time in milliseconds */
    processingTimeMs: number;
}

/**
 * Hybrid check result
 */
interface HybridCheckResult {
    /** Whether profanity was detected */
    containsProfanity: boolean;
    /** Combined confidence score (0-1) */
    confidence: number;
    /** Dictionary check result */
    dictionaryResult: CheckProfanityResult;
    /** ML check result (if used); absent when ML was not run, see `usedML` */
    mlResult?: MLCheckResult;
    /** Whether ML was used */
    usedML: boolean;
    /** Profane words found (from dictionary) */
    profaneWords: string[];
    /** Processing time in milliseconds */
    processingTimeMs: number;
}

/**
 * Popular profanity detection models on Hugging Face.
 *
 * Each property is a readonly string literal holding a Hugging Face model ID
 * that can be passed as `MLCheckerConfig.model`.
 */
declare const RECOMMENDED_MODELS: {
    /** High accuracy English model (97.5%) - 67M params */
    readonly pardonmyai: "tarekziade/pardonmyai";
    /** Smaller version for constrained environments */
    readonly pardonmyaiTiny: "tarekziade/pardonmyai-tiny";
    /**
     * Multilingual toxicity detection (7 languages)
     *
     * NOTE(review): confirm the language coverage against the model card for
     * this model ID before relying on it for non-English text.
     */
    readonly toxicBert: "unitary/toxic-bert";
    /** Offensive speech detector (DeBERTa-based) */
    readonly offensiveSpeech: "KoalaAI/OffensiveSpeechDetector";
};

/**
 * Creates an ML-based profanity checker using transformers.js
 *
 * @param config - Optional model and threshold settings; see `MLCheckerConfig`.
 * @returns A promise resolving to the checker object described below.
 *
 * @example
 * ```typescript
 * const checker = await createMLChecker({
 *   model: 'tarekziade/pardonmyai',
 *   threshold: 0.7,
 * });
 *
 * const result = await checker.check('Hello world');
 * console.log(result.containsProfanity); // false
 * console.log(result.confidence); // 0.02
 *
 * // Batch check
 * const results = await checker.checkBatch(['text1', 'text2', 'text3']);
 *
 * // Clean up
 * checker.dispose();
 * ```
 */
declare function createMLChecker(config?: MLCheckerConfig): Promise<{
    /**
     * Check a single text for profanity
     */
    check(text: string): Promise<MLCheckResult>;
    /**
     * Check multiple texts.
     *
     * Presumably yields one result per input, in input order; confirm
     * against the implementation.
     */
    checkBatch(texts: string[]): Promise<MLCheckResult[]>;
    /**
     * Get the profanity score for text (0-1)
     */
    getScore(text: string): Promise<number>;
    /**
     * Get current configuration.
     *
     * All fields are required here (unlike `MLCheckerConfig`), i.e. the
     * returned object carries fully resolved values.
     */
    getConfig(): {
        model: string;
        threshold: number;
        profaneLabel: string;
        quantized: boolean;
        device: string;
    };
    /**
     * Dispose of the model (free memory).
     *
     * Synchronous, unlike the hybrid checker's `dispose()`, which returns a
     * promise.
     */
    dispose(): void;
}>;

/**
 * Creates a hybrid checker that combines dictionary + ML
 *
 * Strategy:
 * 1. Dictionary check first (fast, ~1ms)
 * 2. If dictionary finds profanity → flag immediately
 * 3. If dictionary is clean but text is suspicious → use ML
 * 4. Combine scores with configurable weights
 *
 * @param config - Optional settings for the dictionary filter, the ML model
 *   and the score weighting; see `HybridCheckerConfig`.
 * @returns A promise resolving to the checker object described below.
 *
 * @example
 * ```typescript
 * const checker = await createHybridChecker({
 *   model: 'tarekziade/pardonmyai',
 *   filterConfig: { languages: ['english'], detectLeetspeak: true },
 *   mlThreshold: 0.6,
 *   dictionaryWeight: 0.6,
 *   mlWeight: 0.4,
 * });
 *
 * const result = await checker.check('Hello world');
 * console.log(result.containsProfanity);
 * console.log(result.usedML); // true if ML was invoked
 *
 * // Clean up
 * await checker.dispose();
 * ```
 */
declare function createHybridChecker(config?: HybridCheckerConfig): Promise<{
    /**
     * Check text using hybrid approach
     */
    check(text: string): Promise<HybridCheckResult>;
    /**
     * Check multiple texts.
     *
     * Presumably yields one result per input, in input order; confirm
     * against the implementation.
     */
    checkBatch(texts: string[]): Promise<HybridCheckResult[]>;
    /**
     * Dictionary-only check (fast, no ML).
     *
     * Returns its result directly rather than a promise.
     */
    checkFast(text: string): CheckProfanityResult;
    /**
     * ML-only check (slower, more accurate)
     */
    checkML(text: string): Promise<MLCheckResult>;
    /**
     * Get the underlying filter
     */
    getFilter(): Filter;
    /**
     * Dispose of resources.
     *
     * Returns a promise, so callers should `await` it.
     */
    dispose(): Promise<void>;
}>;

/**
 * Check if transformers.js is available
 *
 * @returns A promise resolving to `true` when transformers.js is available,
 *   `false` otherwise.
 */
declare function isTransformersAvailable(): Promise<boolean>;

/**
 * Pre-download a model for faster first inference
 *
 * @param model - Hugging Face model ID to preload. Optional; the model used
 *   when omitted is not visible in this declaration file.
 * @param options - Optional settings; `quantized` selects the quantized
 *   variant of the model.
 * @returns A promise with no value.
 *
 * @example
 * ```typescript
 * // Pre-load during app initialization
 * await preloadModel('tarekziade/pardonmyai');
 *
 * // Later, checker will start faster
 * const checker = await createMLChecker({ model: 'tarekziade/pardonmyai' });
 * ```
 */
declare function preloadModel(model?: string, options?: {
    quantized?: boolean;
}): Promise<void>;

export { CheckProfanityResult, FilterConfig, type HybridCheckResult, type HybridCheckerConfig, type MLCheckResult, type MLCheckerConfig, RECOMMENDED_MODELS, createHybridChecker, createMLChecker, isTransformersAvailable, preloadModel };