glin-profanity
Version:
Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any other platform with user-generated content, it can detect and filter profanity in that content.
233 lines (229 loc) • 6.8 kB
TypeScript
import { F as Filter } from '../Filter-BGcyIAvO.js';
import { F as FilterConfig, C as CheckProfanityResult } from '../types-B9c_ik4k.js';
export { L as Language } from '../types-B9c_ik4k.js';
/**
* Transformers.js ML Integration for glin-profanity
*
* Provides ML-based profanity detection using Hugging Face models
* via transformers.js. This is an optional enhancement that adds
* context-aware detection on top of the dictionary-based approach.
*
* @example
* ```typescript
* import { createMLChecker, createHybridChecker } from 'glin-profanity/ml/transformers';
*
* // ML-only checker
* const mlChecker = await createMLChecker({
* model: 'tarekziade/pardonmyai',
* });
* const result = await mlChecker.check('Some text to check');
*
* // Hybrid: Dictionary + ML (recommended)
* const hybridChecker = await createHybridChecker({
* model: 'tarekziade/pardonmyai',
* mlThreshold: 0.7, // Only use ML if dictionary is uncertain
* });
* const hybridResult = await hybridChecker.check('Some text to check');
* ```
*
* @packageDocumentation
* @module glin-profanity/ml/transformers
*/
/**
* ML checker configuration
*
* Options accepted by `createMLChecker` and, through `extends`, by
* `HybridCheckerConfig`. Every field is optional; the values used when a
* field is omitted are chosen by the implementation and are not visible in
* this declaration file. `getConfig()` on the ML checker returns all five
* fields as non-optional.
*/
interface MLCheckerConfig {
/** Hugging Face model ID, e.g. `'tarekziade/pardonmyai'` */
model?: string;
/** Confidence threshold (0-1) for flagging as profane */
threshold?: number;
/**
* Label that indicates profanity (model-specific)
*
* NOTE(review): presumably compared against the `label` field of the raw
* model output — confirm against the implementation.
*/
profaneLabel?: string;
/** Use quantized model for smaller size */
quantized?: boolean;
/** Device to run on ('cpu', 'webgpu', etc.) */
device?: string;
}
/**
* Hybrid checker configuration
*
* Options accepted by `createHybridChecker`. Inherits every ML option from
* `MLCheckerConfig` and adds the dictionary-side settings and the weights used
* to combine the two scores. Every field is optional.
*/
interface HybridCheckerConfig extends MLCheckerConfig {
/** Filter configuration for dictionary-based checking (any subset of `FilterConfig`) */
filterConfig?: Partial<FilterConfig>;
/**
* Threshold governing the ML stage of the hybrid check.
*
* NOTE(review): the original wording ("ML confidence threshold below which to
* use ML") is ambiguous, and the module-level example describes this option
* as "Only use ML if dictionary is uncertain" — confirm against the
* implementation exactly which value is compared with this threshold.
*/
mlThreshold?: number;
/** Weight for dictionary score (0-1) */
dictionaryWeight?: number;
/**
* Weight for ML score (0-1)
*
* NOTE(review): the example in this file pairs 0.6 (dictionary) with 0.4 (ML);
* whether the two weights must sum to 1 is not stated here — confirm.
*/
mlWeight?: number;
}
/**
 * Outcome of a single ML-based profanity check.
 */
interface MLCheckResult {
  /** `true` when profanity was detected in the text */
  containsProfanity: boolean;
  /** Confidence score, in the range 0-1 */
  confidence: number;
  /** Raw model output as label/score pairs */
  rawOutput: { label: string; score: number }[];
  /** Processing time, in milliseconds */
  processingTimeMs: number;
}
/**
* Hybrid check result
*
* Returned by the hybrid checker's `check` / `checkBatch`. Always carries the
* dictionary result; the ML result is optional and `usedML` says whether the
* ML stage ran for this text.
*/
interface HybridCheckResult {
/** Whether profanity was detected */
containsProfanity: boolean;
/** Combined confidence score (0-1) */
confidence: number;
/** Dictionary check result */
dictionaryResult: CheckProfanityResult;
/** ML check result (if used); the property is optional and may be absent */
mlResult?: MLCheckResult;
/** Whether ML was used */
usedML: boolean;
/** Profane words found (from dictionary) */
profaneWords: string[];
/** Processing time in milliseconds */
processingTimeMs: number;
}
/**
* Popular profanity detection models on Hugging Face
*
* Maps a short alias to a Hugging Face model ID. Each value is typed as a
* string literal and each member is `readonly`, so the IDs can be passed as
* the `model` option of `MLCheckerConfig`.
*/
declare const RECOMMENDED_MODELS: {
/** High accuracy English model (97.5%) - 67M params */
readonly pardonmyai: "tarekziade/pardonmyai";
/** Smaller version for constrained environments */
readonly pardonmyaiTiny: "tarekziade/pardonmyai-tiny";
/**
* Multilingual toxicity detection (7 languages)
*
* NOTE(review): this description looks like it belongs to a different model.
* To my knowledge `unitary/toxic-bert` is the English-trained Detoxify model,
* and the 7-language variant is `unitary/multilingual-toxic-xlm-roberta` —
* confirm against the model card before relying on multilingual behavior.
*/
readonly toxicBert: "unitary/toxic-bert";
/** Offensive speech detector (DeBERTa-based) */
readonly offensiveSpeech: "KoalaAI/OffensiveSpeechDetector";
};
/**
 * Builds a profanity checker backed by a transformers.js model.
 *
 * @param config - Optional model settings; every field of `MLCheckerConfig`
 * may be omitted.
 * @returns A promise for the checker object whose methods are declared below.
 *
 * @example
 * ```typescript
 * const checker = await createMLChecker({
 *   model: 'tarekziade/pardonmyai',
 *   threshold: 0.7,
 * });
 *
 * const result = await checker.check('Hello world');
 * console.log(result.containsProfanity); // false
 * console.log(result.confidence); // 0.02
 *
 * // Batch check
 * const results = await checker.checkBatch(['text1', 'text2', 'text3']);
 *
 * // Clean up
 * checker.dispose();
 * ```
 */
declare function createMLChecker(config?: MLCheckerConfig): Promise<{
  /** Checks one text for profanity. */
  check(text: string): Promise<MLCheckResult>;
  /** Checks several texts in one call. */
  checkBatch(texts: string[]): Promise<MLCheckResult[]>;
  /** Resolves to the profanity score (0-1) for the text. */
  getScore(text: string): Promise<number>;
  /** Returns the current configuration, with every option present. */
  getConfig(): Required<MLCheckerConfig>;
  /** Disposes of the model to free memory; returns synchronously. */
  dispose(): void;
}>;
/**
* Creates a hybrid checker that combines dictionary + ML
*
* Strategy:
* 1. Dictionary check first (fast, ~1ms)
* 2. If dictionary finds profanity → flag immediately
* 3. If dictionary is clean but text is suspicious → use ML
* 4. Combine scores with configurable weights
*
* @param config - Optional settings; accepts every `MLCheckerConfig` field
* plus the dictionary/weighting fields of `HybridCheckerConfig`.
* @returns A promise for the checker object whose methods are declared below.
*
* @example
* ```typescript
* const checker = await createHybridChecker({
* model: 'tarekziade/pardonmyai',
* filterConfig: { languages: ['english'], detectLeetspeak: true },
* mlThreshold: 0.6,
* dictionaryWeight: 0.6,
* mlWeight: 0.4,
* });
*
* const result = await checker.check('Hello world');
* console.log(result.containsProfanity);
* console.log(result.usedML); // true if ML was invoked
*
* // Clean up
* await checker.dispose();
* ```
*/
declare function createHybridChecker(config?: HybridCheckerConfig): Promise<{
/**
* Check text using hybrid approach
*/
check(text: string): Promise<HybridCheckResult>;
/**
* Check multiple texts
*/
checkBatch(texts: string[]): Promise<HybridCheckResult[]>;
/**
* Dictionary-only check (fast, no ML)
*
* Synchronous: returns the result directly rather than a promise.
*/
checkFast(text: string): CheckProfanityResult;
/**
* ML-only check (slower, more accurate)
*/
checkML(text: string): Promise<MLCheckResult>;
/**
* Get the underlying filter
*/
getFilter(): Filter;
/**
* Dispose of resources
*
* Returns a promise, so it should be awaited; the checker returned by
* `createMLChecker` declares a synchronous `dispose(): void` instead.
*/
dispose(): Promise<void>;
}>;
/**
* Check if transformers.js is available
*
* @returns A promise resolving to `true` when transformers.js is available
* and `false` otherwise.
*
* NOTE(review): how availability is probed is not visible from this
* declaration; presumably it attempts to load the optional transformers.js
* dependency — confirm against the implementation.
*/
declare function isTransformersAvailable(): Promise<boolean>;
/**
* Pre-download a model for faster first inference
*
* @param model - Hugging Face model ID to pre-download. Optional; the model
* used when it is omitted is not visible in this declaration file.
* @param options - Optional settings.
* @param options.quantized - Optional flag; presumably selects the quantized
* variant as in `MLCheckerConfig.quantized` — confirm.
* @returns A promise that carries no value.
*
* @example
* ```typescript
* // Pre-load during app initialization
* await preloadModel('tarekziade/pardonmyai');
*
* // Later, checker will start faster
* const checker = await createMLChecker({ model: 'tarekziade/pardonmyai' });
* ```
*/
declare function preloadModel(model?: string, options?: {
quantized?: boolean;
}): Promise<void>;
// Exported surface of this module: the result/config types declared above, the
// `CheckProfanityResult` and `FilterConfig` types imported from the shared
// types bundle, the `RECOMMENDED_MODELS` constant, and the four functions.
export { CheckProfanityResult, FilterConfig, type HybridCheckResult, type HybridCheckerConfig, type MLCheckResult, type MLCheckerConfig, RECOMMENDED_MODELS, createHybridChecker, createMLChecker, isTransformersAvailable, preloadModel };