glin-profanity

Version:

Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content is involved, Glin-Profanity helps you keep that content clean and respectful.

www.glincker.com/tools/glin-profanity

GLINCKER/glin-profanity

361 lines (355 loc) • 10.9 kB

text/typescript

import { T as ToxicityLabel, M as MLDetectorConfig, a as MLAnalysisResult, H as HybridAnalysisResult } from '../types-Ct_ueYqw.cjs';
export { b as ToxicityPrediction } from '../types-Ct_ueYqw.cjs';
import { F as Filter } from '../Filter-D34Wsmrj.cjs';
import { F as FilterConfig, C as CheckProfanityResult } from '../types-B9c_ik4k.cjs';
export { HybridCheckResult, HybridCheckerConfig, MLCheckResult, MLCheckerConfig, RECOMMENDED_MODELS, createHybridChecker, createMLChecker, isTransformersAvailable, preloadModel } from './transformers.cjs';

/**
 * Optional machine-learning toxicity detection built on TensorFlow.js.
 *
 * The detectors declared here wrap the TensorFlow.js toxicity model, which
 * was trained on the civil comments dataset.
 *
 * IMPORTANT: two optional peer dependencies must be installed:
 *   - @tensorflow/tfjs
 *   - @tensorflow-models/toxicity
 *
 * Install with: npm install @tensorflow/tfjs @tensorflow-models/toxicity
 *
 * @example
 * ```typescript
 * import { ToxicityDetector } from 'glin-profanity/ml';
 *
 * const detector = new ToxicityDetector({ threshold: 0.9 });
 * await detector.loadModel();
 *
 * const result = await detector.analyze('some text to check');
 * console.log(result.isToxic);
 * ```
 */

/**
 * One element of the array resolved by {@link ToxicityModel.classify}.
 *
 * NOTE(review): field meanings below mirror the upstream
 * `@tensorflow-models/toxicity` output format; confirm against its docs.
 */
interface ToxicityModelPrediction {
    /** Name of the toxicity category this prediction refers to. */
    label: string;
    /**
     * One entry per classified sentence (presumably in input order).
     * `match` may be `null`; `probabilities` is either a typed array or a
     * plain number array.
     */
    results: Array<{
        match: boolean | null;
        probabilities: Float32Array | number[];
    }>;
}

/**
 * Minimal structural view of the loaded toxicity model: the only capability
 * declared is classifying a batch of sentences.
 */
interface ToxicityModel {
    /** Classifies the given sentences and resolves with the predictions. */
    classify(sentences: string[]): Promise<ToxicityModelPrediction[]>;
}

/**
 * Neural-network toxicity detector backed by TensorFlow.js.
 *
 * Recognizes several kinds of harmful content, including insults, threats,
 * identity attacks and obscenity. The underlying model is loaded lazily and
 * then cached, so later calls reuse it.
 */
declare class ToxicityDetector {
    private model;
    private loadingPromise;
    private config;
    private isAvailable;

    /**
     * Every toxicity label the detector knows about.
     */
    static readonly ALL_LABELS: ToxicityLabel[];

    /**
     * Builds a detector.
     *
     * @param config - Optional detector settings
     *
     * @example
     * ```typescript
     * // Defaults: threshold is 0.85
     * const detector = new ToxicityDetector();
     *
     * // Raise the threshold for higher precision
     * const strictDetector = new ToxicityDetector({ threshold: 0.95 });
     *
     * // Restrict detection to selected categories
     * const customDetector = new ToxicityDetector({
     *   threshold: 0.8,
     *   labels: ['insult', 'threat', 'obscene'],
     * });
     * ```
     */
    constructor(config?: MLDetectorConfig);

    /**
     * Wrapper around dynamic import that sidesteps TypeScript's static
     * analysis: it goes through the Function constructor so that module
     * resolution is not attempted at compile time.
     * @internal
     */
    private dynamicImport;

    /**
     * Reports whether TensorFlow.js and the toxicity model can be used.
     * The check runs lazily on the first call; its outcome is cached.
     *
     * @returns True when the ML dependencies are available
     */
    checkAvailability(): Promise<boolean>;

    /**
     * Loads the toxicity model.
     * Calling this yourself is optional: the first analyze() call triggers it
     * automatically when it has not been called explicitly.
     *
     * @returns The loaded model
     * @throws Error if the TensorFlow.js dependencies are not installed
     *
     * @example
     * ```typescript
     * const detector = new ToxicityDetector();
     *
     * // Optional explicit preload
     * await detector.loadModel();
     *
     * // ...or rely on automatic loading at first use
     * const result = await detector.analyze('text');
     * ```
     */
    loadModel(): Promise<ToxicityModel>;

    private doLoadModel;

    /**
     * Runs the ML model over a single text.
     *
     * @param text - Text to analyze
     * @returns Analysis result carrying predictions and scores
     *
     * @example
     * ```typescript
     * const detector = new ToxicityDetector();
     * const result = await detector.analyze('you are stupid');
     *
     * console.log(result.isToxic); // true
     * console.log(result.overallScore); // 0.92
     * console.log(result.matchedCategories); // ['insult', 'toxicity']
     * ```
     */
    analyze(text: string): Promise<MLAnalysisResult>;

    /**
     * Runs the ML model over several texts in one batch, which performs
     * better than analyzing them one at a time.
     *
     * @param texts - Texts to analyze
     * @returns One analysis result per input text
     *
     * @example
     * ```typescript
     * const detector = new ToxicityDetector();
     * const results = await detector.analyzeBatch([
     *   'hello friend',
     *   'you are terrible',
     *   'great work!',
     * ]);
     *
     * results.forEach((result, i) => {
     *   console.log(`Text ${i}: ${result.isToxic ? 'toxic' : 'clean'}`);
     * });
     * ```
     */
    analyzeBatch(texts: string[]): Promise<MLAnalysisResult[]>;

    /**
     * Yes/no convenience check for toxicity.
     *
     * @param text - Text to check
     * @returns True when the text is detected as toxic
     *
     * @example
     * ```typescript
     * const detector = new ToxicityDetector();
     *
     * if (await detector.isToxic('some user input')) {
     *   console.log('Content flagged as toxic');
     * }
     * ```
     */
    isToxic(text: string): Promise<boolean>;

    /**
     * Returns the toxicity score of a text, in the range 0-1.
     *
     * @param text - Text to score
     * @returns Score between 0 (clean) and 1 (highly toxic)
     */
    getScore(text: string): Promise<number>;

    /**
     * Releases the model so its memory can be freed.
     * The next analyze() call loads the model again.
     */
    dispose(): void;

    /**
     * Returns the configuration currently in effect, with every option
     * populated.
     */
    getConfig(): Required<MLDetectorConfig>;

    /**
     * Tells whether the model is loaded at this moment.
     */
    isModelLoaded(): boolean;
}

/**
 * Options accepted by {@link HybridFilter}: all rule-based filter options
 * plus the ML-specific ones declared here.
 */
interface HybridFilterConfig extends FilterConfig {
    /**
     * Turns ML-based detection on.
     * Needs @tensorflow/tfjs and @tensorflow-models/toxicity.
     * @default false
     */
    enableML?: boolean;

    /**
     * Confidence threshold applied to ML predictions.
     * @default 0.85
     */
    mlThreshold?: number;

    /**
     * Restricts ML detection to these toxicity categories.
     */
    mlLabels?: ToxicityLabel[];

    /**
     * Loads the ML model during initialization.
     * @default false
     */
    preloadML?: boolean;

    /**
     * Strategy for merging the rule-based and ML verdicts.
     * - 'or': flag when either method detects toxicity (more sensitive)
     * - 'and': flag only when both methods detect toxicity (more precise)
     * - 'ml-override': use the ML result when available, otherwise the rules
     * - 'rules-first': rules for speed, ML for borderline cases
     * @default 'or'
     */
    combinationMode?: 'or' | 'and' | 'ml-override' | 'rules-first';

    /**
     * Score threshold that defines a "borderline" case in rules-first mode.
     * When the rule-based verdict is uncertain (close to this threshold),
     * ML is consulted for confirmation.
     * @default 0.5
     */
    borderlineThreshold?: number;
}

/**
 * Hybrid profanity filter that combines rule-based and ML-based detection.
 *
 * It offers the strengths of both approaches:
 * - fast rule-based detection for common profanity
 * - ML-based detection for contextual toxicity
 *
 * @example
 * ```typescript
 * import { HybridFilter } from 'glin-profanity/ml';
 *
 * const filter = new HybridFilter({
 *   // Rule-based config
 *   languages: ['english'],
 *   detectLeetspeak: true,
 *   // ML config
 *   enableML: true,
 *   mlThreshold: 0.85,
 * });
 *
 * await filter.initialize();
 *
 * const result = await filter.checkProfanityAsync('some text');
 * console.log(result.isToxic);
 * ```
 */
declare class HybridFilter {
    private ruleFilter;
    private mlDetector;
    private config;
    private mlInitialized;

    /**
     * Builds a hybrid filter.
     *
     * @param config - Optional filter settings
     */
    constructor(config?: HybridFilterConfig);

    /**
     * Prepares the filter, loading the ML model when ML is enabled.
     * For best performance, call this before any of the async methods.
     *
     * @example
     * ```typescript
     * const filter = new HybridFilter({ enableML: true });
     * await filter.initialize();
     * // Async checks are now fast
     * ```
     */
    initialize(): Promise<void>;

    /**
     * Tells whether ML is both available and initialized.
     */
    isMLReady(): boolean;

    /**
     * Synchronous check that relies on rule-based detection only.
     * Suited to fast, synchronous checks where ML is not needed.
     *
     * @param text - Text to check
     * @returns True when profanity is detected
     */
    isProfane(text: string): boolean;

    /**
     * Synchronous detailed check that relies on rule-based detection only.
     *
     * @param text - Text to check
     * @returns Detailed profanity check result
     */
    checkProfanity(text: string): CheckProfanityResult;

    /**
     * Asynchronous check that uses rule-based and ML detection together.
     *
     * @param text - Text to check
     * @returns Combined analysis result
     *
     * @example
     * ```typescript
     * const filter = new HybridFilter({
     *   enableML: true,
     *   combinationMode: 'or',
     * });
     * await filter.initialize();
     *
     * const result = await filter.checkProfanityAsync('some text');
     * if (result.isToxic) {
     *   console.log('Reason:', result.reason);
     *   console.log('Confidence:', result.confidence);
     * }
     * ```
     */
    checkProfanityAsync(text: string): Promise<HybridAnalysisResult>;

    /**
     * Asynchronous yes/no convenience check for toxicity.
     *
     * @param text - Text to check
     * @returns True when the text is toxic
     */
    isToxicAsync(text: string): Promise<boolean>;

    /**
     * Analyzes a text with ML alone, when ML is available.
     *
     * @param text - Text to analyze
     * @returns The ML analysis result, or null when ML is not available
     */
    analyzeWithML(text: string): Promise<MLAnalysisResult | null>;

    /**
     * Analyzes several texts in one batch.
     *
     * @param texts - Texts to analyze
     * @returns One hybrid analysis result per input text
     */
    checkProfanityBatchAsync(texts: string[]): Promise<HybridAnalysisResult[]>;

    private combineResults;

    /**
     * Returns the rule-based filter used internally.
     */
    getRuleFilter(): Filter;

    /**
     * Returns the ML detector used internally, if ML is enabled.
     */
    getMLDetector(): ToxicityDetector | null;

    /**
     * Releases held resources (the ML model).
     */
    dispose(): void;
}

export { HybridAnalysisResult, HybridFilter, type HybridFilterConfig, MLAnalysisResult, MLDetectorConfig, ToxicityDetector, ToxicityLabel };