glin-profanity

Version:

Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content is involved, Glin-Profanity helps you maintain a clean and respectful environment.

www.glincker.com/tools/glin-profanity

GLINCKER/glin-profanity

256 lines (252 loc) • 7.29 kB

TypeScript

import { F as Filter } from '../Filter-BGcyIAvO.js';
import {
  L as Language,
  F as FilterConfig,
  C as CheckProfanityResult,
} from '../types-B9c_ik4k.js';

/**
 * Audio Pipeline Utilities for glin-profanity
 *
 * Helpers for running profanity checks over audio content.
 * Speech-to-text is NOT part of this module: callers plug in their own
 * transcription (Whisper, Google STT, Azure Speech, etc.).
 *
 * @example
 * ```typescript
 * import { createAudioPipeline } from 'glin-profanity/audio';
 * import OpenAI from 'openai';
 *
 * const openai = new OpenAI();
 * const pipeline = createAudioPipeline({
 *   transcriber: async (audioBuffer) => {
 *     const response = await openai.audio.transcriptions.create({
 *       file: audioBuffer,
 *       model: 'whisper-1',
 *     });
 *     return response.text;
 *   },
 * });
 *
 * const result = await pipeline.checkAudio(audioFile);
 * console.log(result.containsProfanity);
 * ```
 *
 * @packageDocumentation
 * @module glin-profanity/audio
 */

/**
 * Every form of audio input the pipeline accepts.
 */
type AudioInput = Buffer | Uint8Array | Blob | File | string;

/**
 * Caller-supplied transcription routine: receives an audio input and
 * resolves to the transcribed text.
 */
type TranscriberFunction = (audio: AudioInput) => Promise<string>;

/**
 * Options accepted by {@link createAudioPipeline}.
 */
interface AudioPipelineConfig {
  /** Transcription function to use (REQUIRED) */
  transcriber: TranscriberFunction;
  /** Languages used for profanity detection */
  languages?: Language[];
  /** Turn leetspeak detection on */
  detectLeetspeak?: boolean;
  /** Turn Unicode normalization on */
  normalizeUnicode?: boolean;
  /** Additional filter configuration */
  filterConfig?: Partial<FilterConfig>;
}

/**
 * Outcome of checking a single audio input.
 */
interface AudioCheckResult {
  /** True when profanity was found */
  containsProfanity: boolean;
  /** Text obtained by transcribing the audio */
  transcribedText: string;
  /** The profane words that were found */
  profaneWords: string[];
  /** Complete result of the profanity check */
  profanityResult: CheckProfanityResult;
  /** Processing time, in milliseconds */
  processingTimeMs: number;
  /** Time spent on transcription, in milliseconds */
  transcriptionTimeMs: number;
  /** Time spent on the profanity check, in milliseconds */
  checkTimeMs: number;
}

/**
 * Per-segment outcome for timestamped audio.
 */
interface AudioSegmentResult {
  /** Index of the segment */
  index: number;
  /** Segment start, in seconds */
  startTime: number;
  /** Segment end, in seconds */
  endTime: number;
  /** Transcribed text of the segment */
  text: string;
  /** True when the segment contains profanity */
  containsProfanity: boolean;
  /** Profane words found in the segment */
  profaneWords: string[];
}

/**
 * One timestamped piece of transcript, as passed to `checkSegments`.
 * Times are in seconds, matching {@link AudioSegmentResult}.
 */
type TimedTranscriptSegment = {
  startTime: number;
  endTime: number;
  text: string;
};

/**
 * Object returned by {@link createAudioPipeline}.
 */
type AudioPipeline = {
  /**
   * Checks one audio input for profanity.
   */
  checkAudio(audio: AudioInput): Promise<AudioCheckResult>;
  /**
   * Checks several audio inputs, yielding one result per input.
   */
  checkMultiple(audios: AudioInput[]): Promise<AudioCheckResult[]>;
  /**
   * Checks text that has already been transcribed; the return type is
   * synchronous, so no transcriber call is involved in the signature.
   */
  checkTranscript(text: string): CheckProfanityResult;
  /**
   * Checks timestamped segments (e.g. Whisper output with timestamps).
   *
   * @example
   * ```typescript
   * const segments = [
   *   { startTime: 0, endTime: 5, text: 'Hello everyone' },
   *   { startTime: 5, endTime: 10, text: 'This is a test' },
   * ];
   * const results = pipeline.checkSegments(segments);
   * const flaggedSegments = results.filter(s => s.containsProfanity);
   * ```
   */
  checkSegments(segments: TimedTranscriptSegment[]): AudioSegmentResult[];
  /**
   * Returns a censored version of transcribed text.
   * NOTE(review): `replacement` is presumably the string substituted for
   * profane words; its default is not visible here - confirm against the
   * implementation.
   */
  censorTranscript(text: string, replacement?: string): string;
  /**
   * Exposes the underlying filter instance.
   */
  getFilter(): Filter;
};

/**
 * Builds an audio profanity-checking pipeline around a caller-supplied
 * transcriber.
 *
 * @param config - Pipeline options; `transcriber` is required.
 * @returns An object exposing the audio, transcript and segment checks.
 *
 * @example
 * ```typescript
 * // With OpenAI Whisper
 * const pipeline = createAudioPipeline({
 *   transcriber: async (audio) => {
 *     const formData = new FormData();
 *     formData.append('file', audio);
 *     formData.append('model', 'whisper-1');
 *
 *     const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
 *       method: 'POST',
 *       headers: { 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` },
 *       body: formData,
 *     });
 *     const data = await response.json();
 *     return data.text;
 *   },
 * });
 *
 * // With Google Cloud Speech-to-Text
 * const pipeline = createAudioPipeline({
 *   transcriber: async (audio) => {
 *     // Your Google STT implementation
 *     return transcribedText;
 *   },
 * });
 * ```
 */
declare function createAudioPipeline(config: AudioPipelineConfig): AudioPipeline;

/**
 * Options accepted by {@link createWhisperTranscriber}.
 * NOTE(review): defaults for the optional fields are not visible in these
 * declarations - confirm against the implementation.
 */
type WhisperTranscriberOptions = {
  apiKey: string;
  model?: string;
  baseUrl?: string;
  language?: string;
};

/**
 * Builds a transcriber function backed by the OpenAI Whisper API.
 *
 * @param config - API key plus optional model, base URL and language.
 * @returns A transcriber usable as `AudioPipelineConfig.transcriber`.
 *
 * @example
 * ```typescript
 * const transcriber = createWhisperTranscriber({
 *   apiKey: process.env.OPENAI_API_KEY,
 *   model: 'whisper-1',
 * });
 *
 * const pipeline = createAudioPipeline({ transcriber });
 * ```
 */
declare function createWhisperTranscriber(config: WhisperTranscriberOptions): TranscriberFunction;

/**
 * Options accepted by {@link createGoogleSTTTranscriber}.
 * NOTE(review): the optional flags presumably map onto the corresponding
 * Google Speech-to-Text recognition settings - confirm against the
 * implementation.
 */
type GoogleSTTTranscriberOptions = {
  apiKey: string;
  languageCode?: string;
  enableAutomaticPunctuation?: boolean;
  profanityFilter?: boolean;
};

/**
 * Builds a transcriber function backed by Google Cloud Speech-to-Text.
 *
 * @param config - API key plus optional recognition settings.
 * @returns A transcriber usable as `AudioPipelineConfig.transcriber`.
 *
 * @example
 * ```typescript
 * const transcriber = createGoogleSTTTranscriber({
 *   apiKey: process.env.GOOGLE_API_KEY,
 *   languageCode: 'en-US',
 * });
 *
 * const pipeline = createAudioPipeline({ transcriber });
 * ```
 */
declare function createGoogleSTTTranscriber(config: GoogleSTTTranscriberOptions): TranscriberFunction;

/**
 * Options accepted by {@link createRealtimeChecker}.
 */
type RealtimeCheckerOptions = {
  /** Transcription function applied to incoming audio */
  transcriber: TranscriberFunction;
  /** Optional callback that receives a check result when profanity is detected */
  onProfanityDetected?: (result: AudioCheckResult) => void;
  /** NOTE(review): presumably the buffering window in milliseconds - confirm */
  bufferDurationMs?: number;
  /** Languages used for profanity detection */
  languages?: Language[];
  /** Turn leetspeak detection on */
  detectLeetspeak?: boolean;
};

/**
 * Aggregate figures reported by the realtime checker's `getSummary`.
 * NOTE(review): `flagRate` is presumably flaggedChunks / totalChunks, but
 * the formula is not visible here - confirm against the implementation.
 */
type RealtimeSummary = {
  totalChunks: number;
  flaggedChunks: number;
  cleanChunks: number;
  flagRate: number;
  allProfaneWords: string[];
  fullTranscript: string;
};

/**
 * Object returned by {@link createRealtimeChecker}.
 */
type RealtimeAudioChecker = {
  /**
   * Processes one audio chunk and resolves to its check result.
   */
  processChunk(chunk: AudioInput): Promise<AudioCheckResult>;
  /**
   * Returns a summary covering all processed chunks.
   */
  getSummary(): RealtimeSummary;
  /**
   * Resets the checker state.
   */
  reset(): void;
};

/**
 * Real-time audio stream checker, intended for live audio moderation
 * (e.g. voice chat, podcasts).
 *
 * @param config - Transcriber plus optional callback and detection options.
 * @returns An object for feeding chunks, reading a summary and resetting.
 *
 * @example
 * ```typescript
 * const streamChecker = createRealtimeChecker({
 *   transcriber: myTranscriber,
 *   onProfanityDetected: (result) => {
 *     console.log('Profanity detected:', result.profaneWords);
 *     // Trigger beep, mute, or warning
 *   },
 * });
 *
 * // Feed audio chunks as they arrive
 * audioStream.on('data', (chunk) => {
 *   streamChecker.processChunk(chunk);
 * });
 *
 * // Get summary when done
 * const summary = streamChecker.getSummary();
 * ```
 */
declare function createRealtimeChecker(config: RealtimeCheckerOptions): RealtimeAudioChecker;

export {
  type AudioCheckResult,
  type AudioInput,
  type AudioPipelineConfig,
  type AudioSegmentResult,
  CheckProfanityResult,
  FilterConfig,
  Language,
  type TranscriberFunction,
  createAudioPipeline,
  createGoogleSTTTranscriber,
  createRealtimeChecker,
  createWhisperTranscriber,
};