glin-profanity
Version:
Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content…
256 lines (252 loc) • 7.29 kB
TypeScript
import { F as Filter } from '../Filter-BGcyIAvO.js';
import { L as Language, F as FilterConfig, C as CheckProfanityResult } from '../types-B9c_ik4k.js';
/**
* Audio Pipeline Utilities for glin-profanity
*
* Provides utilities for checking profanity in audio content.
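* This module does NOT bundle a speech-to-text engine - transcription is
* delegated to a function you supply, backed by Whisper, Google STT,
* Azure Speech, etc. (see createWhisperTranscriber / createGoogleSTTTranscriber
* for helpers that build such a function).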
*
* @example
* ```typescript
* import { createAudioPipeline } from 'glin-profanity/audio';
* import OpenAI from 'openai';
*
* const openai = new OpenAI();
* const pipeline = createAudioPipeline({
* transcriber: async (audioBuffer) => {
* const response = await openai.audio.transcriptions.create({
* file: audioBuffer,
* model: 'whisper-1',
* });
* return response.text;
* },
* });
*
* const result = await pipeline.checkAudio(audioFile);
* console.log(result.containsProfanity);
* ```
*
* @packageDocumentation
* @module glin-profanity/audio
*/
/**
* Signature of the user-supplied speech-to-text callback.
*
* Receives an {@link AudioInput} and returns a promise that resolves to the
* transcribed text as a plain string.
*/
type TranscriberFunction = (audio: AudioInput) => Promise<string>;
/**
* Accepted representations of audio data: a `Buffer`, a `Uint8Array`,
* a `Blob`, a `File`, or a `string`.
*
* NOTE(review): the meaning of the `string` form (file path, URL, base64, ...)
* is not visible from this declaration - confirm against the transcriber in use.
*/
type AudioInput = Buffer | Uint8Array | Blob | File | string;
/**
* Options accepted by `createAudioPipeline`.
*
* Only `transcriber` is required; every other member is optional.
* NOTE(review): the defaults applied when an optional member is omitted are
* not visible from this declaration.
*/
interface AudioPipelineConfig {
/** Speech-to-text callback that turns audio into text (REQUIRED) */
transcriber: TranscriberFunction;
/** Languages to use for profanity detection */
languages?: Language[];
/** Enable leetspeak detection */
detectLeetspeak?: boolean;
/** Enable Unicode normalization */
normalizeUnicode?: boolean;
/**
* Partial filter configuration.
* NOTE(review): precedence relative to the options above is not visible here - confirm.
*/
filterConfig?: Partial<FilterConfig>;
}
/**
* Outcome of checking one piece of audio: the transcript, the profanity
* verdict, and timing information in milliseconds.
*/
interface AudioCheckResult {
/** Whether profanity was found */
containsProfanity: boolean;
/** Text transcribed from the audio */
transcribedText: string;
/** Profane words found */
profaneWords: string[];
/** Full profanity check result */
profanityResult: CheckProfanityResult;
/**
* Processing time in milliseconds.
* NOTE(review): presumably the overall time covering both fields below - confirm.
*/
processingTimeMs: number;
/** Time spent on transcription, in milliseconds */
transcriptionTimeMs: number;
/** Time spent on the profanity check, in milliseconds */
checkTimeMs: number;
}
/**
* Per-segment result for timestamped audio, as returned by `checkSegments`.
*
* Carries the segment's time range and text together with the profanity
* verdict for that segment.
*/
interface AudioSegmentResult {
/**
* Segment index.
* NOTE(review): presumably the position of the segment in the input array - confirm.
*/
index: number;
/** Start time in seconds */
startTime: number;
/** End time in seconds */
endTime: number;
/** Transcribed text for this segment */
text: string;
/** Whether this segment contains profanity */
containsProfanity: boolean;
/** Profane words in this segment */
profaneWords: string[];
}
/**
* Creates an audio profanity checking pipeline.
*
* @param config - Pipeline options; `config.transcriber` is required.
* @returns An object exposing asynchronous audio checks (`checkAudio`,
* `checkMultiple`) and synchronous text helpers (`checkTranscript`,
* `checkSegments`, `censorTranscript`, `getFilter`).
*
* @example
* ```typescript
* // With OpenAI Whisper
* const whisperPipeline = createAudioPipeline({
* transcriber: async (audio) => {
* const formData = new FormData();
* formData.append('file', audio);
* formData.append('model', 'whisper-1');
*
* const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
* method: 'POST',
* headers: { 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` },
* body: formData,
* });
* const data = await response.json();
* return data.text;
* },
* });
* ```
*
* @example
* ```typescript
* // With Google Cloud Speech-to-Text
* const googlePipeline = createAudioPipeline({
* transcriber: async (audio) => {
* // Your Google STT implementation
* return transcribedText;
* },
* });
* ```
*/
declare function createAudioPipeline(config: AudioPipelineConfig): {
/**
* Check a single audio input for profanity.
* Returns a promise of the transcript, verdict and timing information.
*/
checkAudio(audio: AudioInput): Promise<AudioCheckResult>;
/**
* Check multiple audio inputs.
* NOTE(review): whether inputs are processed sequentially or in parallel,
* and how one failure affects the batch, is not visible here - confirm.
*/
checkMultiple(audios: AudioInput[]): Promise<AudioCheckResult[]>;
/**
* Check pre-transcribed text (if you already have a transcription).
* Synchronous: returns the profanity check result directly.
*/
checkTranscript(text: string): CheckProfanityResult;
/**
* Check timestamped segments (for Whisper with timestamps).
* Synchronous: takes segments with `startTime`, `endTime` and `text`
* and returns an array of {@link AudioSegmentResult}.
*
* @example
* ```typescript
* const segments = [
* { startTime: 0, endTime: 5, text: 'Hello everyone' },
* { startTime: 5, endTime: 10, text: 'This is a test' },
* ];
* const results = pipeline.checkSegments(segments);
* const flaggedSegments = results.filter(s => s.containsProfanity);
* ```
*/
checkSegments(segments: Array<{
startTime: number;
endTime: number;
text: string;
}>): AudioSegmentResult[];
/**
* Censor transcribed text and return the resulting string.
* NOTE(review): the value used when `replacement` is omitted is not
* visible from this declaration.
*/
censorTranscript(text: string, replacement?: string): string;
/**
* Get the underlying filter instance
*/
getFilter(): Filter;
};
/**
* Creates a transcriber function for OpenAI Whisper API
*
* @param config - `apiKey` is required; `model`, `baseUrl` and `language`
* are optional. NOTE(review): their defaults are not visible from this
* declaration.
* @returns A {@link TranscriberFunction} that can be passed as the
* `transcriber` option of `createAudioPipeline`.
*
* @example
* ```typescript
* // `apiKey` is typed as `string`, so narrow the env var before passing it.
* const apiKey = process.env.OPENAI_API_KEY;
* if (!apiKey) throw new Error('OPENAI_API_KEY is not set');
*
* const transcriber = createWhisperTranscriber({
* apiKey,
* model: 'whisper-1',
* });
*
* const pipeline = createAudioPipeline({ transcriber });
* ```
*/
declare function createWhisperTranscriber(config: {
apiKey: string;
model?: string;
baseUrl?: string;
language?: string;
}): TranscriberFunction;
/**
* Creates a transcriber function for Google Cloud Speech-to-Text
*
* @param config - `apiKey` is required; `languageCode`,
* `enableAutomaticPunctuation` and `profanityFilter` are optional.
* NOTE(review): their defaults are not visible from this declaration.
* NOTE(review): if `profanityFilter` is forwarded to Google's recognition
* config, the service may mask profane words before this library sees the
* transcript - confirm the intended interaction.
* @returns A {@link TranscriberFunction} that can be passed as the
* `transcriber` option of `createAudioPipeline`.
*
* @example
* ```typescript
* // `apiKey` is typed as `string`, so narrow the env var before passing it.
* const apiKey = process.env.GOOGLE_API_KEY;
* if (!apiKey) throw new Error('GOOGLE_API_KEY is not set');
*
* const transcriber = createGoogleSTTTranscriber({
* apiKey,
* languageCode: 'en-US',
* });
*
* const pipeline = createAudioPipeline({ transcriber });
* ```
*/
declare function createGoogleSTTTranscriber(config: {
apiKey: string;
languageCode?: string;
enableAutomaticPunctuation?: boolean;
profanityFilter?: boolean;
}): TranscriberFunction;
/**
* Real-time audio stream checker
* For live audio moderation (e.g., voice chat, podcasts)
*
* @param config - `transcriber` is required; the callback and the remaining
* options are optional. NOTE(review): the exact role of `bufferDurationMs`
* and the conditions under which `onProfanityDetected` fires are not visible
* from this declaration - confirm against the implementation.
* @returns An object with `processChunk` (asynchronous), `getSummary` and
* `reset`.
*
* @example
* ```typescript
* const streamChecker = createRealtimeChecker({
* transcriber: myTranscriber,
* onProfanityDetected: (result) => {
* console.log('Profanity detected:', result.profaneWords);
* // Trigger beep, mute, or warning
* },
* });
*
* // Feed audio chunks as they arrive
* audioStream.on('data', (chunk) => {
* // processChunk returns a promise; handle rejections so a failed
* // transcription does not become an unhandled promise rejection.
* streamChecker.processChunk(chunk).catch((err) => {
* console.error('Chunk processing failed:', err);
* });
* });
*
* // Get summary when done
* const summary = streamChecker.getSummary();
* ```
*/
declare function createRealtimeChecker(config: {
transcriber: TranscriberFunction;
onProfanityDetected?: (result: AudioCheckResult) => void;
bufferDurationMs?: number;
languages?: Language[];
detectLeetspeak?: boolean;
}): {
/**
* Process an audio chunk.
* Returns a promise of the check result for that chunk.
*/
processChunk(chunk: AudioInput): Promise<AudioCheckResult>;
/**
* Get summary of all processed chunks.
* NOTE(review): `flagRate` is presumably flaggedChunks / totalChunks, and
* its value when no chunks were processed is not visible here - confirm.
*/
getSummary(): {
totalChunks: number;
flaggedChunks: number;
cleanChunks: number;
flagRate: number;
allProfaneWords: string[];
fullTranscript: string;
};
/**
* Reset the checker state
*/
reset(): void;
};
/**
* Exported surface of the audio module: the audio-specific types and factory
* functions declared in this file, plus `CheckProfanityResult`, `FilterConfig`
* and `Language`, which this file imports from the shared types chunk and
* re-exports.
*/
export { type AudioCheckResult, type AudioInput, type AudioPipelineConfig, type AudioSegmentResult, CheckProfanityResult, FilterConfig, Language, type TranscriberFunction, createAudioPipeline, createGoogleSTTTranscriber, createRealtimeChecker, createWhisperTranscriber };