glin-profanity

Version:

Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content is accepted, it can be used to detect and filter profanity in that content.

www.glincker.com/tools/glin-profanity

GLINCKER/glin-profanity

269 lines (265 loc) • 8.67 kB

text/typescript

import { F as FilterConfig, C as CheckProfanityResult } from '../types-B9c_ik4k.cjs';
export { L as Language } from '../types-B9c_ik4k.cjs';

/**
 * Semantic analysis hooks for glin-profanity.
 *
 * Declares the hooks and utilities that pair keyword-based profanity
 * detection with embedding-based semantic analysis, for content moderation
 * that needs more than keyword matching.
 *
 * @example
 * ```typescript
 * import { createSemanticAnalyzer } from 'glin-profanity/ai/semantic';
 *
 * const analyzer = createSemanticAnalyzer({
 *   embeddingProvider: async (text) => {
 *     // Your embedding provider (OpenAI, Cohere, etc.)
 *     const response = await openai.embeddings.create({
 *       model: 'text-embedding-3-small',
 *       input: text,
 *     });
 *     return response.data[0].embedding;
 *   },
 * });
 *
 * const result = await analyzer.analyze('This is a test message');
 * console.log(result.combinedScore); // 0.0 - 1.0
 * ```
 *
 * @packageDocumentation
 * @module glin-profanity/ai/semantic
 */

/**
 * Asynchronous function that turns a piece of text into its embedding vector.
 */
type EmbeddingProvider = (text: string) => Promise<number[]>;

/**
 * Options accepted by {@link createSemanticAnalyzer}.
 */
interface SemanticAnalyzerConfig {
  /** Function used to generate an embedding for a text. */
  embeddingProvider: EmbeddingProvider;

  /** Base profanity-filter configuration. */
  filterConfig?: Partial<FilterConfig>;

  /** Weight of the keyword-based detection score (0-1). Default: 0.6 */
  keywordWeight?: number;

  /** Weight of the semantic similarity score (0-1). Default: 0.4 */
  semanticWeight?: number;

  /** Score threshold for flagging content (0-1). Default: 0.5 */
  threshold?: number;

  /** Embeddings of reference toxic content that inputs are compared against. */
  toxicReferenceEmbeddings?: number[][];
}

/**
 * Outcome of a semantic analysis of one text.
 */
interface SemanticAnalysisResult {
  /** Combined moderation score (0-1, higher = more problematic). */
  combinedScore: number;

  /** Keyword-based profanity score (0-1). */
  keywordScore: number;

  /** Semantic similarity score against toxic content (0-1). */
  semanticScore: number;

  /** Whether the content should be flagged, based on the threshold. */
  shouldFlag: boolean;

  /** Detailed result of the profanity check. */
  profanityResult: CheckProfanityResult;

  /** Individual components that make up the scores. */
  breakdown: {
    profaneWordCount: number;
    averageSeverity: number;
    maxSemanticSimilarity: number;
    contextScore?: number;
  };
}

/**
 * Builds a semantic analyzer that combines keyword-based profanity detection
 * with embedding-based semantic analysis.
 *
 * @param config - Analyzer options; only `embeddingProvider` is required.
 * @returns An analyzer object exposing `analyze`, `analyzeBatch`,
 *   `addToxicPatterns`, `clearCache` and `getConfig`.
 *
 * @example
 * ```typescript
 * import OpenAI from 'openai';
 * import { createSemanticAnalyzer } from 'glin-profanity/ai/semantic';
 *
 * const openai = new OpenAI();
 *
 * const analyzer = createSemanticAnalyzer({
 *   embeddingProvider: async (text) => {
 *     const response = await openai.embeddings.create({
 *       model: 'text-embedding-3-small',
 *       input: text,
 *     });
 *     return response.data[0].embedding;
 *   },
 *   keywordWeight: 0.6,
 *   semanticWeight: 0.4,
 *   threshold: 0.5,
 * });
 *
 * const result = await analyzer.analyze('Hello world');
 * console.log(result.shouldFlag); // false
 * ```
 */
declare function createSemanticAnalyzer(config: SemanticAnalyzerConfig): {
  /**
   * Analyze a text for both keyword profanity and semantic toxicity.
   */
  analyze(text: string): Promise<SemanticAnalysisResult>;

  /**
   * Analyze several texts; resolves to one result per input text.
   */
  analyzeBatch(texts: string[]): Promise<SemanticAnalysisResult[]>;

  /**
   * Register additional custom toxic reference patterns.
   */
  addToxicPatterns(patterns: string[]): Promise<void>;

  /**
   * Clear the cached toxic embeddings.
   */
  clearCache(): void;

  /**
   * Get the configuration currently in effect.
   */
  getConfig(): {
    keywordWeight: number;
    semanticWeight: number;
    threshold: number;
    filterConfig: FilterConfig;
  };
};

/**
 * Hooks for wiring semantic analysis into application flows.
 */
declare const semanticHooks: {
  /**
   * Pre-process hook for chat messages.
   *
   * @example
   * ```typescript
   * const { shouldBlock, reason, sanitized } = await semanticHooks.preProcessMessage(
   *   message,
   *   analyzer,
   *   { autoSanitize: true }
   * );
   * ```
   */
  preProcessMessage(
    message: string,
    analyzer: ReturnType<typeof createSemanticAnalyzer>,
    options?: {
      autoSanitize?: boolean;
      threshold?: number;
    },
  ): Promise<{
    shouldBlock: boolean;
    reason: string;
    sanitized: string;
    analysis: SemanticAnalysisResult;
  }>;

  /**
   * Post-process hook for AI-generated content.
   *
   * @example
   * ```typescript
   * const { isSafe, analysis } = await semanticHooks.postProcessAIResponse(
   *   aiResponse,
   *   analyzer
   * );
   * ```
   */
  postProcessAIResponse(
    response: string,
    analyzer: ReturnType<typeof createSemanticAnalyzer>,
  ): Promise<{
    isSafe: boolean;
    analysis: SemanticAnalysisResult;
    warnings: string[];
  }>;

  /**
   * Conversation monitoring hook.
   *
   * `addMessage` returns a promise, so the example awaits each call before
   * requesting the report.
   *
   * @example
   * ```typescript
   * const monitor = semanticHooks.createConversationMonitor(analyzer);
   * await monitor.addMessage('user', 'Hello');
   * await monitor.addMessage('assistant', 'Hi there!');
   * const report = await monitor.getReport();
   * ```
   */
  createConversationMonitor(analyzer: ReturnType<typeof createSemanticAnalyzer>): {
    addMessage(role: string, content: string): Promise<SemanticAnalysisResult>;
    getMessages(): Array<{
      role: string;
      content: string;
      timestamp: Date;
    }>;
    getReport(): Promise<{
      totalMessages: number;
      flaggedMessages: number;
      averageScore: number;
      isHealthy: boolean;
      flaggedIndices: number[];
    }>;
    clear(): void;
  };
};

/**
 * Options for {@link createFetchEmbeddingProvider}.
 */
interface FetchEmbeddingProviderConfig {
  /** API key used for authentication (optional for local models). */
  apiKey?: string;

  /** Model name or deployment name - REQUIRED, no default, to stay model-agnostic. */
  model: string;

  /** Base URL of the API (default: https://api.openai.com/v1). */
  baseUrl?: string;

  /** Endpoint path (default: /embeddings). */
  endpoint?: string;

  /** Custom headers to include in requests. */
  headers?: Record<string, string>;

  /** Custom response parser that extracts the embedding array from the API response. */
  parseResponse?: (response: unknown) => number[];
}

/**
 * Creates an embedding provider that uses fetch.
 * Works with any OpenAI-compatible API (OpenAI, Azure, Ollama, vLLM, etc.)
 *
 * @param config - Connection and model settings; `model` is required.
 * @returns An {@link EmbeddingProvider} function.
 *
 * @example
 * ```typescript
 * // OpenAI
 * const openaiProvider = createFetchEmbeddingProvider({
 *   apiKey: process.env.OPENAI_API_KEY,
 *   model: process.env.EMBEDDING_MODEL || 'text-embedding-3-small',
 * });
 *
 * // Azure OpenAI
 * const azureProvider = createFetchEmbeddingProvider({
 *   apiKey: process.env.AZURE_OPENAI_KEY,
 *   model: process.env.AZURE_EMBEDDING_DEPLOYMENT,
 *   baseUrl: `https://${process.env.AZURE_RESOURCE}.openai.azure.com/openai/deployments/${process.env.AZURE_EMBEDDING_DEPLOYMENT}`,
 *   headers: { 'api-version': '2024-02-01' },
 * });
 *
 * // Local Ollama
 * const ollamaProvider = createFetchEmbeddingProvider({
 *   model: 'nomic-embed-text',
 *   baseUrl: 'http://localhost:11434',
 *   endpoint: '/api/embeddings',
 *   parseResponse: (data) => (data as { embedding: number[] }).embedding,
 * });
 *
 * // Cohere
 * const cohereProvider = createFetchEmbeddingProvider({
 *   apiKey: process.env.COHERE_API_KEY,
 *   model: 'embed-english-v3.0',
 *   baseUrl: 'https://api.cohere.ai/v1',
 *   endpoint: '/embed',
 *   parseResponse: (data) => (data as { embeddings: number[][] }).embeddings[0],
 * });
 *
 * const analyzer = createSemanticAnalyzer({ embeddingProvider: openaiProvider });
 * ```
 */
declare function createFetchEmbeddingProvider(
  config: FetchEmbeddingProviderConfig,
): EmbeddingProvider;

/**
 * @deprecated Use createFetchEmbeddingProvider instead for better flexibility
 */
declare function createOpenAIEmbeddingProvider(config: {
  apiKey: string;
  model: string;
  baseUrl?: string;
}): EmbeddingProvider;

export {
  CheckProfanityResult,
  type EmbeddingProvider,
  type FetchEmbeddingProviderConfig,
  FilterConfig,
  type SemanticAnalysisResult,
  type SemanticAnalyzerConfig,
  createFetchEmbeddingProvider,
  createOpenAIEmbeddingProvider,
  createSemanticAnalyzer,
  semanticHooks,
};