glin-profanity
Version:
Glin-Profanity is a lightweight and efficient npm package designed to detect and filter profane language in text inputs across multiple languages. Whether you’re building a chat application, a comment section, or any platform where user-generated content… [description truncated]
269 lines (265 loc) • 8.67 kB
text/typescript
import { F as FilterConfig, C as CheckProfanityResult } from '../types-B9c_ik4k.cjs';
export { L as Language } from '../types-B9c_ik4k.cjs';
/**
* Semantic Analysis Hooks for glin-profanity
*
* Provides hooks and utilities for combining profanity detection with
* semantic analysis using embeddings. Useful for advanced content moderation
* that goes beyond keyword matching.
*
* @example
* ```typescript
* import { createSemanticAnalyzer } from 'glin-profanity/ai/semantic';
*
* const analyzer = createSemanticAnalyzer({
* embeddingProvider: async (text) => {
* // Your embedding provider (OpenAI, Cohere, etc.)
* const response = await openai.embeddings.create({
* model: 'text-embedding-3-small',
* input: text,
* });
* return response.data[0].embedding;
* },
* });
*
* const result = await analyzer.analyze('This is a test message');
* console.log(result.combinedScore); // 0.0 - 1.0
* ```
*
* @packageDocumentation
* @module glin-profanity/ai/semantic
*/
/**
* Embedding provider function type.
*
* An asynchronous function that receives a single text string and resolves to
* that text's embedding as a flat array of numbers. The analyzer is agnostic
* about where the embedding comes from; any function with this signature fits.
*/
type EmbeddingProvider = (text: string) => Promise<number[]>;
/**
* Semantic analyzer configuration.
*
* Only `embeddingProvider` is required; every other field is optional.
*
* NOTE(review): whether `keywordWeight` and `semanticWeight` must sum to 1, or
* are normalized internally, is not visible from this declaration — confirm
* against the implementation.
*/
interface SemanticAnalyzerConfig {
/** Function to generate embeddings for text */
embeddingProvider: EmbeddingProvider;
/** Base filter configuration (partial; any subset of `FilterConfig` fields may be supplied) */
filterConfig?: Partial<FilterConfig>;
/** Weight for keyword-based detection (0-1). Default: 0.6 */
keywordWeight?: number;
/** Weight for semantic similarity (0-1). Default: 0.4 */
semanticWeight?: number;
/** Threshold for flagging content (0-1). Default: 0.5 */
threshold?: number;
/**
* Reference toxic content embeddings for comparison.
*
* One inner array per reference embedding. Presumably each must have the same
* dimensionality as the vectors returned by `embeddingProvider` — TODO confirm.
*/
toxicReferenceEmbeddings?: number[][];
}
/**
* Semantic analysis result.
*
* The value resolved by the analyzer's `analyze` method (and, per element, by
* `analyzeBatch`). Bundles the individual scores, the flag decision, and the
* underlying keyword-based check result.
*/
interface SemanticAnalysisResult {
/**
* Combined moderation score (0-1, higher = more problematic).
*
* NOTE(review): presumably a blend of `keywordScore` and `semanticScore`
* using the configured weights — confirm against the implementation.
*/
combinedScore: number;
/** Keyword-based profanity score (0-1) */
keywordScore: number;
/** Semantic similarity score to toxic content (0-1) */
semanticScore: number;
/** Whether content should be flagged based on threshold */
shouldFlag: boolean;
/** Detailed profanity check result */
profanityResult: CheckProfanityResult;
/** Breakdown of scoring components */
breakdown: {
/** Count of profane words, per the field name; exact counting rules are not visible here. */
profaneWordCount: number;
/** NOTE(review): scale and range of the severity average are not visible here — confirm. */
averageSeverity: number;
/** Presumably the highest similarity against any toxic reference embedding — TODO confirm. */
maxSemanticSimilarity: number;
/** Optional; the conditions under which it is populated are not visible from this declaration. */
contextScore?: number;
};
}
/**
* Creates a semantic analyzer that combines keyword-based profanity detection
* with embedding-based semantic analysis.
*
* @param config - Analyzer configuration. Only `embeddingProvider` is required;
* see {@link SemanticAnalyzerConfig} for the optional weights, threshold,
* filter configuration, and reference embeddings.
* @returns An analyzer object exposing `analyze`, `analyzeBatch`,
* `addToxicPatterns`, `clearCache`, and `getConfig`. The object is returned
* synchronously; its analysis methods are asynchronous.
*
* @example
* ```typescript
* import OpenAI from 'openai';
* import { createSemanticAnalyzer } from 'glin-profanity/ai/semantic';
*
* const openai = new OpenAI();
*
* const analyzer = createSemanticAnalyzer({
* embeddingProvider: async (text) => {
* const response = await openai.embeddings.create({
* model: 'text-embedding-3-small',
* input: text,
* });
* return response.data[0].embedding;
* },
* keywordWeight: 0.6,
* semanticWeight: 0.4,
* threshold: 0.5,
* });
*
* const result = await analyzer.analyze('Hello world');
* console.log(result.shouldFlag); // false
* ```
*/
declare function createSemanticAnalyzer(config: SemanticAnalyzerConfig): {
/**
* Analyze text for both keyword profanity and semantic toxicity.
*
* Resolves to a single {@link SemanticAnalysisResult} for the given text.
*/
analyze(text: string): Promise<SemanticAnalysisResult>;
/**
* Batch analyze multiple texts.
*
* Resolves to an array of results. Presumably one result per input, in input
* order — TODO confirm against the implementation.
*/
analyzeBatch(texts: string[]): Promise<SemanticAnalysisResult[]>;
/**
* Add custom toxic reference patterns.
*
* Takes raw text patterns (not embeddings) and resolves with no value once done.
* NOTE(review): presumably each pattern is embedded via the configured
* `embeddingProvider` — confirm.
*/
addToxicPatterns(patterns: string[]): Promise<void>;
/**
* Clear cached toxic embeddings.
*
* Synchronous; returns nothing.
*/
clearCache(): void;
/**
* Get current configuration.
*
* Note that `filterConfig` is typed as a full `FilterConfig` here, whereas the
* input configuration accepts only a `Partial<FilterConfig>`. The embedding
* provider and reference embeddings are not part of the returned shape.
*/
getConfig(): {
keywordWeight: number;
semanticWeight: number;
threshold: number;
filterConfig: FilterConfig;
};
};
/**
* Hooks for integrating semantic analysis into application flows.
*
* Each hook takes an analyzer instance of the shape returned by
* `createSemanticAnalyzer`.
*/
declare const semanticHooks: {
/**
* Pre-process hook for chat messages.
*
* Takes the raw `message`, an `analyzer`, and optional `options`
* (`autoSanitize`, `threshold`). Resolves to an object containing the block
* decision (`shouldBlock`), a textual `reason`, a `sanitized` string, and the
* full `analysis` result.
*
* NOTE(review): presumably `options.threshold` overrides the analyzer's own
* threshold for this call, and `sanitized` equals the input when
* `autoSanitize` is off — neither is visible from this declaration; confirm.
*
* @example
* ```typescript
* const { shouldBlock, reason, sanitized } = await semanticHooks.preProcessMessage(
* message,
* analyzer,
* { autoSanitize: true }
* );
* ```
*/
preProcessMessage(message: string, analyzer: ReturnType<typeof createSemanticAnalyzer>, options?: {
autoSanitize?: boolean;
threshold?: number;
}): Promise<{
shouldBlock: boolean;
reason: string;
sanitized: string;
analysis: SemanticAnalysisResult;
}>;
/**
* Post-process hook for AI-generated content.
*
* Takes the generated `response` text and an `analyzer`. Resolves to a safety
* verdict (`isSafe`), the full `analysis` result, and a list of `warnings`
* strings. Unlike `preProcessMessage`, this hook accepts no options object.
*
* @example
* ```typescript
* const { isSafe, analysis } = await semanticHooks.postProcessAIResponse(
* aiResponse,
* analyzer
* );
* ```
*/
postProcessAIResponse(response: string, analyzer: ReturnType<typeof createSemanticAnalyzer>): Promise<{
isSafe: boolean;
analysis: SemanticAnalysisResult;
warnings: string[];
}>;
/**
* Conversation monitoring hook.
*
* Synchronously returns a monitor object with these members:
* - `addMessage(role, content)` is asynchronous and resolves to the analysis
* result for that message, so it should be awaited.
* - `getMessages()` synchronously returns entries of `role`, `content`, and a
* `timestamp` date.
* - `getReport()` is asynchronous and resolves to aggregate counts, an average
* score, a health flag, and the indices of flagged messages.
* - `clear()` is synchronous and returns nothing.
*
* NOTE(review): presumably `flaggedIndices` index into the `getMessages()`
* array and `clear()` resets the stored history — confirm against the
* implementation.
*
* @example
* ```typescript
* const monitor = semanticHooks.createConversationMonitor(analyzer);
* await monitor.addMessage('user', 'Hello');
* await monitor.addMessage('assistant', 'Hi there!');
* const report = await monitor.getReport();
* ```
*/
createConversationMonitor(analyzer: ReturnType<typeof createSemanticAnalyzer>): {
addMessage(role: string, content: string): Promise<SemanticAnalysisResult>;
getMessages(): {
role: string;
content: string;
timestamp: Date;
}[];
getReport(): Promise<{
totalMessages: number;
flaggedMessages: number;
averageScore: number;
isHealthy: boolean;
flaggedIndices: number[];
}>;
clear(): void;
};
};
/**
* Configuration for creating a fetch-based embedding provider.
*
* Only `model` is required; every other field is optional.
*/
interface FetchEmbeddingProviderConfig {
/** API key for authentication (optional for local models) */
apiKey?: string;
/** Model name or deployment name - REQUIRED, no defaults to stay model-agnostic */
model: string;
/** Base URL for the API (default: https://api.openai.com/v1) */
baseUrl?: string;
/** Endpoint path (default: /embeddings) */
endpoint?: string;
/** Custom headers to include in requests */
headers?: Record<string, string>;
/**
* Custom response parser - extracts embedding array from API response.
*
* The response is passed as `unknown`, so the parser must narrow or assert
* its shape itself before reading the embedding out of it.
*/
parseResponse?: (response: unknown) => number[];
}
/**
* Utility function to create an embedding provider using fetch
* Works with any OpenAI-compatible API (OpenAI, Azure, Ollama, vLLM, etc.)
*
* @param config - Provider settings. `model` is required; `apiKey`, `baseUrl`,
* `endpoint`, `headers`, and `parseResponse` are optional (see
* {@link FetchEmbeddingProviderConfig}).
* @returns An {@link EmbeddingProvider}: a function from text to a promise of
* its embedding, suitable for `SemanticAnalyzerConfig.embeddingProvider`.
*
* Note on the examples: `process.env.*` values are typed `string | undefined`,
* while `model` is a required `string`. Under strict type checking, validate
* or default any environment variable before passing it as `model`.
*
* @example
* ```typescript
* // OpenAI
* const openaiProvider = createFetchEmbeddingProvider({
* apiKey: process.env.OPENAI_API_KEY,
* model: process.env.EMBEDDING_MODEL || 'text-embedding-3-small',
* });
*
* // Azure OpenAI
* const azureProvider = createFetchEmbeddingProvider({
* apiKey: process.env.AZURE_OPENAI_KEY,
* model: process.env.AZURE_EMBEDDING_DEPLOYMENT,
* baseUrl: `https://${process.env.AZURE_RESOURCE}.openai.azure.com/openai/deployments/${process.env.AZURE_EMBEDDING_DEPLOYMENT}`,
* headers: { 'api-version': '2024-02-01' },
* });
*
* // Local Ollama
* const ollamaProvider = createFetchEmbeddingProvider({
* model: 'nomic-embed-text',
* baseUrl: 'http://localhost:11434',
* endpoint: '/api/embeddings',
* parseResponse: (data) => (data as { embedding: number[] }).embedding,
* });
*
* // Cohere
* const cohereProvider = createFetchEmbeddingProvider({
* apiKey: process.env.COHERE_API_KEY,
* model: 'embed-english-v3.0',
* baseUrl: 'https://api.cohere.ai/v1',
* endpoint: '/embed',
* parseResponse: (data) => (data as { embeddings: number[][] }).embeddings[0],
* });
*
* const analyzer = createSemanticAnalyzer({ embeddingProvider: openaiProvider });
* ```
*/
declare function createFetchEmbeddingProvider(config: FetchEmbeddingProviderConfig): EmbeddingProvider;
/**
* Creates an embedding provider from an API key, a model name, and an optional
* base URL.
*
* Compared with `createFetchEmbeddingProvider`, the config here is narrower:
* `apiKey` is required, and there are no `endpoint`, `headers`, or
* `parseResponse` options.
*
* @param config - `apiKey` and `model` are required; `baseUrl` is optional.
* @returns An {@link EmbeddingProvider} function.
*
* @deprecated Use createFetchEmbeddingProvider instead for better flexibility
*/
declare function createOpenAIEmbeddingProvider(config: {
apiKey: string;
model: string;
baseUrl?: string;
}): EmbeddingProvider;
export { CheckProfanityResult, type EmbeddingProvider, type FetchEmbeddingProviderConfig, FilterConfig, type SemanticAnalysisResult, type SemanticAnalyzerConfig, createFetchEmbeddingProvider, createOpenAIEmbeddingProvider, createSemanticAnalyzer, semanticHooks };