allprofanity
Version:
A blazing-fast, multi-language profanity filter with advanced algorithms (Aho-Corasick, Bloom Filters) delivering 664% faster performance on large texts, intelligent leet-speak detection, and pattern-based context analysis
89 lines (88 loc) • 2.55 kB
TypeScript
/**
 * Universal context patterns for multi-language profanity detection
 */

/**
 * A single context pattern: a regular expression tagged with a category,
 * a numeric weight, and the languages it is associated with.
 */
export interface UniversalContextPattern {
  /** Category of the pattern; one of the eight literal tags below. */
  type:
    | "negation"
    | "possessive"
    | "compound"
    | "proper_noun"
    | "article"
    | "quotation"
    | "medical"
    | "anatomical";
  /** Regular expression associated with this pattern. */
  pattern: RegExp;
  /** Numeric weight of the pattern (scale and sign convention are not visible in this declaration). */
  weight: number;
  /** Languages the pattern is associated with (identifier format not shown here; presumably language names or codes). */
  languages: string[];
  /** Human-readable description of the pattern. */
  description: string;
  /** Example strings for the pattern. */
  examples: string[];
}
/**
 * A context rule: a regular expression paired with the action to take,
 * plus a weight and a priority.
 */
export interface ContextRule {
  /** Regular expression for this rule. */
  pattern: RegExp;
  /** Action tag for the rule; one of the four literal values below. */
  action:
    | "reduce_score"
    | "increase_score"
    | "whitelist"
    | "flag";
  /** Numeric weight of the rule (how it is combined with a score is not visible in this declaration). */
  weight: number;
  /** Numeric priority of the rule (ordering direction is not visible in this declaration). */
  priority: number;
}
/**
* Universal context patterns that work across multiple languages.
*
* Ambient declaration only: the array's contents live in the implementation
* and are not visible here. The type is a plain (non-readonly) array, so the
* type system does not prevent consumers from mutating it.
*/
export declare const UNIVERSAL_CONTEXT_PATTERNS: UniversalContextPattern[];
/**
* Language-specific context patterns.
*
* A record from a string key to a list of patterns. The keys are presumably
* language identifiers — confirm against the implementation, which is not
* visible in this declaration.
*/
export declare const LANGUAGE_SPECIFIC_PATTERNS: Record<string, UniversalContextPattern[]>;
/**
 * Context rule generator.
 *
 * Holds a set of universal patterns and a set of language-specific patterns
 * and produces {@link ContextRule} lists for individual words.
 */
export declare class ContextPatternMatcher {
  // Private state; declaration output omits the types of private members.
  private patterns;
  private languagePatterns;

  /**
   * @param languages Optional list of languages. How the list is used, and
   *   what applies when it is omitted, is not visible in this declaration.
   */
  constructor(languages?: string[]);

  /**
   * Generate context rules for a specific word.
   *
   * @param word The word to generate rules for.
   * @param languages Optional list of languages (presumably restricts which
   *   language-specific patterns contribute — confirm against the implementation).
   * @returns The generated context rules.
   */
  generateRules(word: string, languages?: string[]): ContextRule[];

  /**
   * Add a custom pattern.
   *
   * @param pattern The pattern to add.
   */
  addPattern(pattern: UniversalContextPattern): void;

  /**
   * Add a language-specific pattern.
   *
   * @param language The language the pattern is added under.
   * @param pattern The pattern to add.
   */
  addLanguagePattern(language: string, pattern: UniversalContextPattern): void;

  /**
   * Get all patterns, for debugging.
   *
   * @returns An object with the universal patterns as an array and the
   *   language-specific patterns as a `Map` from a string key to a pattern list.
   */
  getAllPatterns(): {
    universal: UniversalContextPattern[];
    languageSpecific: Map<string, UniversalContextPattern[]>;
  };

  /** Get priority for pattern type (private; signature omitted from the declaration). */
  private getPriority;

  /** Escape regex special characters (private; signature omitted from the declaration). */
  private escapeRegex;
}
/**
 * Context analyzer for scoring matches.
 */
export declare class ContextAnalyzer {
  // Private state; declaration output omits the types of private members.
  private patternMatcher;
  private contextWindow;

  /**
   * @param languages Optional list of languages. How the list is used, and
   *   what applies when it is omitted, is not visible in this declaration.
   */
  constructor(languages?: string[]);

  /**
   * Analyze the context around a potential profanity match.
   *
   * @param text The text containing the match.
   * @param matchStart Start position of the match (presumably an offset into
   *   `text` — confirm against the implementation).
   * @param matchEnd End position of the match (whether it is inclusive or
   *   exclusive is not visible in this declaration — confirm).
   * @param word The matched word.
   * @returns An object with:
   *   - `score`: a numeric score (scale not visible here);
   *   - `confidence`: `"high"`, `"medium"`, or `"low"`;
   *   - `appliedRules`: rule entries, each paired with a `matched` flag;
   *   - `context`: a string (presumably the surrounding text that was examined — confirm).
   */
  analyzeContext(
    text: string,
    matchStart: number,
    matchEnd: number,
    word: string
  ): {
    score: number;
    confidence: "high" | "medium" | "low";
    appliedRules: {
      rule: ContextRule;
      matched: boolean;
    }[];
    context: string;
  };

  /**
   * Set the context window size.
   *
   * @param size The new window size (unit not visible in this declaration).
   */
  setContextWindow(size: number): void;

  /**
   * Add a custom pattern to the analyzer.
   *
   * @param pattern The pattern to add.
   */
  addCustomPattern(pattern: UniversalContextPattern): void;
}