obscenity
Version:
Robust, extensible profanity filter.
112 lines (111 loc) • 4.05 kB
TypeScript
import type { TransformerContainer } from '../../transformer/Transformers';
import type { BlacklistedTerm } from '../BlacklistedTerm';
import type { Matcher } from '../Matcher';
import type { MatchPayload } from '../MatchPayload';
/**
* An implementation of the [[Matcher]] interface using regular expressions and
* string searching methods.
*
* A matcher is configured once through its constructor options
* ([[RegExpMatcherOptions]]) and can then be queried with [[hasMatch]] and
* [[getAllMatches]].
*/
export declare class RegExpMatcher implements Matcher {
// Private state. Types are omitted in this declaration file; the names mirror
// the constructor options of the same name.
private readonly blacklistedTerms;
private readonly whitelistedTerms;
private readonly blacklistMatcherTransformers;
private readonly whitelistMatcherTransformers;
/**
* Creates a new [[RegExpMatcher]] with the options given.
*
* @example
* ```typescript
* // Use the options provided by the English preset.
* const matcher = new RegExpMatcher({
* ...englishDataset.build(),
* ...englishRecommendedTransformers,
* });
* ```
* @example
* ```typescript
* // Simple matcher that only has blacklisted patterns.
* const matcher = new RegExpMatcher({
* blacklistedTerms: assignIncrementingIds([
* pattern`fuck`,
* pattern`f?uck`, // wildcards (?)
* pattern`bitch`,
* pattern`b[i]tch` // optionals ([i] matches either "i" or "")
* ]),
* });
*
* // Check whether some string matches any of the patterns.
* const doesMatch = matcher.hasMatch('fuck you bitch');
* ```
* @example
* ```typescript
* // A more advanced example, with transformers and whitelisted terms.
* const matcher = new RegExpMatcher({
* blacklistedTerms: [
* { id: 1, pattern: pattern`penis` },
* { id: 2, pattern: pattern`fuck` },
* ],
* whitelistedTerms: ['pen is'],
* blacklistMatcherTransformers: [
* resolveConfusablesTransformer(), // '🅰' => 'a'
* resolveLeetSpeakTransformer(), // '$' => 's'
* foldAsciiCharCaseTransformer(), // case insensitive matching
* skipNonAlphabeticTransformer(), // 'f.u...c.k' => 'fuck'
* collapseDuplicatesTransformer(), // 'aaaa' => 'a'
* ],
* });
*
* // Output all matches.
* console.log(matcher.getAllMatches('fu.....uuuuCK the pen is mightier than the sword!'));
* ```
* @param options - Options to use. Only `blacklistedTerms` is required; see
* [[RegExpMatcherOptions]] for the remaining fields and their defaults.
*/
constructor({ blacklistedTerms, whitelistedTerms, blacklistMatcherTransformers, whitelistMatcherTransformers, }: RegExpMatcherOptions);
/**
* Returns the matches of blacklisted patterns found in the input text.
*
* @param input - Text to search.
* @param sorted - Optional flag. NOTE(review): presumably controls whether the
* returned matches are ordered; the ordering criteria and the default value
* are not visible in this declaration file - confirm against the [[Matcher]]
* interface.
* @returns One [[MatchPayload]] per reported match.
*/
getAllMatches(input: string, sorted?: boolean): MatchPayload[];
/**
* Checks whether the input text matches any of the blacklisted patterns.
*
* @param input - Text to check.
* @returns Whether a match was found.
*/
hasMatch(input: string): boolean;
// Private helpers. Their signatures are omitted in this declaration file, so
// their contracts cannot be documented from here.
private getWhitelistedIntervals;
private applyTransformers;
private compileTerms;
private validateWhitelistedTerms;
}
/**
* Options for the [[RegExpMatcher]].
*/
export interface RegExpMatcherOptions {
/**
* A list of transformers that should be applied to the input text before
* blacklisted patterns are matched. This does not affect the matching of
* whitelisted terms.
*
* Transformers will be applied in the order they appear.
*
* @default []
*/
blacklistMatcherTransformers?: TransformerContainer[];
/**
* A list of blacklisted terms.
*/
blacklistedTerms: BlacklistedTerm[];
/**
* A list of transformers that should be applied to the input text before
* whitelisted terms are matched. This does not affect the matching of
* blacklisted terms.
*
* Transformers will be applied in the order they appear.
*
* @default []
*/
whitelistMatcherTransformers?: TransformerContainer[];
/**
* A list of whitelisted terms. If a whitelisted term matches some part of
* the text, a match of a blacklisted pattern within that part of the text
* will not be emitted.
*
* For example, if we had a pattern `penis` and a whitelisted term `pen is`,
* no matches would be reported for the input text `the pen is mightier
* than the sword.`
*
* @default []
*/
whitelistedTerms?: string[];
}