UNPKG

obscenity

Version:

Robust, extensible profanity filter.

112 lines (111 loc) 4.05 kB
import type { TransformerContainer } from '../../transformer/Transformers';
import type { BlacklistedTerm } from '../BlacklistedTerm';
import type { Matcher } from '../Matcher';
import type { MatchPayload } from '../MatchPayload';
/**
 * An implementation of the [[Matcher]] interface using regular expressions and
 * string searching methods.
 */
export declare class RegExpMatcher implements Matcher {
    // Private members are declared without types in this declaration file;
    // the first four share their names with the constructor options.
    private readonly blacklistedTerms;
    private readonly whitelistedTerms;
    private readonly blacklistMatcherTransformers;
    private readonly whitelistMatcherTransformers;
    /**
     * Creates a new [[RegExpMatcher]] with the options given.
     *
     * @example
     * ```typescript
     * // Use the options provided by the English preset.
     * const matcher = new RegExpMatcher({
     * 	...englishDataset.build(),
     * 	...englishRecommendedTransformers,
     * });
     * ```
     * @example
     * ```typescript
     * // Simple matcher that only has blacklisted patterns.
     * const matcher = new RegExpMatcher({
     * 	blacklistedTerms: assignIncrementingIds([
     * 		pattern`fuck`,
     * 		pattern`f?uck`, // wildcards (?)
     * 		pattern`bitch`,
     * 		pattern`b[i]tch` // optionals ([i] matches either "i" or "")
     * 	]),
     * });
     *
     * // Check whether some string matches any of the patterns.
     * const doesMatch = matcher.hasMatch('fuck you bitch');
     * ```
     * @example
     * ```typescript
     * // A more advanced example, with transformers and whitelisted terms.
     * const matcher = new RegExpMatcher({
     * 	blacklistedTerms: [
     * 		{ id: 1, pattern: pattern`penis` },
     * 		{ id: 2, pattern: pattern`fuck` },
     * 	],
     * 	whitelistedTerms: ['pen is'],
     * 	blacklistMatcherTransformers: [
     * 		resolveConfusablesTransformer(), // '🅰' => 'a'
     * 		resolveLeetSpeakTransformer(), // '$' => 's'
     * 		foldAsciiCharCaseTransformer(), // case insensitive matching
     * 		skipNonAlphabeticTransformer(), // 'f.u...c.k' => 'fuck'
     * 		collapseDuplicatesTransformer(), // 'aaaa' => 'a'
     * 	],
     * });
     *
     * // Output all matches.
     * console.log(matcher.getAllMatches('fu.....uuuuCK the pen is mightier than the sword!'));
     * ```
     * @param options - Options to use.
     */
    constructor({ blacklistedTerms, whitelistedTerms, blacklistMatcherTransformers, whitelistMatcherTransformers, }: RegExpMatcherOptions);
    /**
     * Returns all matches found in the input text, one [[MatchPayload]] per
     * match.
     *
     * @param input - Text to search.
     * @param sorted - Optional flag. NOTE(review): its exact effect is not
     * visible from this declaration; presumably it requests the matches in
     * sorted order -- confirm against the [[Matcher]] interface.
     * @returns The list of matches.
     */
    getAllMatches(input: string, sorted?: boolean): MatchPayload[];
    /**
     * Checks whether the input text matches any of the patterns.
     *
     * @param input - Text to check.
     * @returns A boolean indicating whether a match was found.
     */
    hasMatch(input: string): boolean;
    // Internal helpers; their signatures are not exposed in this declaration
    // file.
    private getWhitelistedIntervals;
    private applyTransformers;
    private compileTerms;
    private validateWhitelistedTerms;
}
/**
 * Options for the [[RegExpMatcher]].
 */
export interface RegExpMatcherOptions {
    /**
     * A set of transformers that should be applied to the input text before
     * blacklisted patterns are matched. This does not affect the matching of
     * whitelisted terms.
     *
     * Transformers will be applied in the order they appear.
     *
     * @default []
     */
    blacklistMatcherTransformers?: TransformerContainer[];
    /**
     * A list of blacklisted terms.
     */
    blacklistedTerms: BlacklistedTerm[];
    /**
     * A set of transformers that should be applied to the input text before
     * whitelisted terms are matched. This does not affect the matching of
     * blacklisted terms.
     *
     * Transformers will be applied in the order they appear.
     *
     * @default []
     */
    whitelistMatcherTransformers?: TransformerContainer[];
    /**
     * A list of whitelisted terms. If a whitelisted term matches some part of
     * the text, a match of a blacklisted pattern within that part of the text
     * will not be emitted.
     *
     * For example, if we had a pattern `penis` and a whitelisted term `pen is`,
     * no matches would be reported for the input text `the pen is mightier
     * than the sword.`
     *
     * @default []
     */
    whitelistedTerms?: string[];
}