UNPKG

obscenity

Version:

Robust, extensible profanity filter.

172 lines (171 loc) 5.48 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PhraseBuilder = exports.DataSet = void 0; const BlacklistedTerm_1 = require("../matcher/BlacklistedTerm"); /** * Holds phrases (groups of patterns and whitelisted terms), optionally * associating metadata with them. * * @typeParam MetadataType - Metadata type for phrases. Note that the metadata * type is implicitly nullable. */ class DataSet { containers = []; patternCount = 0; patternIdToPhraseContainer = new Map(); // pattern ID => index of its container /** * Adds all the phrases from the dataset provided to this one. * * @example * ```typescript * const customDataset = new DataSet().addAll(englishDataset); * ``` * @param other - Other dataset. */ addAll(other) { for (const container of other.containers) this.registerContainer(container); return this; } /** * Removes phrases that match the predicate given. * * @example * ```typescript * const customDataset = new DataSet<{ originalWord: string }>() * .addAll(englishDataset) * .removePhrasesIf((phrase) => phrase.metadata.originalWord === 'fuck'); * ``` * @param predicate - A predicate that determines whether or not a phrase should be removed. * Return `true` to remove, `false` to keep. */ removePhrasesIf(predicate) { // Clear the internal state, then gradually rebuild it by adding the // containers that should be kept. this.patternCount = 0; this.patternIdToPhraseContainer.clear(); const containers = this.containers.splice(0); for (const container of containers) { const remove = predicate(container); if (!remove) this.registerContainer(container); } return this; } /** * Adds a phrase to this dataset. * * @example * ```typescript * const data = new DataSet<{ originalWord: string }>() * .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'fuck' }) * .addPattern(pattern`fuck`) * .addPattern(pattern`f[?]ck`) * .addWhitelistedTerm('Afck')) * .build(); * ``` * @param fn - A function that takes a [[PhraseBuilder]], adds * patterns/whitelisted terms/metadata to it, and returns it. */ addPhrase(fn) { const container = fn(new PhraseBuilder()).build(); this.registerContainer(container); return this; } /** * Retrieves the phrase metadata associated with a pattern and returns a * copy of the match payload with said metadata attached to it. * * @example * ```typescript * const matches = matcher.getAllMatches(input); * const matchesWithPhraseMetadata = matches.map((match) => dataset.getPayloadWithPhraseMetadata(match)); * // Now we can access the 'phraseMetadata' property: * const phraseMetadata = matchesWithPhraseMetadata[0].phraseMetadata; * ``` * @param payload - Original match payload. */ getPayloadWithPhraseMetadata(payload) { const offset = this.patternIdToPhraseContainer.get(payload.termId); if (offset === undefined) { throw new Error(`The pattern with ID ${payload.termId} does not exist in this dataset.`); } return { ...payload, phraseMetadata: this.containers[offset].metadata, }; } /** * Returns the dataset in a format suitable for usage with the [[RegExpMatcher]]. * * @example * ```typescript * // With the RegExpMatcher: * const matcher = new RegExpMatcher({ * ...dataset.build(), * // additional options here * }); * ``` */ build() { return { blacklistedTerms: (0, BlacklistedTerm_1.assignIncrementingIds)(this.containers.flatMap((p) => p.patterns)), whitelistedTerms: this.containers.flatMap((p) => p.whitelistedTerms), }; } registerContainer(container) { const offset = this.containers.push(container) - 1; for (let i = 0, phraseId = this.patternCount; i < container.patterns.length; i++, phraseId++) { this.patternIdToPhraseContainer.set(phraseId, offset); this.patternCount++; } } } exports.DataSet = DataSet; /** * Builder for phrases. */ class PhraseBuilder { patterns = []; whitelistedTerms = []; metadata; /** * Associates a pattern with this phrase. * * @param pattern - Pattern to add. */ addPattern(pattern) { this.patterns.push(pattern); return this; } /** * Associates a whitelisted pattern with this phrase. * * @param term - Whitelisted term to add. */ addWhitelistedTerm(term) { this.whitelistedTerms.push(term); return this; } /** * Associates some metadata with this phrase. * * @param metadata - Metadata to use. */ setMetadata(metadata) { this.metadata = metadata; return this; } /** * Builds the phrase, returning a [[PhraseContainer]] for use with the * [[DataSet]]. */ build() { return { patterns: this.patterns, whitelistedTerms: this.whitelistedTerms, metadata: this.metadata, }; } } exports.PhraseBuilder = PhraseBuilder;