// obscenity
// Version:
// Robust, extensible profanity filter.
// 172 lines (171 loc) • 5.48 kB
// JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PhraseBuilder = exports.DataSet = void 0;
const BlacklistedTerm_1 = require("../matcher/BlacklistedTerm");
/**
* Holds phrases (groups of patterns and whitelisted terms), optionally
* associating metadata with them.
*
* @typeParam MetadataType - Metadata type for phrases. Note that the metadata
* type is implicitly nullable.
*/
class DataSet {
    /** Registered phrase containers, in registration order. */
    containers = [];
    /** Number of patterns registered so far; also the next pattern ID to hand out. */
    patternCount = 0;
    /** Maps a pattern ID to the index (into `containers`) of the phrase that owns it. */
    patternIdToPhraseContainer = new Map();

    /**
     * Adds all the phrases from the dataset provided to this one.
     *
     * The phrase containers are shared with `other`, not cloned.
     *
     * @example
     * ```typescript
     * const customDataset = new DataSet().addAll(englishDataset);
     * ```
     * @param other - Other dataset.
     * @returns This dataset, for chaining.
     */
    addAll(other) {
        // Iterate over a snapshot: if `other` is this very dataset, registering
        // pushes onto the array being iterated and the loop would never end.
        for (const container of [...other.containers]) {
            this.registerContainer(container);
        }
        return this;
    }

    /**
     * Removes phrases that match the predicate given.
     *
     * The predicate is evaluated for every phrase before the dataset is
     * modified, so if it throws, the dataset is left exactly as it was.
     *
     * @example
     * ```typescript
     * const customDataset = new DataSet<{ originalWord: string }>()
     * 	.addAll(englishDataset)
     * 	.removePhrasesIf((phrase) => phrase.metadata?.originalWord === 'fuck');
     * ```
     * @param predicate - A predicate that determines whether or not a phrase should be removed.
     * Return `true` to remove, `false` to keep.
     * @returns This dataset, for chaining.
     */
    removePhrasesIf(predicate) {
        // Decide what to keep first; only then touch internal state.
        const kept = this.containers.filter((container) => !predicate(container));
        // Reset the internal state and rebuild it from the survivors so that
        // pattern IDs are contiguous again. The array is emptied in place so
        // its identity is preserved.
        this.patternCount = 0;
        this.patternIdToPhraseContainer.clear();
        this.containers.length = 0;
        for (const container of kept) {
            this.registerContainer(container);
        }
        return this;
    }

    /**
     * Adds a phrase to this dataset.
     *
     * @example
     * ```typescript
     * const data = new DataSet<{ originalWord: string }>()
     * 	.addPhrase((phrase) => phrase.setMetadata({ originalWord: 'fuck' })
     * 		.addPattern(pattern`fuck`)
     * 		.addPattern(pattern`f[?]ck`)
     * 		.addWhitelistedTerm('Afck'))
     * 	.build();
     * ```
     * @param fn - A function that takes a [[PhraseBuilder]], adds
     * patterns/whitelisted terms/metadata to it, and returns it.
     * @returns This dataset, for chaining.
     */
    addPhrase(fn) {
        const container = fn(new PhraseBuilder()).build();
        this.registerContainer(container);
        return this;
    }

    /**
     * Retrieves the phrase metadata associated with a pattern and returns a
     * copy of the match payload with said metadata attached to it.
     *
     * @example
     * ```typescript
     * const matches = matcher.getAllMatches(input);
     * const matchesWithPhraseMetadata = matches.map((match) => dataset.getPayloadWithPhraseMetadata(match));
     * // Now we can access the 'phraseMetadata' property:
     * const phraseMetadata = matchesWithPhraseMetadata[0].phraseMetadata;
     * ```
     * @param payload - Original match payload; its `termId` is looked up.
     * @returns A shallow copy of `payload` with a `phraseMetadata` property added.
     * @throws Error if `payload.termId` is not a pattern ID known to this dataset.
     */
    getPayloadWithPhraseMetadata(payload) {
        const offset = this.patternIdToPhraseContainer.get(payload.termId);
        if (offset === undefined) {
            throw new Error(`The pattern with ID ${payload.termId} does not exist in this dataset.`);
        }
        return {
            ...payload,
            phraseMetadata: this.containers[offset].metadata,
        };
    }

    /**
     * Returns the dataset in a format suitable for usage with the [[RegExpMatcher]].
     *
     * @example
     * ```typescript
     * // With the RegExpMatcher:
     * const matcher = new RegExpMatcher({
     * 	...dataset.build(),
     * 	// additional options here
     * });
     * ```
     * @returns An object with the flattened `blacklistedTerms` and `whitelistedTerms`.
     */
    build() {
        // NOTE(review): assignIncrementingIds is defined elsewhere; presumably
        // it numbers the patterns 0, 1, 2, ... in order, matching the IDs
        // recorded by registerContainer - confirm against BlacklistedTerm.
        const patterns = this.containers.flatMap((p) => p.patterns);
        return {
            blacklistedTerms: (0, BlacklistedTerm_1.assignIncrementingIds)(patterns),
            whitelistedTerms: this.containers.flatMap((p) => p.whitelistedTerms),
        };
    }

    /**
     * Appends a phrase container and records, for each of its patterns, the
     * next free pattern ID mapped to the container's index.
     *
     * @param container - Phrase container to register.
     */
    registerContainer(container) {
        const offset = this.containers.push(container) - 1;
        for (let i = 0; i < container.patterns.length; i++) {
            this.patternIdToPhraseContainer.set(this.patternCount, offset);
            this.patternCount++;
        }
    }
}
exports.DataSet = DataSet;
/**
* Builder for phrases.
*/
class PhraseBuilder {
    /** Patterns added so far, in insertion order. */
    patterns = [];
    /** Whitelisted terms added so far, in insertion order. */
    whitelistedTerms = [];
    /** Metadata for the phrase; stays `undefined` until `setMetadata` is called. */
    metadata;

    /**
     * Associates a pattern with this phrase.
     *
     * @param pattern - Pattern to add.
     * @returns This builder, for chaining.
     */
    addPattern(pattern) {
        this.patterns.push(pattern);
        return this;
    }

    /**
     * Associates a whitelisted pattern with this phrase.
     *
     * @param term - Whitelisted term to add.
     * @returns This builder, for chaining.
     */
    addWhitelistedTerm(term) {
        this.whitelistedTerms.push(term);
        return this;
    }

    /**
     * Associates some metadata with this phrase, replacing any set earlier.
     *
     * @param metadata - Metadata to use.
     * @returns This builder, for chaining.
     */
    setMetadata(metadata) {
        this.metadata = metadata;
        return this;
    }

    /**
     * Builds the phrase, returning a [[PhraseContainer]] for use with the
     * [[DataSet]].
     *
     * The returned arrays are copies. A [[DataSet]] records one pattern ID per
     * pattern at registration time, so a container must not change if this
     * builder is modified after building. The metadata value itself is passed
     * through as-is (not cloned).
     *
     * @returns An object with `patterns`, `whitelistedTerms` and `metadata`.
     */
    build() {
        return {
            patterns: [...this.patterns],
            whitelistedTerms: [...this.whitelistedTerms],
            metadata: this.metadata,
        };
    }
}
exports.PhraseBuilder = PhraseBuilder;