UNPKG

obscenity

Version:

Robust, extensible profanity filter.

343 lines (342 loc) 17.3 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.englishDataset = exports.englishRecommendedTransformers = exports.englishRecommendedWhitelistMatcherTransformers = exports.englishRecommendedBlacklistMatcherTransformers = void 0; const DataSet_1 = require("../dataset/DataSet"); const Pattern_1 = require("../pattern/Pattern"); const collapse_duplicates_1 = require("../transformer/collapse-duplicates"); const resolve_confusables_1 = require("../transformer/resolve-confusables"); const resolve_leetspeak_1 = require("../transformer/resolve-leetspeak"); const to_ascii_lowercase_1 = require("../transformer/to-ascii-lowercase"); /** * A set of transformers to be used when matching blacklisted patterns with the * [[englishDataset | english word dataset]]. */ exports.englishRecommendedBlacklistMatcherTransformers = [ (0, resolve_confusables_1.resolveConfusablesTransformer)(), (0, resolve_leetspeak_1.resolveLeetSpeakTransformer)(), (0, to_ascii_lowercase_1.toAsciiLowerCaseTransformer)(), // See #23 and #46. // skipNonAlphabeticTransformer(), (0, collapse_duplicates_1.collapseDuplicatesTransformer)({ defaultThreshold: 1, customThresholds: new Map([ ['b', 2], ['e', 2], ['o', 2], ['l', 2], ['s', 2], ['g', 2], // ni_gg_er ]), }), ]; /** * A set of transformers to be used when matching whitelisted terms with the * [[englishDataset | english word dataset]]. */ exports.englishRecommendedWhitelistMatcherTransformers = [ (0, to_ascii_lowercase_1.toAsciiLowerCaseTransformer)(), (0, collapse_duplicates_1.collapseDuplicatesTransformer)({ defaultThreshold: Number.POSITIVE_INFINITY, customThresholds: new Map([[' ', 1]]), // collapse spaces }), ]; /** * Recommended transformers to be used with the [[englishDataset | english word * dataset]] and the [[RegExpMatcher]]. */ exports.englishRecommendedTransformers = { blacklistMatcherTransformers: exports.englishRecommendedBlacklistMatcherTransformers, whitelistMatcherTransformers: exports.englishRecommendedWhitelistMatcherTransformers, }; /** * A dataset of profane English words. * * @example * ```typescript * const matcher = new RegExpMatcher({ * ...englishDataset.build(), * ...englishRecommendedTransformers, * }); * ``` * @example * ```typescript * // Extending the data-set by adding a new word and removing an existing one. * const myDataset = new DataSet() * .addAll(englishDataset) * .removePhrasesIf((phrase) => phrase.metadata.originalWord === 'vagina') * .addPhrase((phrase) => phrase.addPattern(pattern`|balls|`)); * ``` * @copyright * The words are taken from the [cuss](https://github.com/words/cuss) project, * with some modifications. * * ```text * (The MIT License) * * Copyright (c) 2016 Titus Wormer <tituswormer@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * 'Software'), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * ``` */ exports.englishDataset = new DataSet_1.DataSet() .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'abo' }).addPattern((0, Pattern_1.pattern) `|ab[b]o[s]|`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'abeed' }).addPattern((0, Pattern_1.pattern) `ab[b]eed`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'africoon' }).addPattern((0, Pattern_1.pattern) `africoon`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'anal' }) .addPattern((0, Pattern_1.pattern) `|anal`) .addWhitelistedTerm('analabos') .addWhitelistedTerm('analagous') .addWhitelistedTerm('analav') .addWhitelistedTerm('analy') .addWhitelistedTerm('analog') .addWhitelistedTerm('an al') .addPattern((0, Pattern_1.pattern) `danal`) .addPattern((0, Pattern_1.pattern) `eanal`) .addPattern((0, Pattern_1.pattern) `fanal`) .addWhitelistedTerm('fan al') .addPattern((0, Pattern_1.pattern) `ganal`) .addWhitelistedTerm('gan al') .addPattern((0, Pattern_1.pattern) `ianal`) .addWhitelistedTerm('ian al') .addPattern((0, Pattern_1.pattern) `janal`) .addWhitelistedTerm('trojan al') .addPattern((0, Pattern_1.pattern) `kanal`) .addPattern((0, Pattern_1.pattern) `lanal`) .addWhitelistedTerm('lan al') .addPattern((0, Pattern_1.pattern) `lanal`) .addWhitelistedTerm('lan al') .addPattern((0, Pattern_1.pattern) `oanal|`) .addPattern((0, Pattern_1.pattern) `panal`) .addWhitelistedTerm('pan al') .addPattern((0, Pattern_1.pattern) `qanal`) .addPattern((0, Pattern_1.pattern) `ranal`) .addPattern((0, Pattern_1.pattern) `sanal`) .addPattern((0, Pattern_1.pattern) `tanal`) .addWhitelistedTerm('tan al') .addPattern((0, Pattern_1.pattern) `uanal`) .addWhitelistedTerm('uan al') .addPattern((0, Pattern_1.pattern) `vanal`) .addWhitelistedTerm('van al') .addPattern((0, Pattern_1.pattern) `wanal`) .addPattern((0, Pattern_1.pattern) `xanal`) .addWhitelistedTerm('texan al') .addPattern((0, Pattern_1.pattern) `yanal`) .addPattern((0, Pattern_1.pattern) `zanal`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'anus' }) .addPattern((0, Pattern_1.pattern) `anus`) .addWhitelistedTerm('an us') .addWhitelistedTerm('tetanus') .addWhitelistedTerm('uranus') .addWhitelistedTerm('janus') .addWhitelistedTerm('manus')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'arabush' }).addPattern((0, Pattern_1.pattern) `arab[b]ush`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'arse' }) .addPattern((0, Pattern_1.pattern) `|ars[s]e`) .addWhitelistedTerm('arsen')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'ass' }) .addPattern((0, Pattern_1.pattern) `|ass`) .addWhitelistedTerm('assa') .addWhitelistedTerm('assem') .addWhitelistedTerm('assen') .addWhitelistedTerm('asser') .addWhitelistedTerm('asset') .addWhitelistedTerm('assev') .addWhitelistedTerm('assi') .addWhitelistedTerm('assoc') .addWhitelistedTerm('assoi') .addWhitelistedTerm('assu')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'bastard' }).addPattern((0, Pattern_1.pattern) `bas[s]tard`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'bestiality' }).addPattern((0, Pattern_1.pattern) `be[e][a]s[s]tial`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'bitch' }) .addPattern((0, Pattern_1.pattern) `bitch`) .addPattern((0, Pattern_1.pattern) `bich|`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'blowjob' }).addPattern((0, Pattern_1.pattern) `b[b]l[l][o]wj[o]b`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'bollocks' }).addPattern((0, Pattern_1.pattern) `bol[l]ock`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'boob' }).addPattern((0, Pattern_1.pattern) `boob`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'boonga' }) .addPattern((0, Pattern_1.pattern) `boonga`) .addWhitelistedTerm('baboon ga')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'buttplug' }).addPattern((0, Pattern_1.pattern) `buttplug`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'chingchong' }).addPattern((0, Pattern_1.pattern) `chingchong`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'chink' }) .addPattern((0, Pattern_1.pattern) `chink`) .addWhitelistedTerm('chin k')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'cock' }) .addPattern((0, Pattern_1.pattern) `|cock|`) .addPattern((0, Pattern_1.pattern) `|cocks`) .addPattern((0, Pattern_1.pattern) `|cockp`) .addPattern((0, Pattern_1.pattern) `|cocke[e]|`) .addWhitelistedTerm('cockney')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'cuck' }) .addPattern((0, Pattern_1.pattern) `cuck`) .addWhitelistedTerm('cuckoo')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'cum' }) .addPattern((0, Pattern_1.pattern) `|cum`) .addWhitelistedTerm('cumu') .addWhitelistedTerm('cumb')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'cunt' }) .addPattern((0, Pattern_1.pattern) `|cunt`) .addPattern((0, Pattern_1.pattern) `cunt|`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'deepthroat' }) .addPattern((0, Pattern_1.pattern) `deepthro[o]at`) .addPattern((0, Pattern_1.pattern) `deepthro[o]t`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'dick' }) .addPattern((0, Pattern_1.pattern) `|dck|`) .addPattern((0, Pattern_1.pattern) `dick`) .addWhitelistedTerm('benedick') .addWhitelistedTerm('dickens')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'dildo' }).addPattern((0, Pattern_1.pattern) `dildo`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'doggystyle' }).addPattern((0, Pattern_1.pattern) `d[o]g[g]ys[s]t[y]l[l]`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'double penetration' }).addPattern((0, Pattern_1.pattern) `double penetra`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'dyke' }) .addPattern((0, Pattern_1.pattern) `dyke`) .addWhitelistedTerm('van dyke')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'ejaculate' }) .addPattern((0, Pattern_1.pattern) `e[e]jacul`) .addPattern((0, Pattern_1.pattern) `e[e]jakul`) .addPattern((0, Pattern_1.pattern) `e[e]acul[l]ate`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'fag' }) .addPattern((0, Pattern_1.pattern) `|fag`) .addPattern((0, Pattern_1.pattern) `fggot`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'felch' }).addPattern((0, Pattern_1.pattern) `fe[e]l[l]ch`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'fellatio' }).addPattern((0, Pattern_1.pattern) `f[e][e]llat`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'finger bang' }).addPattern((0, Pattern_1.pattern) `fingerbang`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'fisting' }).addPattern((0, Pattern_1.pattern) `fistin`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'fuck' }) .addPattern((0, Pattern_1.pattern) `f[?]ck`) .addPattern((0, Pattern_1.pattern) `|fk`) .addPattern((0, Pattern_1.pattern) `|fu|`) .addPattern((0, Pattern_1.pattern) `|fuk`) .addWhitelistedTerm('fick') .addWhitelistedTerm('kung-fu') .addWhitelistedTerm('kung fu')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'gangbang' }).addPattern((0, Pattern_1.pattern) `g[?]ngbang`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'handjob' }).addPattern((0, Pattern_1.pattern) `h[?]ndjob`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'hentai' }).addPattern((0, Pattern_1.pattern) `h[e][e]ntai`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'hooker' }).addPattern((0, Pattern_1.pattern) `hooker`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'incest' }).addPattern((0, Pattern_1.pattern) `incest`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'jerk off' }).addPattern((0, Pattern_1.pattern) `jerkoff`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'jizz' }).addPattern((0, Pattern_1.pattern) `jizz`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'kike' }).addPattern((0, Pattern_1.pattern) `kike`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'lubejob' }).addPattern((0, Pattern_1.pattern) `lubejob`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'masturbate' }) .addPattern((0, Pattern_1.pattern) `m[?]sturbate`) .addPattern((0, Pattern_1.pattern) `masterbate`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'negro' }) .addPattern((0, Pattern_1.pattern) `negro`) .addWhitelistedTerm('montenegro') .addWhitelistedTerm('negron') .addWhitelistedTerm('stoneground') .addWhitelistedTerm('winegrow')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'nigger' }) .addPattern((0, Pattern_1.pattern) `n[i]gger`) .addPattern((0, Pattern_1.pattern) `n[i]gga`) .addPattern((0, Pattern_1.pattern) `|nig|`) .addPattern((0, Pattern_1.pattern) `|nigs|`) .addWhitelistedTerm('snigger')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'orgasm' }) .addPattern((0, Pattern_1.pattern) `[or]gasm`) .addWhitelistedTerm('gasma')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'orgy' }) .addPattern((0, Pattern_1.pattern) `orgy`) .addPattern((0, Pattern_1.pattern) `orgies`) .addWhitelistedTerm('porgy')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'penis' }) .addPattern((0, Pattern_1.pattern) `pe[e]nis`) .addPattern((0, Pattern_1.pattern) `|pnis`) .addWhitelistedTerm('pen is')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'piss' }).addPattern((0, Pattern_1.pattern) `|piss`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'porn' }) .addPattern((0, Pattern_1.pattern) `|prn|`) .addPattern((0, Pattern_1.pattern) `porn`) .addWhitelistedTerm('p orna')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'prick' }).addPattern((0, Pattern_1.pattern) `|prick[s]|`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'pussy' }).addPattern((0, Pattern_1.pattern) `p[u]ssy`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'rape' }) .addPattern((0, Pattern_1.pattern) `|rape`) .addPattern((0, Pattern_1.pattern) `|rapis[s]t`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'retard' }).addPattern((0, Pattern_1.pattern) `retard`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'scat' }).addPattern((0, Pattern_1.pattern) `|s[s]cat|`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'semen' }).addPattern((0, Pattern_1.pattern) `|s[s]e[e]me[e]n`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'sex' }) .addPattern((0, Pattern_1.pattern) `|s[s]e[e]x|`) .addPattern((0, Pattern_1.pattern) `|s[s]e[e]xy|`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'shit' }) .addPattern((0, Pattern_1.pattern) `|shit`) .addPattern((0, Pattern_1.pattern) `shit|`) .addWhitelistedTerm('s hit') .addWhitelistedTerm('sh it') .addWhitelistedTerm('shi t') .addWhitelistedTerm('shitake')) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'slut' }).addPattern((0, Pattern_1.pattern) `s[s]lut`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'spastic' }).addPattern((0, Pattern_1.pattern) `|spastic`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'tit' }) .addPattern((0, Pattern_1.pattern) `|tit|`) .addPattern((0, Pattern_1.pattern) `|tits|`) .addPattern((0, Pattern_1.pattern) `|titt`) .addPattern((0, Pattern_1.pattern) `|tiddies`) .addPattern((0, Pattern_1.pattern) `|tities`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'tranny' }).addPattern((0, Pattern_1.pattern) `tranny`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'turd' }) .addPattern((0, Pattern_1.pattern) `|turd`) .addWhitelistedTerm('turducken')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'twat' }) .addPattern((0, Pattern_1.pattern) `|twat`) .addWhitelistedTerm('twattle')) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'vagina' }) .addPattern((0, Pattern_1.pattern) `vagina`) .addPattern((0, Pattern_1.pattern) `|v[?]gina`)) .addPhrase((phrase) => phrase.setMetadata({ originalWord: 'wank' }).addPattern((0, Pattern_1.pattern) `|wank`)) .addPhrase((phrase) => phrase .setMetadata({ originalWord: 'whore' }) .addPattern((0, Pattern_1.pattern) `|wh[o]re|`) .addPattern((0, Pattern_1.pattern) `|who[o]res[s]|`) .addWhitelistedTerm("who're"));