@kpachbiu/censure-js
Version:
Dirty Russian/English words filter
120 lines (119 loc) • 3.85 kB
JavaScript
import { patternsRu, patternsEn, replacePatternsRu } from './patterns/index';
/**
 * CENSURE - dirty words filter.
 *
 * A JavaScript library that filters out dirty, vulgar, obscene and profane
 * words in Russian or English texts.
 *
 * Key features:
 * - Find profanity (in Russian and English texts) and hide it with *** symbols.
 * - Find profanity (in Russian texts) and replace it with normative vocabulary.
 *
 * Some examples:
 * isBad('Original text with abusive words'); // returns: boolean
 * replace('Original text with abusive words'); // returns: string (abusive words masked)
 * fix('Original phrase with abusive words'); // returns: string (abusive words rephrased)
 *
 * @author jzavorohina@yandex.ru
 *
 * The list of replacements is partly based on the book
 * "Русский мат. Толковый словарь." (compiled by T. V. Akhmetova,
 * Moscow, "КОЛОКОЛ-ПРЕСС", 2000).
 */
class Censure {
    /** Mask written in place of every abusive word by `replace`. */
    REPLACEMENT = '***';

    /**
     * Compiled regular expressions keyed by "<flags>:<pattern>".
     * Static so that the pattern lists are compiled once per process
     * instead of once per word per call.
     */
    static #regexpCache = new Map();

    /**
     * Recognises patterns that must be evaluated as regular expressions:
     * those containing a literal "[а-я]+", an alternation "|" or a "^" anchor.
     * Any other pattern is compared to the word literally.
     */
    static #REGEXP_PATTERN_MARKERS = /\[а-я\]\+|\||\^/ui;

    /**
     * Searches if there are any abusive words in the text
     *
     * @param {String} string - original text
     * @return {Boolean} - whether the text contains at least one abusive word
     */
    isBad(string) {
        for (const p of this.getPatterns(string)) {
            // Patterns may be anchored to a whole word; the anchors are dropped
            // here because the pattern is tested against the entire text.
            const regexp = this.prepare(p.replace(/^\^|\$$/g, ''));
            if (regexp.test(string)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces abusive words in the text with the REPLACEMENT mask
     *
     * Words are delimited by any whitespace and the original whitespace is
     * kept as is. A masked word loses the punctuation attached to it
     * ("word!" becomes "***"). Words shorter than 3 characters are never masked.
     *
     * @param {String} string - original text
     * @return {String} - cleaned text
     */
    replace(string) {
        // Splitting on a capturing group keeps the separators in the result:
        // even indices hold words, odd indices hold the whitespace between them.
        const tokens = string.split(/(\s+)/);
        for (let i = 0; i < tokens.length; i += 2) {
            const word = tokens[i].replace(/[!?,.]/g, '');
            if (word.length < 3) {
                continue;
            }
            const lowered = word.toLowerCase();
            for (const p of this.getPatterns(word)) {
                const isMatch = Censure.#REGEXP_PATTERN_MARKERS.test(p)
                    ? this.prepare(p).test(word) // regular-expression pattern
                    : lowered === p; // whole-word pattern
                if (isMatch) {
                    tokens[i] = this.REPLACEMENT;
                    break;
                }
            }
        }
        return tokens.join('');
    }

    /**
     * Rewrites abusive words inside the text using the replacement dictionary
     *
     * Dictionary keys are applied in reverse order and every replacement works
     * on the result of the previous ones, so all matching patterns (and all of
     * their occurrences) are fixed, not only the last one that matched.
     * If the original text started with an upper-case character and something
     * was replaced, the first character of the result is upper-cased as well.
     *
     * @param {String} string - original text
     * @return {String} - fixed text
     */
    fix(string) {
        let result = string;
        const patternKeys = Object.keys(replacePatternsRu).reverse();
        for (const p of patternKeys) {
            // String.prototype.replace resets lastIndex of a global regexp,
            // so the cached expression is safe to reuse here.
            result = result.replace(this.prepare(p, 'giu'), replacePatternsRu[p]);
        }
        if (result !== string && this.checkFirstChar(string)) {
            result = this.upFirstChar(result);
        }
        return result;
    }

    /**
     * Compiles a pattern into a regular expression (cached)
     *
     * @param {String} pattern - regular expression source
     * @param {String} [flags='ui'] - regular expression flags
     * @return {RegExp} - compiled expression with lastIndex reset to 0
     */
    prepare(pattern, flags = 'ui') {
        const key = `${flags}:${pattern}`;
        let regexp = Censure.#regexpCache.get(key);
        if (regexp === undefined) {
            regexp = new RegExp(pattern, flags);
            Censure.#regexpCache.set(key, regexp);
        }
        // Global/sticky expressions are stateful; hand them out as if fresh.
        regexp.lastIndex = 0;
        return regexp;
    }

    /**
     * Chooses the pattern list(s) matching the alphabet(s) used in the text
     *
     * @param {String} string - text or single word
     * @return {Array} - Russian patterns, English patterns, both of them for
     * mixed text, or an empty array when the text has no letters of either alphabet
     */
    getPatterns(string) {
        const hasRussian = /[а-яё]/iu.test(string);
        const hasEnglish = /[a-z]/i.test(string);
        if (hasRussian && hasEnglish) {
            return [...patternsRu, ...patternsEn];
        }
        if (hasRussian) {
            return patternsRu;
        }
        if (hasEnglish) {
            return patternsEn;
        }
        return [];
    }

    /**
     * Checks whether the text starts with an upper-case character
     *
     * @param {String} string - text to check
     * @return {Boolean} - true when lower-casing the first character changes it
     */
    checkFirstChar(string) {
        const first = string.charAt(0);
        return first.toLowerCase() !== first;
    }

    /**
     * Upper-cases the first character of the text
     *
     * @param {String} string - text to capitalise
     * @return {String} - text with its first character in upper case
     */
    upFirstChar(string) {
        return string.slice(0, 1).toUpperCase() + string.slice(1);
    }
}
export default Censure;