uncensor
Version:
Fill in censored words with their corresponding profanities.
166 lines (130 loc) • 4.08 kB
JavaScript
const levenshtein = require('fast-levenshtein');
const path = require('path');
const _ = require('lodash');
const fs = require('fs');
var index = {};
var srcFolder = path.join(__dirname,'..','src');
index = require( path.join(srcFolder,'profanities-index.json') );
module.exports = {
unmask : unmask,
unmask_phrase : unmask_phrase
};
function unmask_phrase(phrase){
if(typeof phrase !== 'string'){
throw new Error('Censored phrase entered must be a string!');
}
//split to words by whitespace...
var profanities = phrase.split(/[\s\.\?\/\\!,:;]/)
//filter out only the profanities
.filter(a=>/[^a-z]/i.test(a))
//unmask the profanities
.map(unmask);
//loop thru each
profanities.forEach(function(res){
//if we have found something
if(res.results.word && (profanity=res.results.word.profanity)){
phrase = phrase.replace(res.censored, profanity);
}
});
return phrase;
}
function unmask(censored, count){
count = Number(count) ? count : 10;
if(typeof censored !== 'string'){
throw new Error('Censored value entered must be a string!');
}
//lowercase...
censored = censored.toLowerCase();
//now the filler...
//first, we get the length, first & last letters
var s=0, l=0, len=0, pos=0, lev=0,
arr=[], arr2=[], words=[], steps=[], levs=[],
results = {};
arr = censored.split('');
len = arr.length;
s = arr[0];
l = arr[arr.length-1];
// console.log(arr,len,s,l);
// now attempt to find best set of words
if(index[len]){
words = index[len].ww;
steps.push('Length Check');
//check first letter
if(index[len][s]){
words = index[len][s].ww;
steps.push('Start Letter Match');
//now check the last letters
if(index[len][s][l]){
words = index[len][s][l].ww;
steps.push('Last Letter Match');
}
}
// console.log(words);
var filtered = false;
var unfiltered_words_arr = _.union([],words);
//remove words that dont match other given chars
words.forEach(function(o,j){
arr2 = o.profanity.split('');
//loop thru & test
for(var i in arr2){
i = Number(i);
//remove word if there are non matching letters
if(/[a-z0-9]/.test(arr[i]) && arr2[i]!==arr[i]){
delete words[j];
filtered = true;
//break as soon as we have found a word to disqualify/remove
break;
}
}
});
//remove nulls
words = _.compact(words);
// console.log(censored, words);
if(words.length === 0){
words = unfiltered_words_arr;
}
else if(filtered){ steps.push("Word Filtering"); }
}
//if just one word, return it
if(words.length<2){
results = {
word : words[0] || null,
other_words : []
};
}
else{
steps.push('Levenshtein Ordering [' + words.length + ' words]');
// console.log(words);
//
//run levenshtein where we have more than one word
words.forEach(function(word){
// console.log(word.profanity);
lev = levenshtein.get(word.profanity,censored);
pos = ((len-lev)/len) * word.popularity;
if(!levs[lev]){
levs[lev] = _.merge(word, { pos:pos });
}
else{
levs.push( _.merge(word, { pos:pos }) );
}
});
//sort levs...
levs = _.orderBy(_.compact(levs), ['pos'], ['desc'])
//reduce the results
.slice( 0, count )
//map return keys we wanna return
.map(a=>_.pick(a,['profanity','popularity']));
// console.log(levs)
results = {
word : levs.shift() || null,
other_words : levs
};
}
results.meta = {
count : levs.length+1,
steps: steps.join(' > '),
};
return { censored : censored,
results : results
};
}