compromise
Version:
natural language processing in the browser
223 lines (219 loc) • 5.01 kB
JavaScript
;
const fns = require('../fns');
//names that distinctively signal a female identity, internationally
//compressed by their most frequent suffixes
//compressed with
//https://github.com/nlp-compromise/thumb/blob/master/src/compress/compress.js
// Suffix-compressed name table. Each key is a shared name ending and each
// value is a comma-separated string of the beginnings that go with it
// (e.g. key `stine` with `chri` -> presumably 'christine').
// An empty entry between commas (such as the leading ',' under `anne`, or the
// ',,' under `tina`) presumably stands for the bare suffix used as a name on
// its own -- confirm against fns.uncompress_suffixes, which consumes this
// object further down.
// Keys are ordered longest suffix first: 5-letter, then 4-, 3- and 2-letter.
const compressed = {
stine: 'chri,erne,ju,kri',
rlene: 'a,cha,da,ma',
eline: 'ad,ang,jacqu,mad',
nette: 'an,antoi,jean,ly',
elia: 'ad,am,ang,cec,c,corn,d,of,sh',
anne: ',di,je,jo,le,mari,rox,sus,suz',
elle: 'dani,est,gabri,isab,jan,mich,rach,roch',
ella: 'd,est,isab,lu,marc,st',
rina: 'kata,kat,ma,sab,t',
icia: 'al,fel,let,patr,tr',
ette: 'bernad,b,claud,paul,yv',
leen: 'ai,cath,col,ei,kath',
ndra: 'alexa,cassa,ke,sa,so',
elma: ',s,th,v',
anda: 'am,mir,w,yol',
etta: ',henri,lor,ros',
isha: 'al,ke,lat,tr',
tina: 'cris,mar,,valen',
inda: 'bel,l,luc,mel',
arla: 'c,d,k,m',
lena: 'e,je,,magda',
ine: 'carol,cather,cel,ela,franc,gerald,jan,jasm,jeann,joseph,kathar,kather,lorra,max,nad,paul',
ice: 'al,beatr,bern,cand,clar,eun,jan,patr',
ela: 'andj,ang,carm,gabri,graci,l,manu,pam',
ara: 'barb,c,cl,k,l,tam,t,z',
ora: 'c,d,fl,isid,len,l,n,teod',
ina: 'am,catal,d,georg,g,josef,n',
ita: 'an,arp,bon,juan,kav,margar,r',
nna: 'dea,do,gle,je,joha,lado,sha',
lyn: 'caro,eve,gwendo,jac,jacque,joce,mari',
ica: 'angel,er,jess,mil,mon,patr,veron',
ene: 'adri,hel,imog,ir,jol,lor',
ana: 'adri,d,jov,ju,l,sus',
nda: 'bre,gle,ly,rho,ro',
nia: 'anto,euge,so,to,virgi',
ley: 'ash,kel,kimber,les,shir',
sha: 'lata,mar,nata,ta',
ian: 'jill,lill,mar,viv',
isa: 'al,el,l,lu',
ann: ',jo,le,mary',
ise: 'den,el,elo,lou',
ida: 'a,,rach,sa',
nya: 'lato,so,ta,to',
ssa: 'aly,mari,meli,vane',
tha: 'ber,mar,saman,tabi',
ia: 'cecil,claud,cynth,dam,georg,glor,jul,luc,lyd,marc,mar,nad,oliv,silv,sof,soph,sylv,victor',
la: 'eu,kay,lei,leo,li,lo,pau,priscil,shei,ursu,vio,wil',
na: 'de,ed,leo,lor,mo,myr,ramo,re,shau,shaw,shee,ver',
le: 'ade,camil,caro,ceci,ga,gay,lucil,mab,myrt,nicho,nico',
en: 'carm,dore,ell,gretch,gw,hel,kar,kirst,krist,laur,maure',
ra: 'aud,barb,deb,elvi,javie,lau,may,my,pet,ve',
ma: 'al,em,er,fati,ir,kari,nai,nor,wil',
el: 'eth,isab,laur,mab,marib,muri,racha,rach,raqu',
ta: 'alber,al,chris,ek,kris,mandakran,mar,rober',
ey: 'audr,brittn,courtn,linds,stac,trac,whitn',
ri: 'je,kanyakuma,ka,ker,sha,she,ter',
ne: 'corin,daph,ja,laver,lyn,simo,yvon',
th: 'be,edi,elisabe,elizabe,judi,meredi,ru',
ah: 'aish,beul,debor,hann,le,rebek,sar',
is: 'delor,dor,jan,lo,mav,phyll',
da: 'a,fre,frie,hil,matil,priyamva',
ce: 'canda,constan,floren,gra,joy',
es: 'agn,delor,dolor,franc,merced',
er: 'amb,est,esth,heath,jennif',
et: 'bridg,harri,jan,margar,margr',
ca: 'bian,blan,francis,rebec',
ja: 'an,khadi,mari,son',
sa: 'el,ro,tere,there',
ee: 'aim,d,desir,ren',
va: 'a,el,e,i',
in: 'caitl,er,kar,krist',
on: 'alis,man,shann,shar',
an: 'meag,meg,megh,sus'
};
// Names written out in full (presumably the ones that do not share a frequent
// suffix and so are not worth compressing).
// Declared with `let` because the binding is reassigned below: first to the
// result of fns.uncompress_suffixes(list, compressed), later to the array
// concatenated with `no_change`. This same array is what the module exports.
let list = [
'abigail',
'aicha',
'alya',
'andrea',
'beatriz',
'bettye',
'brandi',
'brooke',
'carol',
'celeste',
'chelsea',
'cheryl',
'chloe',
'claire',
'cleo',
'constanza',
'consuelo',
'crystal',
'dominique',
'dorothea',
'eleanor',
'eliza',
'erika',
'fay',
'faye',
'fern',
'gail',
'genevieve',
'gertrude',
'gladys',
'heidi',
'ingrid',
'jade',
'jill',
'jo',
'joni',
'kate',
'katie',
'kathryn',
'kay',
'kim',
'krystal',
'latoya',
'laxmi',
'leigh',
'lindsay',
'lupe',
'lynn',
'mae',
'malika',
'margo',
'marguerite',
'marisol',
'maritza',
'maude',
'maya',
'mildred',
'miriam',
'monique',
'mrignayani',
'naomi',
'nell',
'nikki',
'olga',
'paige',
'pam',
'parvati',
'pearl',
'reba',
'robyn',
'rosalind',
'sheryl',
'sue',
'sybil',
'tami',
'tamika',
'therese',
'toni',
'gisele'
];
// Hand the literal names and the suffix table to the shared helper; whatever
// it returns becomes the working list (presumably the literal names plus every
// prefix+suffix combination -- the helper itself lives in ../fns).
list = fns.uncompress_suffixes(list, compressed);

// Alternate-spelling rules, tried in this exact order against every name.
//   when - pattern that must match for the rule to fire
//   swap - pattern handed to String#replace (first match only, no /g flag)
//   to   - replacement text
// Only the first rule tests a stricter pattern than the one it replaces:
// a trailing 'y' becomes 'i' unless the letter before it is 'e' or 'a'.
const spelling_variants = [
  { when: /[^ea]y$/, swap: /y$/, to: 'i' },
  { when: /ll/, swap: /ll/, to: 'l' },
  { when: /nn/, swap: /nn/, to: 'n' },
  { when: /ah/, swap: /ah/, to: 'a' },
  { when: /t$/, swap: /t$/, to: 'tte' },
  { when: /ey$/, swap: /ey$/, to: 'y' },
  { when: /ie$/, swap: /ie$/, to: 'y' },
  { when: /ne$/, swap: /ne$/, to: 'na' },
  { when: /ss/, swap: /ss/, to: 's' },
  { when: /rr/, swap: /rr/, to: 'r' }
];

// `list.length` is re-read on every pass, so variants appended here are
// themselves visited later in the same loop and can spawn further variants
// (e.g. 'katie' -> 'katy' -> 'kati'). Duplicates are not filtered out.
for (let idx = 0; idx < list.length; idx += 1) {
  const name = list[idx];
  spelling_variants.forEach((rule) => {
    if (rule.when.test(name)) {
      list.push(name.replace(rule.swap, rule.to));
    }
  });
}
// Names appended as-is. They are concatenated only after the spelling-variant
// loop above has finished, so none of its rewrite rules (trailing 'y' -> 'i',
// 'll' -> 'l', ...) are ever applied to these entries.
const no_change = [
'amy',
'becky',
'betty',
'beverly',
'cathy',
'dolly',
'dorothy',
'hilary',
'hillary',
'kimberly',
'rosemary',
'sally',
'shelly',
'trudy',
'tammy',
'wendy',
'ruby',
'susi'
];
// concat returns a new array; `list` is rebound to it before being exported.
list = list.concat(no_change);
// The module's sole export: the flat array of lower-case name strings built above.
module.exports = list;
// console.log(list.indexOf('kelley'));