UNPKG

gender

Version:

guess the gender of a name based on U.S. census data

63 lines (53 loc) 1.6 kB
exports.guess = function(fullName) { var firstName = getFirstNameFromFullName(fullName); firstName = firstName.toLowerCase(); var freqMale = frequencyInFile(firstName, 'census/male.txt'); var freqFemale = frequencyInFile(firstName, 'census/female.txt'); var pMale = freqMale / (freqMale + freqFemale); var gender, confidence; if (freqMale > freqFemale) { gender = 'male'; confidence = pMale; } else if (freqFemale > freqMale) { gender = 'female'; confidence = 1 - pMale; } else { gender = 'unknown'; confidence = null; } return {gender: gender, confidence: confidence}; } function frequencyInFile(firstName, file) { frequencies = parseFile(file); if (frequencies[firstName] == undefined) { return 0.0005; } else { return frequencies[firstName]; } } function parseFile(file) { if (file in parseFile.cache) { return parseFile.cache[file]; } frequencies = {}; var fs = require('fs'); var path = require('path'); var filePath = path.join(path.dirname(fs.realpathSync(__filename)), file); var array = fs.readFileSync(filePath).toString().split('\n'); for(i in array) { var parts = array[i].split(/\s+/); var name = parts[0]; var frequency = parts[1]; frequencies[name.toLowerCase()] = parseFloat(frequency); } parseFile.cache[file] = frequencies; return frequencies; } parseFile.cache = {}; function getFirstNameFromFullName(fullName) { var commaIndex = fullName.indexOf(','); if (commaIndex >= 0) { fullName = fullName.split(/,(.+)/)[1].trim(); } return fullName.split(/\s/)[0]; }