twitterjerkdetector
Version:
Given an array of Twitter user ids, gives back an array of the subset of those user ids that aren't jerks.
144 lines (121 loc) • 2.97 kB
JavaScript
var _ = require('lodash');
var jsonfile = require('jsonfile');
var callNextTick = require('call-next-tick');
// Keyword list read synchronously, once, when this module is loaded, from
// data/jerk-profile-keywords.json next to this file. filterJerkAccount tests
// every entry against a user's lowercased screen name, name and description.
var jerkProfileKeywords = jsonfile.readFileSync(
__dirname + '/data/jerk-profile-keywords.json'
);
/**
 * Builds a jerk filter bound to a Twitter client and an optional blacklist.
 *
 * @param {Object} [opts]
 * @param {Object} [opts.twit] - Client exposing `post(path, params, callback)`;
 *   it is called with the 'users/lookup' path and a Node-style callback.
 * @param {Array} [opts.blacklist] - User ids that are never looked up and are
 *   always reported as jerks. Defaults to an empty list.
 * @returns {Function} `filterJerkAccounts(userIds, done)`. `done` receives
 *   `(error)` if a lookup fails, otherwise
 *   `(null, { coolguys: [...ids], jerks: [...ids] })`. The whole blacklist is
 *   always appended to `jerks`, whether or not its ids were in `userIds`.
 */
function createFilter(opts) {
  var twit;
  var blacklist;

  if (opts) {
    twit = opts.twit;
    blacklist = opts.blacklist;
  }
  if (!blacklist) {
    blacklist = [];
  }

  function filterJerkAccounts(userIds, done) {
    // Private work queue: blacklisted ids are dropped up front so they are
    // not looked up (and cannot be reported twice or as "coolguys"), and the
    // caller's array is left untouched while batches are consumed.
    var pendingIds = _.difference(userIds, blacklist);
    var reports = [];

    if (pendingIds.length > 0) {
      filterNextBatch();
    }
    else {
      // Nothing to look up: still report back, asynchronously, so the
      // caller's callback always fires.
      callNextTick(sortAndFinish);
    }

    // Takes up to 100 ids off the queue and requests their user objects.
    // NOTE(review): 100 is presumably the per-request cap of users/lookup;
    // confirm against the Twitter API documentation.
    function filterNextBatch() {
      var nextBatch = pendingIds.splice(0, 100);
      var lookupOpts = {
        user_id: nextBatch.join(','),
        include_entities: false
      };
      twit.post('users/lookup', lookupOpts, runFilterOnUserObjects);
    }

    // Lookup callback: classifies the returned users, then either schedules
    // the next batch or finishes. Any lookup error aborts the whole run.
    function runFilterOnUserObjects(error, users) {
      if (error) {
        done(error);
        return;
      }

      reports = reports.concat(users.map(filterJerkAccount));

      if (pendingIds.length > 0) {
        callNextTick(filterNextBatch);
      }
      else {
        sortAndFinish();
      }
    }

    // Splits the collected reports into id buckets, adds the blacklist to the
    // jerks bucket and hands the result to the caller.
    function sortAndFinish() {
      var sortedReports = splitReportsIntoUserIdsByJerkiness(reports);
      sortedReports.jerks = sortedReports.jerks.concat(blacklist);
      done(null, sortedReports);
    }
  }

  return filterJerkAccounts;
}
/**
 * Classifies a single Twitter user object.
 *
 * A user is cleared only when the following/follower counts pass
 * followerRatioLooksHuman AND none of the jerkProfileKeywords occurs in the
 * lowercased description, screen name or display name. Everyone else is
 * flagged and logged to the console.
 *
 * @param {Object} user - Object with `id_str`, and optionally `description`,
 *   `screen_name`, `name`, `friends_count`, `followers_count`.
 * @returns {{userId: *, isJerk: boolean}} Report for this user.
 */
function filterJerkAccount(user) {
  var userId = user.id_str;

  // Missing or empty fields are treated as empty strings.
  var searchableFields = [
    lowercasedOrEmpty(user.description),
    lowercasedOrEmpty(user.screen_name),
    lowercasedOrEmpty(user.name)
  ];

  var looksClean = false;
  if (followerRatioLooksHuman(user.friends_count, user.followers_count)) {
    looksClean = !jerkProfileKeywords.some(appearsInSomeField);
  }

  if (!looksClean) {
    console.log('Filtering user as spam:', userId);
  }

  return {
    userId: userId,
    isJerk: !looksClean
  };

  function lowercasedOrEmpty(text) {
    return text ? text.toLowerCase() : '';
  }

  function appearsInSomeField(keyword) {
    return searchableFields.some(function containsKeyword(field) {
      return field.indexOf(keyword) !== -1;
    });
  }
}
/**
 * Heuristic on an account's follow counts.
 *
 * Accounts that follow fewer than 50 others always pass. Otherwise the
 * followers-per-following ratio must be strictly greater than 0.2.
 *
 * @param {number} following - How many accounts this user follows.
 * @param {number} followedBy - How many accounts follow this user.
 * @returns {boolean} true when the counts pass the heuristic.
 */
function followerRatioLooksHuman(following, followedBy) {
  var followsFewAccounts = following < 50;
  var followBackRatio = followedBy / following;

  return followsFewAccounts || followBackRatio > 0.2;
}
/**
 * Partitions per-user reports into two lists of user ids.
 *
 * @param {Array<{userId: *, isJerk: *}>} reports - Reports to partition.
 * @returns {{coolguys: Array, jerks: Array}} Ids of reports with a truthy
 *   `isJerk` go to `jerks`, all others to `coolguys`; input order is kept
 *   within each list.
 */
function splitReportsIntoUserIdsByJerkiness(reports) {
  return reports.reduce(function addToBucket(buckets, entry) {
    var target = entry.isJerk ? buckets.jerks : buckets.coolguys;
    target.push(entry.userId);
    return buckets;
  }, { coolguys: [], jerks: [] });
}
// Public API of this module: only the filter factory is exported.
module.exports = {
createFilter: createFilter
};