/*
 * azrael-antismurf
 * Version:
 * Lightweight smurf/dupe-detection scoring for signups: disposable domains, fuzzy matches, entropy, IP/device reuse. Includes a daily disposable-domain refresher, CLI, and multi-source merge.
 * 340 lines (288 loc) • 10.8 kB
 * JavaScript
 */
// Lightweight, dependency-free Node module to score signups for smurf/dupe risk
// Adds daily disposable-domain refresher via HTTPS and multi-source merge.
const https = require('node:https');
const { URL } = require('node:url');
/**
 * Default points added to a signup's risk score for each signal that fires
 * in scoreAccount. Used when the caller does not pass a `weights` argument.
 */
const defaultWeights = {
// Email domain is in the disposable-domain set.
disposableDomain: 30,
// Email local part is within a normalized edit distance of 0.15 of an existing account's.
emailVerySimilar: 25,
// Email local part is within a normalized edit distance of 0.30 (but not 0.15).
emailSimilar: 10,
// Normalized username is within a normalized edit distance of 0.15 of an existing account's.
usernameVerySimilar: 15,
// Shannon entropy of the local part (or username) is above 4.0 bits.
entropyVeryHigh: 15,
// Shannon entropy is below 1.5 bits for a string of at least 4 characters.
entropyVeryLow: 5,
// Vowel ratio of the username is above 0.5 or below 0.10.
vowelWeirdness: 5,
// Three or more existing accounts share the signup's IP or fingerprint.
reuseMany: 30,
// Exactly two existing accounts share the signup's IP or fingerprint.
reuseSome: 15,
};
/**
 * Default score cut-offs used by decideAction. Each bound is exclusive:
 * a score strictly below it maps to the named action, and anything at or
 * above `reviewBelow` maps to 'block'.
 */
const defaultThresholds = {
// score < allowBelow -> 'allow'
allowBelow: 20,
// score < challengeBelow -> 'challenge'
challengeBelow: 50,
// score < reviewBelow -> 'review'; otherwise 'block'
reviewBelow: 80,
};
// Built-in seed list of disposable-email domains, available before any refresh has run.
const starterDisposableDomains = new Set([
'mailinator.com', 'guerrillamail.com', '10minutemail.com', 'trashmail.com',
'dispostable.com', 'yopmail.com', 'tempmailo.com', 'mintemail.com'
]);
// Active set consulted by isDisposableDomain. It starts as a copy of the seed
// list, is replaced wholesale by setDisposableDomains and extended by
// addDisposableDomains.
let disposableDomains = new Set(starterDisposableDomains);
/**
 * Replaces the active disposable-domain set with the given domains.
 *
 * Entries are stringified, trimmed and lower-cased before being stored,
 * because lookups lower-case the queried domain; storing a mixed-case entry
 * verbatim would make it impossible to match. Blank entries are dropped.
 *
 * @param {Iterable<*>} [domainsIterable] - New domains; a missing or falsy
 *   value empties the set.
 * @returns {void}
 */
function setDisposableDomains(domainsIterable) {
  const next = new Set();
  for (const entry of domainsIterable || []) {
    const domain = String(entry).trim().toLowerCase();
    if (domain) next.add(domain);
  }
  disposableDomains = next;
}
/**
 * Adds domains to the active disposable-domain set without removing any.
 * Each entry is stringified and lower-cased before insertion.
 *
 * @param {Iterable<*>} [domainsIterable] - Domains to add; a missing or falsy
 *   value adds nothing.
 * @returns {void}
 */
function addDisposableDomains(domainsIterable) {
  const incoming = domainsIterable || [];
  for (const entry of incoming) {
    const lowered = String(entry).toLowerCase();
    disposableDomains.add(lowered);
  }
}
/**
 * Returns a snapshot of the active disposable-domain set.
 *
 * @returns {string[]} A new array in the set's insertion order; mutating it
 *   does not affect the set.
 */
function getDisposableDomains() {
  return [...disposableDomains];
}
/**
 * Canonicalises an email address so aliases of one mailbox compare equal.
 *
 * The input is trimmed and lower-cased. When it contains exactly one '@',
 * any "+tag" suffix is removed from the local part, and dots are removed
 * from the local part for gmail.com / googlemail.com addresses.
 *
 * @param {*} email - Raw address; falsy values yield ''.
 * @returns {string} The canonical address, or the trimmed lower-cased input
 *   when it does not contain exactly one '@'.
 */
function normalizeEmail(email) {
  if (!email) return '';
  const cleaned = String(email).trim().toLowerCase();

  // Only addresses with exactly one '@' are rewritten.
  const at = cleaned.indexOf('@');
  if (at === -1 || at !== cleaned.lastIndexOf('@')) return cleaned;

  const domain = cleaned.slice(at + 1);
  const untagged = cleaned.slice(0, at).replace(/\+.*$/, '');
  const dotsIgnored = ['gmail.com', 'googlemail.com'].includes(domain);
  const local = dotsIgnored ? untagged.replace(/\./g, '') : untagged;
  return `${local}@${domain}`;
}
/**
 * Reduces a username to a comparison key of lower-case ASCII letters and digits.
 *
 * The name is decomposed with NFKD so accented and compatibility characters
 * split into a base character plus combining marks; the marks are stripped,
 * the result is lower-cased, and everything outside [a-z0-9] is removed.
 *
 * @param {*} name - Raw username; falsy values yield ''.
 * @returns {string} The normalised key (possibly empty).
 */
function normalizeUsername(name) {
  if (!name) return '';
  return String(name)
    .normalize('NFKD')
    // Combining Diacritical Marks block, written as escapes so the range
    // survives editors and tools that re-encode or normalise source text.
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9]/g, '');
}
/**
 * Computes the Levenshtein edit distance between two strings, comparing
 * UTF-16 code units. Only two rows of the distance table are kept at a time.
 *
 * @param {string} [a=''] - First string.
 * @param {string} [b=''] - Second string.
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions that turn `a` into `b`.
 */
function levenshtein(a = '', b = '') {
  const rows = a.length;
  const cols = b.length;
  if (rows === 0) return cols;
  if (cols === 0) return rows;

  // prev holds distances for the previous prefix of `a`; curr is being filled.
  let prev = Array.from({ length: cols + 1 }, (_, idx) => idx);
  let curr = new Array(cols + 1);

  for (let r = 1; r <= rows; r++) {
    const code = a.charCodeAt(r - 1);
    curr[0] = r;
    for (let c = 1; c <= cols; c++) {
      const substitution = prev[c - 1] + (code === b.charCodeAt(c - 1) ? 0 : 1);
      curr[c] = Math.min(prev[c] + 1, curr[c - 1] + 1, substitution);
    }
    [prev, curr] = [curr, prev];
  }
  return prev[cols];
}
/**
 * Levenshtein distance scaled by the length of the longer input.
 *
 * @param {string} [a] - First string; falsy values are treated as ''.
 * @param {string} [b] - Second string; falsy values are treated as ''.
 * @returns {number} 0 for identical inputs (including two empty ones) and
 *   1 when every position differs.
 */
function normalizedDistance(a, b) {
  const left = a || '';
  const right = b || '';
  // Two empty inputs would otherwise divide by zero.
  if (!(left || right)) return 0;
  const edits = levenshtein(left, right);
  const longest = Math.max(left.length, right.length);
  return edits / longest;
}
/**
 * Shannon entropy of a string, in bits per symbol, where a symbol is one
 * Unicode code point.
 *
 * @param {string} str - Text to measure; falsy values yield 0.
 * @returns {number} 0 when every symbol is the same, rising to
 *   log2(distinct symbols) when all symbols are equally frequent.
 */
function shannonEntropy(str) {
  if (!str) return 0;
  const freq = new Map();
  // Count the total with the same iteration that fills the histogram:
  // `str.length` counts UTF-16 units, which over-counts astral characters
  // and would make the probabilities sum to less than 1.
  let total = 0;
  for (const symbol of str) {
    freq.set(symbol, (freq.get(symbol) || 0) + 1);
    total += 1;
  }
  let entropy = 0;
  for (const count of freq.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }
  return entropy;
}
/**
 * Fraction of the lower-case ASCII letters in a string that are vowels.
 *
 * @param {string} str - Text to inspect; only [a-z] characters are counted.
 * @returns {number} vowels / letters, or 0 when the input is falsy or
 *   contains no lower-case ASCII letters.
 */
function vowelRatio(str) {
  if (!str) return 0;
  const countMatches = (pattern) => (str.match(pattern) ?? []).length;
  const letterTotal = countMatches(/[a-z]/g);
  return letterTotal === 0 ? 0 : countMatches(/[aeiou]/g) / letterTotal;
}
/**
 * Tests whether a domain is in the active disposable-domain set.
 *
 * @param {*} domain - Domain to look up; it is stringified and lower-cased
 *   first, and falsy values are looked up as ''.
 * @returns {boolean} True when the lower-cased domain is in the set.
 */
function isDisposableDomain(domain) {
  const key = String(domain || '').toLowerCase();
  return disposableDomains.has(key);
}
/**
 * Splits an email address into its local part and domain after running it
 * through normalizeEmail.
 *
 * @param {*} email - Raw address.
 * @returns {{local: string, domain: string}} The text before the first '@'
 *   and the text between the first and second '@' (or the end); either is
 *   '' when absent.
 */
function getEmailParts(email) {
  const pieces = normalizeEmail(email).split('@');
  return {
    local: pieces[0] || '',
    domain: pieces[1] || '',
  };
}
/**
 * Checks whether a domain publishes at least one MX record.
 *
 * The DNS module is loaded on demand. Any failure (module load, lookup
 * error, no such domain) is reported as `false` rather than thrown.
 *
 * @param {string} domain - Domain to resolve.
 * @returns {Promise<boolean>} True when the lookup returns a non-empty array.
 */
async function hasMXRecord(domain) {
  let records;
  try {
    const dnsModule = await import('node:dns');
    // Prefer the promise-based API; fall back to the module itself.
    const resolver = dnsModule.promises || dnsModule;
    records = await resolver.resolveMx(domain);
  } catch {
    return false;
  }
  return Array.isArray(records) && records.length > 0;
}
// --------- HTTP fetch with basic redirect support ---------
/**
 * Downloads a resource over HTTPS and resolves with its full body.
 *
 * Up to three redirects are followed (relative `Location` values are
 * resolved against the current URL). The promise rejects on an invalid or
 * non-HTTPS URL, a non-2xx final status, a redirect chain longer than the
 * limit, a socket or response error, or when the socket stays idle longer
 * than `timeoutMs`.
 *
 * @param {string} url - Absolute https: URL to fetch.
 * @param {number} [redirectCount=0] - Redirects already followed (internal).
 * @param {number} [timeoutMs=15000] - Socket idle timeout in milliseconds;
 *   0 disables it.
 * @returns {Promise<Buffer>} The response body.
 */
function httpsGet(url, redirectCount = 0, timeoutMs = 15_000) {
  const maxRedirects = 3;
  return new Promise((resolve, reject) => {
    // A throw inside the executor (e.g. an invalid URL) becomes a rejection.
    const target = new URL(url);
    if (target.protocol !== 'https:') {
      reject(new Error(`Unsupported protocol ${target.protocol}`));
      return;
    }
    const req = https.get({
      hostname: target.hostname,
      path: target.pathname + target.search,
      port: target.port || 443,
      headers: { 'user-agent': 'azrael/0.3' },
    }, (res) => {
      const code = res.statusCode || 0;
      if (code >= 300 && code < 400 && res.headers.location) {
        res.resume(); // drain so the socket is released
        if (redirectCount >= maxRedirects) {
          reject(new Error('Too many redirects'));
          return;
        }
        const next = new URL(res.headers.location, target).toString();
        resolve(httpsGet(next, redirectCount + 1, timeoutMs));
        return;
      }
      // Anything that is not a success must not be treated as list content.
      if (code < 200 || code >= 300) {
        res.resume();
        reject(new Error(`HTTP ${code}`));
        return;
      }
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('error', reject);
      res.on('end', () => resolve(Buffer.concat(chunks)));
    });
    req.on('error', reject);
    // Destroying with an error surfaces the timeout through the 'error' handler.
    req.setTimeout(timeoutMs, () => {
      req.destroy(new Error(`Request timed out after ${timeoutMs}ms`));
    });
  });
}
/**
 * Default parser for a downloaded disposable-domain list.
 *
 * Accepts either a JSON array of domains or newline-delimited text (LF or
 * CRLF) in which blank lines and lines starting with '#' are ignored.
 * Every entry is trimmed and lower-cased.
 *
 * @param {Buffer|string} bufOrStr - Raw payload; Buffers are decoded as UTF-8.
 * @returns {string[]} Parsed domains; empty when the payload is blank or is
 *   valid JSON that is not an array.
 */
function parseListDefault(bufOrStr) {
  const text = Buffer.isBuffer(bufOrStr) ? bufOrStr.toString('utf8') : String(bufOrStr || '');
  const clean = (value) => String(value).trim().toLowerCase();

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Not JSON: treat the payload as one domain per line.
    return text
      .split(/\r?\n/)
      .map(clean)
      .filter((line) => line && !line.startsWith('#'));
  }
  return Array.isArray(parsed) ? parsed.map(clean).filter(Boolean) : [];
}
/**
 * Merges several domain lists into one de-duplicated, sorted array.
 *
 * @param {Iterable<Iterable<*>|null|undefined>} lists - Lists to merge;
 *   falsy lists are skipped.
 * @returns {string[]} Unique lower-cased, non-empty entries in default
 *   sort order.
 */
function mergeDisposableDomainLists(lists) {
  const unique = new Set();
  for (const source of lists) {
    if (!source) continue;
    for (const entry of source) {
      const lowered = String(entry).toLowerCase();
      if (lowered) unique.add(lowered);
    }
  }
  return [...unique].sort();
}
/**
 * Downloads one disposable-domain list and makes it the active set.
 *
 * @param {string} url - Location of the list.
 * @param {object} [options]
 * @param {{info?: Function}|null} [options.logger=null] - Optional logger;
 *   `info` is called once on success.
 * @param {Function} [options.parse=parseListDefault] - Converts the
 *   downloaded payload into an array of domains.
 * @returns {Promise<number>} Number of entries in the parsed list.
 * @throws {Error} 'Empty disposable list' when the parser returns a
 *   non-array or an empty array; download errors propagate unchanged.
 */
async function refreshDisposableDomainsFromUrl(url, { logger = null, parse = parseListDefault } = {}) {
  const payload = await httpsGet(url);
  const domains = parse(payload);
  const usable = Array.isArray(domains) && domains.length > 0;
  if (!usable) throw new Error('Empty disposable list');

  setDisposableDomains(domains);
  const count = domains.length;
  if (logger?.info) logger.info(`azrael: refreshed ${count} disposable domains (single source)`);
  return count;
}
/**
 * Downloads several disposable-domain lists in parallel, merges whichever
 * ones succeed, and makes the merged list the active set.
 *
 * A source that fails to download or parse is skipped with a warning; the
 * refresh only fails when nothing usable is left.
 *
 * @param {string[]} urls - Locations of the lists.
 * @param {object} [options]
 * @param {{info?: Function, warn?: Function}|null} [options.logger=null] -
 *   Optional logger for per-source warnings and the success message.
 * @param {Function} [options.parse=parseListDefault] - Converts each
 *   downloaded payload into an array of domains.
 * @returns {Promise<number>} Number of entries in the merged list.
 * @throws {Error} 'No URLs provided' for a non-array or empty `urls`;
 *   'Merged disposable list is empty' when no source yields a domain.
 */
async function refreshDisposableDomainsFromUrls(urls, { logger = null, parse = parseListDefault } = {}) {
  if (!Array.isArray(urls) || urls.length === 0) throw new Error('No URLs provided');

  const outcomes = await Promise.allSettled(urls.map((source) => httpsGet(source)));
  const parsedLists = [];
  for (const [index, outcome] of outcomes.entries()) {
    if (outcome.status !== 'fulfilled') {
      if (logger?.warn) logger.warn(`azrael: fetch failed for ${urls[index]}: ${outcome.reason?.message || outcome.reason}`);
      continue;
    }
    try {
      parsedLists.push(parse(outcome.value));
    } catch (err) {
      if (logger?.warn) logger.warn(`azrael: parse failed for ${urls[index]}: ${err.message || err}`);
    }
  }

  const merged = mergeDisposableDomainLists(parsedLists);
  if (merged.length === 0) throw new Error('Merged disposable list is empty');

  setDisposableDomains(merged);
  if (logger?.info) logger.info(`azrael: refreshed ${merged.length} disposable domains from ${parsedLists.length}/${urls.length} sources`);
  return merged.length;
}
// ---------------------------
// Scoring & decision
// ---------------------------
/**
 * Scores a signup for smurf/duplicate-account risk on a 0-100 scale.
 *
 * Signals, each adding its weight when it fires:
 *  - the email domain is a known disposable domain;
 *  - per existing account: the email local part or the username is a near
 *    match (normalized edit distance below 0.15, or below 0.30 for the
 *    weaker email signal);
 *  - the local part (or the username when there is no local part) has very
 *    high (> 4.0 bits) or very low (< 1.5 bits, length >= 4) entropy;
 *  - the username's vowel ratio is above 0.5 or below 0.10;
 *  - the IP or fingerprint is shared with 2, or with 3 or more, existing
 *    accounts.
 *
 * Similarity and vowel checks are skipped when the relevant value is
 * missing, so absent data is never counted as a match or as "weird".
 *
 * @param {object} signup
 * @param {string} [signup.email] - Email of the new account.
 * @param {string} [signup.username] - Username of the new account.
 * @param {string} [signup.ip] - IP address of the signup.
 * @param {string} [signup.fingerprint] - Device fingerprint of the signup.
 * @param {Array<object>} [signup.existingAccounts=[]] - Accounts to compare
 *   against; each may carry `email`, `username`, `ip` and `fingerprint`.
 * @param {object} [weights=defaultWeights] - Per-signal weights; any key
 *   left out falls back to its default.
 * @returns {number} Integer risk score clamped to [0, 100].
 */
function scoreAccount({ email, username, ip, fingerprint, existingAccounts = [] }, weights = defaultWeights) {
  // Merge over the defaults so a partial override cannot add `undefined`
  // and turn the whole score into NaN.
  const w = { ...defaultWeights, ...weights };
  const accounts = existingAccounts || [];
  const normUser = normalizeUsername(username);
  const { local, domain } = getEmailParts(email);

  let score = 0;
  if (isDisposableDomain(domain)) score += w.disposableDomain;

  let reuseCount = 0;
  for (const acc of accounts) {
    if (!acc) continue;

    // Two empty strings have distance 0, so only compare real values.
    const accLocal = getEmailParts(acc.email).local;
    if (local && accLocal) {
      const emailDistance = normalizedDistance(local, accLocal);
      if (emailDistance < 0.15) score += w.emailVerySimilar;
      else if (emailDistance < 0.30) score += w.emailSimilar;
    }

    const accUser = normalizeUsername(acc.username);
    if (normUser && accUser && normalizedDistance(normUser, accUser) < 0.15) {
      score += w.usernameVerySimilar;
    }

    if ((ip && acc.ip === ip) || (fingerprint && acc.fingerprint === fingerprint)) {
      reuseCount += 1;
    }
  }

  const basis = local || normUser;
  const entropy = shannonEntropy(basis);
  if (entropy > 4.0) score += w.entropyVeryHigh;
  if (entropy < 1.5 && basis.length >= 4) score += w.entropyVeryLow;

  // vowelRatio returns 0 for an empty string, which would otherwise read as
  // "no vowels at all" and penalise signups that simply have no username.
  if (normUser) {
    const ratio = vowelRatio(normUser);
    if (ratio > 0.5 || ratio < 0.10) score += w.vowelWeirdness;
  }

  if (reuseCount >= 3) score += w.reuseMany;
  else if (reuseCount === 2) score += w.reuseSome;

  return Math.min(100, Math.max(0, Math.round(score)));
}
/**
 * Maps a risk score to the action to take for the signup.
 *
 * @param {number} score - Risk score, typically from scoreAccount.
 * @param {{allowBelow: number, challengeBelow: number, reviewBelow: number}}
 *   [thresholds=defaultThresholds] - Exclusive upper bounds of each tier.
 * @returns {'allow'|'challenge'|'review'|'block'} The first tier whose bound
 *   the score is strictly below, or 'block' when it is below none.
 */
function decideAction(score, thresholds = defaultThresholds) {
  const ladder = [
    [thresholds.allowBelow, 'allow'],
    [thresholds.challengeBelow, 'challenge'],
    [thresholds.reviewBelow, 'review'],
  ];
  const tier = ladder.find(([limit]) => score < limit);
  return tier ? tier[1] : 'block';
}
// ---------------------------
// Refresher (multi-source)
// ---------------------------
// Handle of the interval created by startDisposableRefresher; null while no
// refresher is running. Cleared by stopDisposableRefresher.
let _refreshTimer = null;
/**
 * Starts (or restarts) the background job that periodically re-downloads the
 * disposable-domain lists. Any refresher that is already running is stopped
 * first. Refresh failures are logged as warnings and never thrown.
 *
 * @param {object} [options]
 * @param {string[]} [options.urls] - List sources; takes precedence over `url`.
 * @param {string} [options.url] - Single list source.
 * @param {number} [options.intervalMs=86400000] - Time between refreshes in
 *   milliseconds. A random 0-5 minute offset is added once at start-up, and
 *   the result is capped at the largest delay a timer accepts.
 * @param {boolean} [options.initial=true] - Also refresh immediately.
 * @param {{info?: Function, warn?: Function}|null} [options.logger=null] -
 *   Optional logger.
 * @param {Function} [options.parse=parseListDefault] - Payload parser passed
 *   through to the refresh.
 * @returns {void}
 * @throws {Error} When neither `urls` nor `url` is given, or `intervalMs` is
 *   not a positive finite number.
 */
function startDisposableRefresher({ urls, url, intervalMs = 86_400_000, initial = true, logger = null, parse = parseListDefault } = {}) {
  const sources = Array.isArray(urls) && urls.length ? urls : (url ? [url] : null);
  if (!sources) throw new Error('startDisposableRefresher: urls or url is required');
  // setInterval silently coerces a non-positive or NaN delay to 1 ms, which
  // would hammer the sources; refuse it before touching a running refresher.
  if (!Number.isFinite(intervalMs) || intervalMs <= 0) {
    throw new Error('startDisposableRefresher: intervalMs must be a positive number');
  }
  stopDisposableRefresher();

  const tick = async () => {
    try {
      await refreshDisposableDomainsFromUrls(sources, { logger, parse });
    } catch (e) {
      if (logger?.warn) logger.warn(`azrael: refresh failed: ${e?.message || e}`);
    }
  };
  // Fire-and-forget: tick handles its own errors.
  if (initial) void tick();

  // Delays above 2^31-1 ms overflow the timer and are also coerced to 1 ms.
  const maxTimerDelayMs = 2_147_483_647;
  const jitter = Math.floor(Math.random() * 5 * 60 * 1000);
  const delay = Math.min(intervalMs + jitter, maxTimerDelayMs);
  _refreshTimer = setInterval(tick, delay);
  // Do not keep the process alive just for the refresher.
  if (_refreshTimer.unref) _refreshTimer.unref();
  if (logger?.info) logger.info(`azrael: refresher started (${Math.round(intervalMs / 3600000)}h interval) with ${sources.length} source(s)`);
}
/**
 * Stops the background refresher if one is running. Calling it when no
 * refresher is active does nothing.
 *
 * @returns {void}
 */
function stopDisposableRefresher() {
  const active = _refreshTimer;
  _refreshTimer = null;
  if (active) clearInterval(active);
}
// Public API of the module. httpsGet and parseListDefault are not part of
// this object, so they are not reachable through the module's exports.
module.exports = {
// main: scoring and the score-to-action mapping
scoreAccount,
decideAction,
// helpers: normalisation, string metrics and lookups used by the scorer
normalizeEmail,
normalizeUsername,
levenshtein,
normalizedDistance,
shannonEntropy,
vowelRatio,
isDisposableDomain,
getEmailParts,
hasMXRecord,
// list management: replace, extend, read and merge disposable-domain lists
setDisposableDomains,
addDisposableDomains,
getDisposableDomains,
mergeDisposableDomainLists,
// refresh: one-off downloads and the periodic background refresher
startDisposableRefresher,
stopDisposableRefresher,
refreshDisposableDomainsFromUrl,
refreshDisposableDomainsFromUrls,
// defaults: weights and thresholds used when the caller passes none
defaultWeights,
defaultThresholds,
};