UNPKG

azrael-antismurf

Version:

Lightweight smurf/dupe-detection scoring for signups: disposable domains, fuzzy matches, entropy, IP/device reuse. Includes a daily disposable-domain refresher, CLI, and multi-source merge.

340 lines (288 loc) 10.8 kB
// Lightweight, dependency-free Node module to score signups for smurf/dupe risk // Adds daily disposable-domain refresher via HTTPS and multi-source merge. const https = require('node:https'); const { URL } = require('node:url'); const defaultWeights = { disposableDomain: 30, emailVerySimilar: 25, emailSimilar: 10, usernameVerySimilar: 15, entropyVeryHigh: 15, entropyVeryLow: 5, vowelWeirdness: 5, reuseMany: 30, reuseSome: 15, }; const defaultThresholds = { allowBelow: 20, challengeBelow: 50, reviewBelow: 80, }; const starterDisposableDomains = new Set([ 'mailinator.com', 'guerrillamail.com', '10minutemail.com', 'trashmail.com', 'dispostable.com', 'yopmail.com', 'tempmailo.com', 'mintemail.com' ]); let disposableDomains = new Set(starterDisposableDomains); function setDisposableDomains(domainsIterable) { disposableDomains = new Set(domainsIterable || []); } function addDisposableDomains(domainsIterable) { for (const d of domainsIterable || []) disposableDomains.add(String(d).toLowerCase()); } function getDisposableDomains() { return Array.from(disposableDomains); } function normalizeEmail(email) { if (!email) return ''; email = String(email).trim().toLowerCase(); const parts = email.split('@'); if (parts.length !== 2) return email; let [local, domain] = parts; local = local.replace(/\+.*$/, ''); if (domain === 'gmail.com' || domain === 'googlemail.com') { local = local.replace(/\./g, ''); } return `${local}@${domain}`; } function normalizeUsername(name) { if (!name) return ''; return String(name) .normalize('NFKD') .replace(/[̀-ͯ]/g, '') .toLowerCase() .replace(/[^a-z0-9]/g, ''); } function levenshtein(a = '', b = '') { const m = a.length, n = b.length; if (m === 0) return n; if (n === 0) return m; let v0 = new Array(n + 1); let v1 = new Array(n + 1); for (let i = 0; i <= n; i++) v0[i] = i; for (let i = 0; i < m; i++) { v1[0] = i + 1; const ai = a.charCodeAt(i); for (let j = 0; j < n; j++) { const cost = ai === b.charCodeAt(j) ? 0 : 1; const del = v0[j + 1] + 1; const ins = v1[j] + 1; const sub = v0[j] + cost; v1[j + 1] = del < ins ? (del < sub ? del : sub) : (ins < sub ? ins : sub); } const tmp = v0; v0 = v1; v1 = tmp; } return v0[n]; } function normalizedDistance(a, b) { a = a || ''; b = b || ''; if (!a && !b) return 0; const d = levenshtein(a, b); return d / Math.max(a.length, b.length); } function shannonEntropy(str) { if (!str) return 0; const freq = new Map(); for (const c of str) freq.set(c, (freq.get(c) || 0) + 1); const len = str.length; let ent = 0; for (const [, count] of freq) { const p = count / len; ent -= p * Math.log2(p); } return ent; } function vowelRatio(str) { if (!str) return 0; const letters = (str.match(/[a-z]/g) || []).length; if (!letters) return 0; const vowels = (str.match(/[aeiou]/g) || []).length; return vowels / letters; } function isDisposableDomain(domain) { return disposableDomains.has(String(domain || '').toLowerCase()); } function getEmailParts(email) { const e = normalizeEmail(email); const [local, domain] = e.split('@'); return { local: local || '', domain: domain || '' }; } async function hasMXRecord(domain) { try { const dns = await import('node:dns').then(m => m.promises || m); const mx = await dns.resolveMx(domain); return Array.isArray(mx) && mx.length > 0; } catch { return false; } } // --------- HTTP fetch with basic redirect support --------- function httpsGet(url, redirectCount = 0) { return new Promise((resolve, reject) => { const u = new URL(url); const req = https.get({ hostname: u.hostname, path: u.pathname + (u.search || ''), protocol: u.protocol, port: u.port || (u.protocol === 'https:' ? 443 : 80), headers: { 'user-agent': 'azrael/0.3' } }, res => { const code = res.statusCode || 0; if (code >= 300 && code < 400 && res.headers.location && redirectCount < 3) { res.resume(); resolve(httpsGet(new URL(res.headers.location, u).toString(), redirectCount + 1)); return; } if (code >= 400) { reject(new Error(`HTTP ${code}`)); res.resume(); return; } const chunks = []; res.on('data', d => chunks.push(d)); res.on('end', () => resolve(Buffer.concat(chunks))); }); req.on('error', reject); }); } function parseListDefault(bufOrStr) { const text = Buffer.isBuffer(bufOrStr) ? bufOrStr.toString('utf8') : String(bufOrStr || ''); let items = []; try { const json = JSON.parse(text); if (Array.isArray(json)) items = json.map(x => String(x).toLowerCase()); } catch { items = text .split(/?/) .map(l => l.trim()) .filter(l => l && !l.startsWith('#')) .map(l => l.toLowerCase()); } return items; } function mergeDisposableDomainLists(lists) { const set = new Set(); for (const list of lists) { for (const d of list || []) { const s = String(d).toLowerCase(); if (s) set.add(s); } } return Array.from(set).sort(); } async function refreshDisposableDomainsFromUrl(url, { logger = null, parse = parseListDefault } = {}) { const buf = await httpsGet(url); const list = parse(buf); if (!Array.isArray(list) || list.length === 0) throw new Error('Empty disposable list'); setDisposableDomains(list); if (logger?.info) logger.info(`azrael: refreshed ${list.length} disposable domains (single source)`); return list.length; } async function refreshDisposableDomainsFromUrls(urls, { logger = null, parse = parseListDefault } = {}) { if (!Array.isArray(urls) || urls.length === 0) throw new Error('No URLs provided'); const results = await Promise.allSettled(urls.map(u => httpsGet(u))); const lists = []; results.forEach((r, i) => { if (r.status === 'fulfilled') { try { lists.push(parse(r.value)); } catch (e) { if (logger?.warn) logger.warn(`azrael: parse failed for ${urls[i]}: ${e.message || e}`); } } else { if (logger?.warn) logger.warn(`azrael: fetch failed for ${urls[i]}: ${r.reason?.message || r.reason}`); } }); const merged = mergeDisposableDomainLists(lists); if (merged.length === 0) throw new Error('Merged disposable list is empty'); setDisposableDomains(merged); if (logger?.info) logger.info(`azrael: refreshed ${merged.length} disposable domains from ${lists.length}/${urls.length} sources`); return merged.length; } // --------------------------- // Scoring & decision // --------------------------- function scoreAccount({ email, username, ip, fingerprint, existingAccounts = [] }, weights = defaultWeights) { let score = 0; const normEmail = normalizeEmail(email); const normUser = normalizeUsername(username); const { local, domain } = getEmailParts(normEmail); if (isDisposableDomain(domain)) score += weights.disposableDomain; for (const acc of existingAccounts) { const accEmail = normalizeEmail(acc.email); const accUser = normalizeUsername(acc.username); const accLocal = getEmailParts(accEmail).local; const emailSim = normalizedDistance(local, accLocal); if (emailSim < 0.15) score += weights.emailVerySimilar; else if (emailSim < 0.30) score += weights.emailSimilar; const nameSim = normalizedDistance(normUser, accUser); if (nameSim < 0.15) score += weights.usernameVerySimilar; } const basis = local || normUser; const ent = shannonEntropy(basis); if (ent > 4.0) score += weights.entropyVeryHigh; if (ent < 1.5 && basis.length >= 4) score += weights.entropyVeryLow; const ratio = vowelRatio(normUser); if (ratio > 0.5 || ratio < 0.10) score += weights.vowelWeirdness; if (ip || fingerprint) { const reuseCount = existingAccounts.filter(a => (ip && a.ip === ip) || (fingerprint && a.fingerprint === fingerprint)).length; if (reuseCount >= 3) score += weights.reuseMany; else if (reuseCount === 2) score += weights.reuseSome; } return Math.min(100, Math.max(0, Math.round(score))); } function decideAction(score, thresholds = defaultThresholds) { if (score < thresholds.allowBelow) return 'allow'; if (score < thresholds.challengeBelow) return 'challenge'; if (score < thresholds.reviewBelow) return 'review'; return 'block'; } // --------------------------- // Refresher (multi-source) // --------------------------- let _refreshTimer = null; function startDisposableRefresher({ urls, url, intervalMs = 86_400_000, initial = true, logger = null, parse = parseListDefault } = {}) { const sources = Array.isArray(urls) && urls.length ? urls : (url ? [url] : null); if (!sources) throw new Error('startDisposableRefresher: urls or url is required'); stopDisposableRefresher(); const tick = async () => { try { await refreshDisposableDomainsFromUrls(sources, { logger, parse }); } catch (e) { if (logger?.warn) logger.warn(`azrael: refresh failed: ${e.message || e}`); } }; if (initial) tick(); const jitter = Math.floor(Math.random() * 5 * 60 * 1000); _refreshTimer = setInterval(tick, intervalMs + jitter); if (_refreshTimer.unref) _refreshTimer.unref(); if (logger?.info) logger.info(`azrael: refresher started (${Math.round(intervalMs / 3600000)}h interval) with ${sources.length} source(s)`); } function stopDisposableRefresher() { if (_refreshTimer) clearInterval(_refreshTimer); _refreshTimer = null; } module.exports = { // main scoreAccount, decideAction, // helpers normalizeEmail, normalizeUsername, levenshtein, normalizedDistance, shannonEntropy, vowelRatio, isDisposableDomain, getEmailParts, hasMXRecord, // list management setDisposableDomains, addDisposableDomains, getDisposableDomains, mergeDisposableDomainLists, // refresh startDisposableRefresher, stopDisposableRefresher, refreshDisposableDomainsFromUrl, refreshDisposableDomainsFromUrls, // defaults defaultWeights, defaultThresholds, };