UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

1,186 lines (1,073 loc) 84.2 kB
/** * Network tools module for whois and dig lookups - COMPLETE FIXED VERSION * Provides domain analysis capabilities with proper timeout handling, custom whois servers, and retry logic */ // execFile (no shell) for whois/dig invocations -- arguments are passed // directly to the executable as an argv array, so shell metacharacters in // config-supplied hostnames or server names CANNOT execute commands. The // prior `exec(string)` approach interpolated tainted values into a shell // string protected only by double-quoting, which doesn't stop $()/backticks. // execSync is retained ONLY for the version-probe helpers below, where // commands are constant string literals with no user-controlled inputs. const { execFile, execSync } = require('child_process'); const fs = require('fs'); const path = require('path'); const { formatLogMessage, messageColors } = require('./colorize'); const ANSI_REGEX = /\x1b\[[0-9;]*m/g; // Cycling index for whois server rotation let whoisServerCycleIndex = 0; // Global dig result cache — shared across ALL handler instances and processUrl calls // Key: `${domain}-${recordType}`, Value: { result, timestamp } // DNS records don't change based on what terms you're searching for, // so we cache the raw dig output and let each handler check its own terms against it const globalDigResultCache = new Map(); const GLOBAL_DIG_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs) const GLOBAL_DIG_CACHE_MAX = 2000; // Global whois result cache — shared across ALL handler instances and processUrl calls // Whois data is per root domain and doesn't change based on search terms const globalWhoisResultCache = new Map(); const GLOBAL_WHOIS_CACHE_TTL = 72000000; // 20 hours (persisted to disk between runs) const GLOBAL_WHOIS_CACHE_MAX = 2000; // Persistent disk cache file paths const DIG_CACHE_FILE = path.join(__dirname, '..', '.digcache'); const WHOIS_CACHE_FILE = path.join(__dirname, '..', '.whoiscache'); // Index of hostnames known to resolve, 
// Index of hostnames known to resolve, populated as a side effect of
// positive dig/whois cache writes AND cache hits. nwss.js's DNS pre-check
// reads this via domainKnownToResolve() so it can skip its own resolve4
// call on hosts that dig or whois have already proven live within the
// 20-hour TTL window. Populating on cache HITS (not just writes) handles
// the --dns-cache disk-load case where entries arrive without going
// through the in-process write path. Stale entries -- hostname in Set but
// the dig/whois entry has since been evicted -- are harmless: worst case
// is one wasted pre-check next time the hostname comes through.
const knownResolvedHostnames = new Set();
const MAX_RESOLVED_HOSTNAMES = 5000;

/**
 * Record a hostname in the known-to-resolve index.
 *
 * Falsy hostnames are ignored. When the index is at MAX_RESOLVED_HOSTNAMES
 * the oldest entry is evicted (FIFO) to make room for the new one.
 *
 * @param {string} hostname - Hostname that produced a positive dig/whois result
 * @returns {void}
 */
function markResolved(hostname) {
  if (!hostname) return;
  // Already indexed: Set.add() would be a no-op, so evicting here would
  // shrink the index and throw away a live entry for nothing.
  if (knownResolvedHostnames.has(hostname)) return;
  if (knownResolvedHostnames.size >= MAX_RESOLVED_HOSTNAMES) {
    // FIFO eviction -- Set iteration order is insertion order.
    knownResolvedHostnames.delete(knownResolvedHostnames.values().next().value);
  }
  knownResolvedHostnames.add(hostname);
}

/**
 * Returns true if dig or whois has produced a verifiable-positive result
 * for this hostname during the current process lifetime. nwss.js's DNS
 * pre-check uses this to skip resolve4 calls on hosts we already know
 * are live. False does NOT mean "unresolvable" -- it means "we have no
 * recent evidence either way; do the pre-check".
 *
 * @param {string} hostname - Hostname to look up in the index
 * @returns {boolean} true when the hostname is currently in the index
 */
function domainKnownToResolve(hostname) {
  return knownResolvedHostnames.has(hostname);
}

/**
 * Decide whether raw dig output proves that the queried name resolved.
 *
 * Dig responses with success:true can still represent NXDOMAIN -- the dig
 * COMMAND succeeded but the DNS RESPONSE is "no such name". The output
 * string is the only reliable signal. NOERROR + non-zero answer count =
 * the hostname genuinely resolved.
 *
 * @param {string} output - Raw dig output text
 * @returns {boolean} true only for NOERROR responses with at least one answer
 */
function digOutputIndicatesResolution(output) {
  if (!output) return false;
  if (!output.includes('status: NOERROR')) return false;
  // ANSWER: 0 means NOERROR but no records of the requested type -- the
  // hostname exists at this label but doesn't have THIS record type.
  // Conservative choice: require a non-zero answer count.
  if (/ANSWER:\s*0\b/.test(output)) return false;
  return true;
}
/**
 * Load a persistent cache file from disk into an in-memory Map.
 *
 * Expired entries are skipped and at most `maxSize` entries are loaded.
 * Individual malformed entries (e.g. `null`) are skipped rather than
 * treated as file corruption, so one bad record does not cost the whole
 * warm cache. A file that cannot be read or parsed at all is reported
 * with a warning and deleted so the next run starts fresh.
 *
 * @param {string} filePath - Path to cache file
 * @param {Map} cache - In-memory cache Map to populate
 * @param {number} ttl - TTL in milliseconds
 * @param {number} maxSize - Maximum cache entries
 * @returns {void}
 */
function loadDiskCache(filePath, cache, ttl, maxSize) {
  // Clean up any stray .tmp file from a prior interrupted save. The
  // atomic-write path writes to `${filePath}.tmp` then renames; a process
  // killed mid-write leaves the .tmp behind while the real file stays
  // intact, so the stray is simply swept on load.
  try {
    const tmpPath = filePath + '.tmp';
    if (fs.existsSync(tmpPath)) {
      try { fs.unlinkSync(tmpPath); } catch {}
    }
  } catch {}

  try {
    if (!fs.existsSync(filePath)) return;
    const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
    const now = Date.now();
    let loaded = 0;
    for (const [key, entry] of Object.entries(data)) {
      if (loaded >= maxSize) break;
      // Skip records that are not objects: reading `.timestamp` off null
      // would throw and send a mostly-good file down the corrupt-file path.
      if (entry === null || typeof entry !== 'object') continue;
      if (now - entry.timestamp < ttl) {
        cache.set(key, entry);
        loaded++;
      }
    }
  } catch (err) {
    // Corrupt or unreadable cache file -- delete and start fresh.
    // Surface the event so the user knows they lost their warm cache.
    // eslint-disable-next-line no-console
    console.warn(`${messageColors.highlight('[dns-cache]')} ${path.basename(filePath)} was unreadable (${err.message}); starting fresh`);
    try { fs.unlinkSync(filePath); } catch {}
  }
}
/**
 * Save an in-memory cache Map to disk as compact JSON.
 *
 * Expired entries are dropped; if more than `maxSize` live entries remain,
 * only the newest `maxSize` (by timestamp) are written. All failures are
 * swallowed on purpose: the disk cache is best-effort and the in-memory
 * cache keeps working without it.
 *
 * @param {string} filePath - Path to cache file
 * @param {Map} cache - In-memory cache Map to persist
 * @param {number} ttl - TTL in milliseconds
 * @param {number} maxSize - Maximum cache entries
 * @returns {void}
 */
function saveDiskCache(filePath, cache, ttl, maxSize) {
  const tmpPath = filePath + '.tmp';
  try {
    const now = Date.now();

    // Collect live entries as [key, entry] pairs. Pairs (not a plain-object
    // accumulator) so a key such as "__proto__" is stored as data instead
    // of rewiring the accumulator's prototype and vanishing from the JSON.
    let live = [];
    for (const [key, entry] of cache) {
      // A null/primitive entry has no timestamp; skip it rather than let
      // one bad record abort the whole save.
      if (entry === null || typeof entry !== 'object') continue;
      if (now - entry.timestamp < ttl) live.push([key, entry]);
    }

    // Over the cap: keep the newest maxSize entries.
    if (live.length > maxSize) {
      live.sort((a, b) => b[1].timestamp - a[1].timestamp);
      live = live.slice(0, maxSize);
    }

    // Compact JSON: serialisation cost here directly delays the caller,
    // and the file is not intended for human reading.
    const payload = JSON.stringify(Object.fromEntries(live));

    // Atomic write: write a sibling .tmp, then rename over the real file.
    // If the process dies mid-write the .tmp is garbage but filePath is
    // either complete or absent -- never half-written.
    fs.writeFileSync(tmpPath, payload);
    fs.renameSync(tmpPath, filePath);
  } catch {
    // Disk write failed -- non-fatal. Best-effort cleanup of the tmp file
    // so repeated failures do not accumulate strays.
    try { fs.unlinkSync(tmpPath); } catch {}
  }
}
// In-flight lookup registries, used to avoid issuing duplicate concurrent
// dig / whois requests.
const pendingDigLookups = new Map();
const pendingWhoisLookups = new Map();

/**
 * Keep a dig/whois cache Map at or below a hard size cap.
 *
 * Nothing happens while the cache is within the cap. Once it is over,
 * entries older than `ttl` are removed first; if that is not enough, the
 * oldest remaining entries (by timestamp) are removed until the size
 * equals `maxSize`.
 *
 * @param {Map} cache - cache Map to prune (mutated in place)
 * @param {number} maxSize - desired hard cap
 * @param {number} ttl - TTL in ms; entries older than this are evicted first
 * @returns {{expired: number, overflow: number}} eviction counts
 */
function enforceCacheCap(cache, maxSize, ttl) {
  const evicted = { expired: 0, overflow: 0 };
  if (cache.size <= maxSize) return evicted;

  // Pass 1: drop everything past its TTL.
  const checkedAt = Date.now();
  for (const [name, record] of cache) {
    const age = checkedAt - record.timestamp;
    if (age > ttl) {
      cache.delete(name);
      evicted.expired += 1;
    }
  }

  // Pass 2: still too many live entries -- drop the oldest ones.
  const excess = cache.size - maxSize;
  if (excess > 0) {
    const oldestFirst = [...cache].sort(
      (left, right) => left[1].timestamp - right[1].timestamp
    );
    for (const [name] of oldestFirst.slice(0, excess)) {
      cache.delete(name);
      evicted.overflow += 1;
    }
  }

  return evicted;
}
// DNS cache statistics. freshDig / freshWhois are sample lists for
// end-of-scan visibility; capped at MAX_FRESH_LIST entries (FIFO) so
// they can't grow unbounded on scans with thousands of unique fresh
// lookups. digMisses/whoisMisses retain the full count, so callers
// who want totals can read those; freshDig/freshWhois are intended as
// "show me which domains" diagnostic samples.
const MAX_FRESH_LIST = 1000;
const dnsCacheStats = {
  digHits: 0,
  digMisses: 0,
  whoisHits: 0,
  whoisMisses: 0,
  freshDig: [],
  freshWhois: []
};

/**
 * Append an item to a bounded sample list, dropping the oldest item once
 * the list already holds MAX_FRESH_LIST entries.
 *
 * @param {Array} arr - Sample list to append to (mutated in place)
 * @param {*} item - Item to record
 * @returns {void}
 */
function pushFreshSample(arr, item) {
  if (arr.length >= MAX_FRESH_LIST) arr.shift();
  arr.push(item);
}

/**
 * Get DNS cache statistics for end-of-scan reporting.
 *
 * Returns a snapshot: the counters are copied and the sample lists are
 * cloned, so a caller that sorts, truncates or otherwise mutates the
 * result cannot corrupt the module's live statistics.
 *
 * @returns {Object} Cache hit/miss counts and fresh domain lists
 */
function getDnsCacheStats() {
  return {
    ...dnsCacheStats,
    freshDig: [...dnsCacheStats.freshDig],
    freshWhois: [...dnsCacheStats.freshWhois]
  };
}

// Disk cache is opt-in via --dns-cache flag
let diskCacheEnabled = false;

/**
 * Enable persistent disk caching for dig/whois results.
 * Call this when --dns-cache flag is set. Idempotent -- repeated calls
 * are no-ops, which prevents double-loading the cache files and double-
 * registering the 'exit' handler that flushes them on shutdown.
 *
 * @returns {void}
 */
function enableDiskCache() {
  if (diskCacheEnabled) return;
  diskCacheEnabled = true;

  loadDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
  loadDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);

  // Warm knownResolvedHostnames from disk-loaded entries so the very
  // first URL per cached domain can also skip the DNS pre-check. Entries
  // without a `hostname` field are skipped here and fall back to lazy
  // on-hit population. Same positive-resolution gates apply as the live
  // write/hit paths (dig: NOERROR + non-zero answers; whois: success).
  let digWarm = 0;
  let whoisWarm = 0;
  for (const entry of globalDigResultCache.values()) {
    if (entry.hostname && entry.result && entry.result.success &&
        digOutputIndicatesResolution(entry.result.output)) {
      markResolved(entry.hostname);
      digWarm++;
    }
  }
  for (const entry of globalWhoisResultCache.values()) {
    if (entry.hostname && entry.result && entry.result.success) {
      markResolved(entry.hostname);
      whoisWarm++;
    }
  }

  // Log only if anything was actually warmed; silent on fresh installs /
  // empty disk caches.
  if (digWarm > 0 || whoisWarm > 0) {
    // eslint-disable-next-line no-console
    console.log(`${messageColors.highlight('[dns-cache]')} Warmed resolved-hostnames index from disk: ${digWarm} dig + ${whoisWarm} whois entries`);
  }

  // Save caches to disk once on process exit instead of per-lookup.
  // SIGINT/SIGTERM handlers are deliberately NOT installed here -- per the
  // original author's note, nwss.js installs its own async ones for
  // browser/VPN cleanup, and a sync handler here would pre-empt them.
  const flushCaches = () => {
    saveDiskCache(DIG_CACHE_FILE, globalDigResultCache, GLOBAL_DIG_CACHE_TTL, GLOBAL_DIG_CACHE_MAX);
    saveDiskCache(WHOIS_CACHE_FILE, globalWhoisResultCache, GLOBAL_WHOIS_CACHE_TTL, GLOBAL_WHOIS_CACHE_MAX);
  };
  process.on('exit', flushCaches);
}
/**
 * Remove ANSI colour escape sequences so text can be written to plain
 * log files.
 * @param {string} text - Text that may contain ANSI codes
 * @returns {string} The same text without ANSI codes
 */
function stripAnsiColors(text) {
  // Shared /g regex: reset its cursor before reuse.
  ANSI_REGEX.lastIndex = 0;
  const plain = text.replace(ANSI_REGEX, '');
  return plain;
}

/**
 * Probe once whether the `whois` command can be run on this system.
 * The outcome is memoised on the function itself, so later calls return
 * the same object without spawning anything.
 * @returns {Object} `{ isAvailable, version }` on success or `{ isAvailable, error }` on failure
 */
function validateWhoisAvailability() {
  const memo = validateWhoisAvailability._cached;
  if (memo) return memo;

  let probe;
  try {
    const banner = execSync('whois --version 2>&1', { encoding: 'utf8' });
    probe = { isAvailable: true, version: banner.trim() };
  } catch (versionError) {
    // The version probe failed; fall back to checking that the binary
    // exists on PATH.
    try {
      execSync('which whois', { encoding: 'utf8' });
      probe = { isAvailable: true, version: 'whois (version unknown)' };
    } catch (whichError) {
      probe = { isAvailable: false, error: 'whois command not found' };
    }
  }

  validateWhoisAvailability._cached = probe;
  return probe;
}

/**
 * Probe once whether the `dig` command can be run on this system.
 * The outcome is memoised on the function itself.
 * @returns {Object} `{ isAvailable, version }` on success or `{ isAvailable, error }` on failure
 */
function validateDigAvailability() {
  const memo = validateDigAvailability._cached;
  if (memo) return memo;

  let probe;
  try {
    const banner = execSync('dig -v 2>&1', { encoding: 'utf8' });
    const [firstLine] = banner.split('\n');
    probe = { isAvailable: true, version: firstLine.trim() };
  } catch (digError) {
    probe = { isAvailable: false, error: 'dig command not found' };
  }

  validateDigAvailability._cached = probe;
  return probe;
}
/**
 * Spawn a process with execFile (no shell) and a hard timeout. Arguments
 * are passed directly as argv -- shell metacharacters in any element
 * cannot execute commands.
 *
 * On timeout the child is sent SIGTERM and the promise rejects
 * immediately; if the child has still not exited 2 seconds later it is
 * sent SIGKILL.
 *
 * @param {string} cmd - Executable name or path
 * @param {string[]} args - Argument vector (each element a separate arg)
 * @param {number} timeout - Timeout in milliseconds
 * @returns {Promise<{stdout:string, stderr:string}>} -- rejects on timeout/error
 */
function execFileWithTimeout(cmd, args, timeout = 10000) {
  return new Promise((resolve, reject) => {
    // Declared before the callbacks that reference it so no callback can
    // ever observe it in the temporal dead zone.
    let timer = null;

    const child = execFile(cmd, args, { encoding: 'utf8' }, (error, stdout, stderr) => {
      if (timer) clearTimeout(timer);
      if (error) {
        reject(error);
      } else {
        resolve({ stdout, stderr });
      }
    });

    timer = setTimeout(() => {
      child.kill('SIGTERM');

      // Escalate to SIGKILL if SIGTERM is ignored. `child.killed` cannot be
      // used for this check: it becomes true as soon as kill() has sent
      // a signal, not when the process has exited, so it is already true
      // here. exitCode/signalCode stay null until the child really exits.
      const killTimer = setTimeout(() => {
        if (child.exitCode === null && child.signalCode === null) {
          child.kill('SIGKILL');
        }
      }, 2000);
      // unref() so this tail timer doesn't keep the event loop alive past
      // scan completion.
      killTimer.unref();
      // No need to keep the escalation pending once the child is gone.
      child.once('exit', () => clearTimeout(killTimer));

      const argText = Array.isArray(args) ? args.join(' ') : '';
      reject(new Error(`Command timeout after ${timeout}ms: ${cmd} ${argText}`));
    }, timeout);

    // Handle child process errors (e.g. executable not found).
    child.on('error', (err) => {
      if (timer) clearTimeout(timer);
      reject(err);
    });
  });
}
/**
 * Selects a whois server from the configuration
 * @param {string|Array<string>} whoisServer - Single server string or array of servers
 * @param {string} mode - Selection mode: 'random' (default) or 'cycle'
 * @returns {string|null} Selected whois server or null if none specified
 */
function selectWhoisServer(whoisServer = '', mode = 'random') {
  if (!whoisServer) {
    return null; // Use default whois behavior
  }

  if (typeof whoisServer === 'string') {
    return whoisServer;
  }

  if (Array.isArray(whoisServer) && whoisServer.length > 0) {
    if (mode === 'cycle') {
      // Round-robin through the list using the module-level cursor.
      const selectedServer = whoisServer[whoisServerCycleIndex % whoisServer.length];
      whoisServerCycleIndex = (whoisServerCycleIndex + 1) % whoisServer.length;
      return selectedServer;
    }
    // Random selection (default behavior)
    const randomIndex = Math.floor(Math.random() * whoisServer.length);
    return whoisServer[randomIndex];
  }

  return null;
}

/**
 * Gets common whois servers for debugging/fallback suggestions
 * @returns {Array<string>} List of common whois servers (a fresh array each call)
 */
function getCommonWhoisServers() {
  return [
    'whois.iana.org',
    'whois.internic.net',
    'whois.verisign-grs.com',
    'whois.markmonitor.com',
    'whois.godaddy.com',
    'whois.namecheap.com',
    'whois.1and1.com'
  ];
}

/**
 * Suggests alternative whois servers based on domain TLD.
 *
 * The input may be a bare hostname or a URL-ish string: protocol, path,
 * port and a trailing root dot are stripped before the TLD is taken.
 * TLD-specific servers come first, followed by the common servers, with
 * duplicates and the failed server removed.
 *
 * @param {string} domain - Domain to get suggestions for
 * @param {string} failedServer - Server that failed (to exclude from suggestions)
 * @returns {Array<string>} Suggested whois servers
 */
function suggestWhoisServers(domain, failedServer = null) {
  // Same normalisation whoisLookup applies, plus trailing-dot removal, so
  // "https://example.org/x" yields "org" rather than "org/x".
  const host = (typeof domain === 'string' ? domain : '')
    .replace(/^https?:\/\//, '')
    .replace(/\/.*$/, '')
    .replace(/:\d+$/, '')
    .replace(/\.$/, '');
  const tld = host.split('.').pop().toLowerCase();

  // TLD-specific servers
  const tldServers = {
    'com': ['whois.verisign-grs.com', 'whois.internic.net'],
    'net': ['whois.verisign-grs.com', 'whois.internic.net'],
    'org': ['whois.pir.org'],
    'info': ['whois.afilias.net'],
    'biz': ['whois.neulevel.biz'],
    'uk': ['whois.nominet.uk'],
    'de': ['whois.denic.de'],
    'fr': ['whois.afnic.fr'],
    'it': ['whois.nic.it'],
    'nl': ['whois.domain-registry.nl']
  };

  const suggestions = [];
  // Own-property check: a plain lookup would also match inherited names
  // ("constructor", "__proto__", ...) and spreading those throws.
  if (Object.prototype.hasOwnProperty.call(tldServers, tld)) {
    suggestions.push(...tldServers[tld]);
  }

  // Add common servers
  suggestions.push(...getCommonWhoisServers());

  // Remove duplicates and failed server
  const uniqueSuggestions = [...new Set(suggestions)];
  return failedServer
    ? uniqueSuggestions.filter(s => s !== failedServer)
    : uniqueSuggestions;
}
/**
 * Run a single whois query for a domain (no retries), with a hard timeout
 * and optional custom server.
 *
 * Never rejects: every outcome is reported through the returned object.
 * Any non-blank stderr output from the whois process is treated as a
 * failed lookup.
 *
 * @param {string} domain - Domain to lookup (protocol, path and port are stripped)
 * @param {number} timeout - Timeout in milliseconds (default: 10000)
 * @param {string|Array<string>} whoisServer - Custom whois server(s) to use
 * @param {boolean} debugMode - Enable debug logging (default: false)
 * @param {Function|null} logFunc - Optional sink for debug lines; when absent, console.log is used
 * @returns {Promise<Object>} Object with success status and output/error
 */
async function whoisLookup(domain = '', timeout = 10000, whoisServer = '', debugMode = false, logFunc = null) {
  const startTime = Date.now();

  // Single exit point for debug lines: custom sink when supplied,
  // otherwise a formatted console line.
  const emit = (text) => {
    const line = `${messageColors.highlight('[whois]')} ${text}`;
    if (logFunc) {
      logFunc(line);
    } else {
      console.log(formatLogMessage('debug', line));
    }
  };

  // Declared outside the try so the catch path can report whatever had
  // been worked out before the failure.
  let cleanDomain;
  let selectedServer;
  let whoisCommand;

  try {
    // Reduce a URL-ish input to a bare hostname.
    cleanDomain = domain
      .replace(/^https?:\/\//, '')
      .replace(/\/.*$/, '')
      .replace(/:\d+$/, '');

    selectedServer = selectWhoisServer(whoisServer);

    // argv for execFile. No shell is involved, so the values cannot inject
    // commands; the `--` stops a leading dash in the domain from being
    // read as an option by whois itself.
    const whoisArgs = selectedServer
      ? ['-h', selectedServer, '--', cleanDomain]
      : ['--', cleanDomain];

    // Display string for logs and the result object only -- never executed.
    whoisCommand = `whois ${whoisArgs.join(' ')}`;

    if (debugMode) {
      emit(`Starting lookup for ${cleanDomain} (timeout: ${timeout}ms)`);
      emit(`Command: ${whoisCommand}`);
    }

    const { stdout, stderr } = await execFileWithTimeout('whois', whoisArgs, timeout);
    const duration = Date.now() - startTime;

    if (stderr && stderr.trim()) {
      if (debugMode) {
        emit(`Lookup failed for ${cleanDomain} after ${duration}ms`);
        emit(`Server: ${selectedServer || 'default'}`);
        emit(`Error: ${stderr.trim()}`);
        emit(`Command executed: ${whoisCommand}`);
        if (selectedServer) {
          emit(`Custom server used: ${selectedServer}`);
        }
      }

      return {
        success: false,
        error: stderr.trim(),
        domain: cleanDomain,
        whoisServer: selectedServer,
        duration: duration,
        command: whoisCommand
      };
    }

    if (debugMode) {
      emit(`Lookup successful for ${cleanDomain} after ${duration}ms`);
      emit(`Server: ${selectedServer || 'default'}`);
      emit(`Output length: ${stdout.length} characters`);
    }

    return {
      success: true,
      output: stdout,
      domain: cleanDomain,
      whoisServer: selectedServer,
      duration: duration,
      command: whoisCommand
    };
  } catch (error) {
    const duration = Date.now() - startTime;
    const isTimeout = error.message.includes('timeout') || error.message.includes('Command timeout');
    const errorType = isTimeout ? 'timeout' : 'error';

    if (debugMode) {
      emit(`Lookup ${errorType} for ${cleanDomain || domain} after ${duration}ms`);
      emit(`Server: ${selectedServer || 'default'}`);
      emit(`Command: ${whoisCommand || 'command not built'}`);
      emit(`${errorType === 'timeout' ? 'Timeout' : 'Error'}: ${error.message}`);

      if (selectedServer) {
        emit(`Failed server: ${selectedServer} (custom)`);
      } else {
        emit(`Failed server: system default whois server`);
      }

      if (isTimeout) {
        emit(`Timeout exceeded ${timeout}ms limit`);
        if (selectedServer) {
          emit(`Consider using a different whois server or increasing timeout`);
        }
      }
    }

    return {
      success: false,
      error: error.message,
      domain: cleanDomain || domain,
      whoisServer: selectedServer,
      duration: duration,
      command: whoisCommand,
      isTimeout: isTimeout,
      errorType: errorType
    };
  }
}
{number} timeout - Timeout in milliseconds (default: 10000) * @param {string|Array<string>} whoisServer - Custom whois server(s) to use * @param {boolean} debugMode - Enable debug logging (default: false) * @param {Object} retryOptions - Retry configuration options * @param {number} whoisDelay - Delay in milliseconds before whois requests (default: 2000) * @returns {Promise<Object>} Object with success status and output/error */ async function whoisLookupWithRetry(domain = '', timeout = 10000, whoisServer = '', debugMode = false, retryOptions = {}, whoisDelay = 8000, logFunc = null) { const { maxRetries = 3, timeoutMultiplier = 1.5, useFallbackServers = true, retryOnTimeout = true, retryOnError = true } = retryOptions; let serversToTry = []; // Build list of servers to try if (whoisServer && whoisServer !== '') { if (Array.isArray(whoisServer)) { serversToTry = [...whoisServer]; // Copy array to avoid modifying original } else { serversToTry = [whoisServer]; } } else { serversToTry = ['']; // Default server (empty string instead of null) } // Add fallback servers if enabled and we have custom servers if (useFallbackServers && whoisServer && whoisServer !== '') { const fallbacks = suggestWhoisServers(domain).slice(0, 3); // Only add fallbacks that aren't already in our list const existingServers = serversToTry.filter(s => s !== ''); const existingServerCount = existingServers.length; const newFallbacks = fallbacks.filter(fb => { for (let i = 0; i < existingServerCount; i++) { if (existingServers[i] === fb) return false; } return true; }); serversToTry.push(...newFallbacks); } let lastError = null; let totalAttempts = 0; let serversAttempted = []; if (debugMode) { const totalServers = serversToTry.length; if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Starting whois lookup for ${domain} with ${totalServers} server(s) to try`); logFunc(`${messageColors.highlight('[whois-retry]')} Servers: [${serversToTry.map(s => s || 'default').join(', ')}]`); 
logFunc(`${messageColors.highlight('[whois-retry]')} Retry settings: maxRetries=${maxRetries} per server, timeoutMultiplier=${timeoutMultiplier}, retryOnTimeout=${retryOnTimeout}, retryOnError=${retryOnError}`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Starting whois lookup for ${domain} with ${totalServers} server(s) to try`)); console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Servers: [${serversToTry.map(s => s || 'default').join(', ')}]`)); console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Retry settings: maxRetries=${maxRetries} per server, timeoutMultiplier=${timeoutMultiplier}, retryOnTimeout=${retryOnTimeout}, retryOnError=${retryOnError}`)); } } // Try each server with retry logic const serverCount = serversToTry.length; for (let serverIndex = 0; serverIndex < serverCount; serverIndex++) { const server = serversToTry[serverIndex]; let currentTimeout = timeout; let retryCount = 0; serversAttempted.push(server); if (debugMode) { const serverName = (server && server !== '') ? server : 'default'; if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Server ${serverIndex + 1}/${serverCount}: ${serverName} (max ${maxRetries} attempts)`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Server ${serverIndex + 1}/${serverCount}: ${serverName} (max ${maxRetries} attempts)`)); } } // Retry this server up to maxRetries times while (retryCount < maxRetries) { totalAttempts++; const attemptNum = retryCount + 1; if (debugMode) { const serverName = (server && server !== '') ? 
server : 'default'; if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Attempt ${attemptNum}/${maxRetries} on server ${serverName} (timeout: ${currentTimeout}ms)`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Attempt ${attemptNum}/${maxRetries} on server ${serverName} (timeout: ${currentTimeout}ms)`)); } } // Add progressive delay between retries (but not before first attempt on any server) if (retryCount > 0 && whoisDelay > 0) { // Progressive delay: base delay * retry attempt number + extra delay // Attempt 2: base delay * 1 + 4000ms = 8000ms + 4000ms = 12000ms // Attempt 3: base delay * 2 + 6000ms = 16000ms + 6000ms = 22000ms // Attempt 4+: base delay * 3 + 6000ms = 24000ms + 6000ms = 30000ms (if maxRetries > 3) const delayMultiplier = Math.min(retryCount, 3); const baseDelay = whoisDelay * delayMultiplier; // Add extra delay based on retry attempt let extraDelay = 0; if (retryCount === 1) { extraDelay = 4000; // Extra 4 seconds for 2nd attempt } else if (retryCount >= 2) { extraDelay = 6000; // Extra 6 seconds for 3rd+ attempts } const actualDelay = baseDelay + extraDelay; if (debugMode) { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Adding ${actualDelay}ms progressive delay before retry ${retryCount + 1} (base: ${baseDelay}ms + extra: ${extraDelay}ms)...`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Adding ${actualDelay}ms progressive delay before retry ${retryCount + 1} (base: ${baseDelay}ms + extra: ${extraDelay}ms)...`)); } } await new Promise(resolve => setTimeout(resolve, actualDelay)); } else if (serverIndex > 0 && retryCount === 0 && whoisDelay > 0) { // Add delay before trying a new server (but not the very first server) if (debugMode) { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Adding ${whoisDelay}ms delay before trying new server...`); } else { console.log(formatLogMessage('debug', 
`${messageColors.highlight('[whois-retry]')} Adding ${whoisDelay}ms delay before trying new server...`)); } } await new Promise(resolve => setTimeout(resolve, whoisDelay)); } else if (debugMode && whoisDelay === 0) { // Log when delay is skipped due to whoisDelay being 0 if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Skipping delay (whoisDelay: ${whoisDelay}ms)`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Skipping delay (whoisDelay: ${whoisDelay}ms)`)); } } try { const result = await whoisLookup(domain, currentTimeout, server || '', debugMode, logFunc); if (result.success) { if (debugMode) { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} SUCCESS on attempt ${attemptNum}/${maxRetries} for server ${result.whoisServer || 'default'} (total attempts: ${totalAttempts})`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} SUCCESS on attempt ${attemptNum}/${maxRetries} for server ${result.whoisServer || 'default'} (total attempts: ${totalAttempts})`)); } } // Add retry info to result // V8 Optimized: Object.assign performs better than spread return Object.assign({}, result, { retryInfo: { totalAttempts: totalAttempts, maxAttempts: serverCount * maxRetries, serversAttempted: serversAttempted, finalServer: result.whoisServer, retriedAfterFailure: totalAttempts > 1, serverRetries: retryCount, serverIndex: serverIndex } }); } // Determine if we should retry based on error type const shouldRetry = (result.isTimeout && retryOnTimeout) || (!result.isTimeout && retryOnError); if (debugMode) { const serverName = (result.whoisServer && result.whoisServer !== '') ? result.whoisServer : 'default'; const errorType = result.isTimeout ? 
'TIMEOUT' : 'ERROR'; if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} ${errorType} on attempt ${attemptNum}/${maxRetries} with server ${serverName}: ${result.error}`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} ${errorType} on attempt ${attemptNum}/${maxRetries} with server ${serverName}: ${result.error}`)); } if (retryCount < maxRetries - 1) { if (shouldRetry) { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Will retry attempt ${attemptNum + 1}/${maxRetries} on same server...`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Will retry attempt ${attemptNum + 1}/${maxRetries} on same server...`)); } } else { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Skipping retry on same server (retryOn${result.isTimeout ? 'Timeout' : 'Error'}=${shouldRetry})`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Skipping retry on same server (retryOn${result.isTimeout ? 'Timeout' : 'Error'}=${shouldRetry})`)); } } } else if (serverIndex < serverCount - 1) { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Max retries reached for server${serverIndex < serverCount - 1 ? ', will try next server...' : ', no more servers to try'}`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Max retries reached for server${serverIndex < serverCount - 1 ? ', will try next server...' : ', no more servers to try'}`)); } } } lastError = result; // If this is the last retry for this server or we shouldn't retry this error type, break to next server if (retryCount >= maxRetries - 1 || !shouldRetry) { break; } // Increase timeout for next retry attempt on same server retryCount++; currentTimeout = Math.round(currentTimeout * timeoutMultiplier); } catch (error) { if (debugMode) { const serverName = (server && server !== '') ? 
server : 'default' if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} EXCEPTION on attempt ${attemptNum}/${maxRetries} with server ${serverName}: ${error.message}`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} EXCEPTION on attempt ${attemptNum}/${maxRetries} with server ${serverName}: ${error.message}`)); } } lastError = { success: false, error: error.message, domain: domain, whoisServer: server || '', isTimeout: error.message.includes('timeout'), duration: 0 }; // For exceptions, only retry if it's a retryable error type const isRetryableException = error.message.includes('timeout') || error.message.includes('ECONNRESET') || error.message.includes('ENOTFOUND'); if (retryCount >= maxRetries - 1 || !isRetryableException) { break; } retryCount++; currentTimeout = Math.round(currentTimeout * timeoutMultiplier); } } } // All attempts failed if (debugMode) { const attemptedServerCount = serversAttempted.length; if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} FINAL FAILURE: All ${totalAttempts} attempts failed for ${domain} across ${attemptedServerCount} server(s)`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} FINAL FAILURE: All ${totalAttempts} attempts failed for ${domain} across ${attemptedServerCount} server(s)`)); } if (lastError) { if (logFunc) { logFunc(`${messageColors.highlight('[whois-retry]')} Last error: ${lastError.error} (${lastError.isTimeout ? 'timeout' : 'error'})`); } else { console.log(formatLogMessage('debug', `${messageColors.highlight('[whois-retry]')} Last error: ${lastError.error} (${lastError.isTimeout ? 
/**
 * Performs a dig lookup on a domain with proper timeout handling.
 *
 * The input is reduced to a bare hostname (protocol, path and port are
 * stripped) and handed to `dig` as an argv array via execFileWithTimeout.
 * Failures are reported through the returned object rather than by throwing.
 *
 * @param {string} domain - Domain (or URL) to lookup
 * @param {string} recordType - DNS record type (A, AAAA, MX, TXT, etc.) default: 'A'
 * @param {number} timeout - Timeout in milliseconds (default: 5000)
 * @returns {Promise<Object>} On success `{ success: true, output, shortOutput, domain, recordType }`,
 *   on failure `{ success: false, error, domain, recordType }`
 */
async function digLookup(domain = '', recordType = 'A', timeout = 5000) {
  try {
    // Clean domain: drop protocol, then everything from the first "/", then a trailing port
    const cleanDomain = domain
      .replace(/^https?:\/\//, '')
      .replace(/\/.*$/, '')
      .replace(/:\d+$/, '');

    // execFile (no shell) rules out shell injection, but dig still interprets
    // its own argv: a leading "-" is an option (e.g. -f <file>), "+" a query
    // option and "@" a resolver override. Refuse those, and empty names,
    // before spawning anything.
    const optionPrefix = /^[-+@]/;
    if (!cleanDomain || optionPrefix.test(cleanDomain) || optionPrefix.test(String(recordType))) {
      return {
        success: false,
        error: `Invalid dig arguments: domain "${cleanDomain}", record type "${recordType}"`,
        domain: cleanDomain,
        recordType
      };
    }

    // Single dig command — full output contains everything including short answers.
    const { stdout: fullOutput, stderr } = await execFileWithTimeout('dig', [cleanDomain, recordType], timeout);

    // Any stderr output is treated as a failed lookup
    if (stderr && stderr.trim()) {
      return { success: false, error: stderr.trim(), domain: cleanDomain, recordType };
    }

    // Extract short output from ANSWER SECTION of full dig output:
    // the last whitespace-separated field of every answer line.
    const answerMatch = fullOutput.match(/;; ANSWER SECTION:\n([\s\S]*?)(?:\n;;|\n*$)/);
    let shortOutput = '';
    if (answerMatch) {
      shortOutput = answerMatch[1]
        .split('\n')
        // Trim first so trailing whitespace cannot turn the last field into ''
        // (which would silently drop the record).
        .map(line => line.trim().split(/\s+/).pop())
        .filter(Boolean)
        .join('\n');
    }

    return { success: true, output: fullOutput, shortOutput, domain: cleanDomain, recordType };
  } catch (error) {
    // Covers non-string input, spawn failures and timeouts raised by the helper
    return { success: false, error: error.message, domain: domain, recordType };
  }
}

/**
 * Checks if whois output contains all specified search terms (AND logic).
 * Matching is case-insensitive substring matching.
 * @param {string} whoisOutput - The whois lookup output
 * @param {Array<string>} searchTerms - Array of terms that must all be present
 * @returns {boolean} True if all terms are found; false for a missing/empty
 *   term list, non-string output, or any non-string term
 */
function checkWhoisTerms(whoisOutput, searchTerms) {
  if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
    return false;
  }
  // A failed lookup may hand us null/undefined; that can never satisfy the terms
  if (typeof whoisOutput !== 'string') {
    return false;
  }
  const lowerOutput = whoisOutput.toLowerCase();
  return searchTerms.every(term => typeof term === 'string' && lowerOutput.includes(term.toLowerCase()));
}

/**
 * Checks if whois output contains any of the specified search terms (OR logic).
 * Matching is case-insensitive substring matching.
 * @param {string} whoisOutput - The whois lookup output
 * @param {Array<string>} searchTerms - Array of terms where at least one must be present
 * @returns {boolean} True if any term is found; false for a missing/empty
 *   term list or non-string output. Non-string terms are ignored.
 */
function checkWhoisTermsOr(whoisOutput, searchTerms) {
  if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
    return false;
  }
  if (typeof whoisOutput !== 'string') {
    return false;
  }
  const lowerOutput = whoisOutput.toLowerCase();
  return searchTerms.some(term => typeof term === 'string' && lowerOutput.includes(term.toLowerCase()));
}
/**
 * Checks if dig output contains all specified search terms (AND logic).
 * Matching is case-insensitive substring matching.
 * @param {string} digOutput - The dig lookup output
 * @param {Array<string>} searchTerms - Array of terms that must all be present
 * @returns {boolean} True if all terms are found; false for a missing/empty
 *   term list, non-string output, or any non-string term
 */
function checkDigTerms(digOutput, searchTerms) {
  if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
    return false;
  }
  // A failed lookup may hand us null/undefined; that can never satisfy the terms
  if (typeof digOutput !== 'string') {
    return false;
  }
  const lowerOutput = digOutput.toLowerCase();
  return searchTerms.every(term => typeof term === 'string' && lowerOutput.includes(term.toLowerCase()));
}

/**
 * Checks if dig output contains any of the specified search terms (OR logic).
 * Matching is case-insensitive substring matching.
 * @param {string} digOutput - The dig lookup output
 * @param {Array<string>} searchTerms - Array of terms where at least one must be present
 * @returns {boolean} True if any term is found; false for a missing/empty
 *   term list or non-string output. Non-string terms are ignored.
 */
function checkDigTermsOr(digOutput, searchTerms) {
  if (!searchTerms || !Array.isArray(searchTerms) || searchTerms.length === 0) {
    return false;
  }
  if (typeof digOutput !== 'string') {
    return false;
  }
  const lowerOutput = digOutput.toLowerCase();
  return searchTerms.some(term => typeof term === 'string' && lowerOutput.includes(term.toLowerCase()));
}

/**
 * Enhanced dry run callback factory for better nettools reporting.
 *
 * The returned callback records every match under the 'dryRunNetTools' key of
 * matchedDomains and, when forceDebug is set, prints a one-line summary.
 *
 * @param {Map} matchedDomains - The matched domains collection
 * @param {boolean} forceDebug - Debug logging flag
 * @returns {Function} Enhanced dry run callback
 *   `(domain, tool, matchType, matchedTerm, details, additionalInfo = {})`;
 *   the properties of additionalInfo are merged into the stored record
 */
function createEnhancedDryRunCallback(matchedDomains, forceDebug) {
  return (domain, tool, matchType, matchedTerm, details, additionalInfo = {}) => {
    // The default only covers `undefined`; tolerate an explicit null as well
    const extra = additionalInfo ?? {};
    const result = { domain, tool, matchType, matchedTerm, details, ...extra };

    // Create the bucket on first use instead of throwing when the caller
    // has not pre-seeded the map.
    let bucket = matchedDomains.get('dryRunNetTools');
    if (!bucket) {
      bucket = [];
      matchedDomains.set('dryRunNetTools', bucket);
    }
    bucket.push(result);

    if (forceDebug) {
      const serverInfo = extra.server ? ` (server: ${extra.server})` : '';
      const timingInfo = extra.duration ? ` [${extra.duration}ms]` : '';
      console.log(formatLogMessage('debug', `[DRY RUN] NetTools match: ${domain} via ${String(tool).toUpperCase()} (${matchType})${serverInfo}${timingInfo}`));
    }
  };
}
whoisOrTerms.map(t => t.toLowerCase()) : null; const digTermsLower = hasDig ? digTerms.map(t => t.toLowerCase()) : null; const digOrTermsLower = hasDigOr ? digOrTerms.map(t => t.toLowerCase()) : null; // Hoisted out of handleNetToolsCheck so the closure is constructed once per // handler rather than once per invocation. References forceDebug, debugLogFile, // and fs from the destructured config above. function logToConsoleAndFile(message) { if (forceDebug) { console.log(formatLogMessage('debug', message)); } if (debugLogFile && fs) { try { const timestamp = new Date().toISOString(); const cleanMessage = stripAnsiColors(message); fs.appendFileSync(debugLogFile, `${timestamp} [debug nettools] ${cleanMessage}\n`); } catch (_) { // Silently fail file logging to avoid disrupting whois operations } } } // Create config-aware cache keys for deduplication // Whois: Only include search terms + server (domain registry data is consistent across subdomains) const whoisConfigKey = JSON.stringify({ terms: whoisTerms || [], orTerms: whoisOrTerms || [], server: whoisServer || 'default', serverMode: whoisServerMode || 'random' }); // Dig: Include all config (DNS records can vary by specific subdomain) const digConfigKey = JSON.stringify({ terms: digTerms || [], orTerms: digOrTerms || [], recordType: digRecordType, subdomain: digSubdomain }); // Whois cache is global (globalWhoisResultCache) — shared across all handler instances // Whois data is per root domain and doesn't change based on search terms // Dig cache is global (globalDigResultCache) — shared across all handler instances // DNS results are the same regardless of search terms return async function handleNetToolsCheck(domain, fullSubdomain) { const originalDomain = fullSubdomain; // Check if domain was already detected (skip expensive operations) if (typeof isDomainAlreadyDetected === 'function' && isDomainAlreadyDetected(fullSubdomain)) { if (forceDebug) { logToConsoleAndFile(`${messageColors.hig