UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

458 lines (397 loc) 15.9 kB
// === Dry Run Module (dry-run.js) === // Handles dry run mode functionality for network scanner const fs = require('fs'); const { messageColors, formatLogMessage } = require('./colorize'); // Constants for dry run collection keys. SEARCH_STRING was removed — // addDryRunSearchString had zero callers, so the map was never populated // and the downstream "Searchstring Match" enhancement always produced // null. See the cleanup comment in processDryRunResults. const DRY_RUN_KEYS = { MATCHES: 'dryRunMatches', NET_TOOLS: 'dryRunNetTools' }; /** * Initialize dry run collections for a matched domains map * @param {Map} matchedDomains - The matched domains map to initialize * @throws {Error} If matchedDomains is not a Map instance */ function initializeDryRunCollections(matchedDomains) { if (!(matchedDomains instanceof Map)) { throw new Error('matchedDomains must be a Map instance for dry-run mode'); } matchedDomains.set(DRY_RUN_KEYS.MATCHES, []); matchedDomains.set(DRY_RUN_KEYS.NET_TOOLS, []); } /** * Validates match data object structure * @param {Object} matchData - Match data to validate * @throws {Error} If matchData is invalid */ function validateMatchData(matchData) { if (!matchData || typeof matchData !== 'object') { throw new Error('Match data must be an object'); } const requiredFields = ['regex', 'domain', 'resourceType', 'fullUrl']; for (const field of requiredFields) { // Check VALUE, not just key presence. The old `field in matchData` // accepted `{regex: undefined, ...}` because `in` only tests for // the property's existence on the object. The downstream output // then printed 'unknown' via `item.regex || 'unknown'` defensive // fallbacks — validation that doesn't catch this defeats its purpose. if (matchData[field] === undefined || matchData[field] === null) { throw new Error(`Match data missing required field: ${field}`); } } } /** * Validates nettools data object structure * @param {Object} netToolsData - NetTools data to validate * @throws {Error} If netToolsData is invalid */ function validateNetToolsData(netToolsData) { if (!netToolsData || typeof netToolsData !== 'object') { throw new Error('NetTools data must be an object'); } const requiredFields = ['domain', 'tool', 'matchType', 'matchedTerm']; for (const field of requiredFields) { // Value check (see validateMatchData for the rationale). if (netToolsData[field] === undefined || netToolsData[field] === null) { throw new Error(`NetTools data missing required field: ${field}`); } } } /** * Add a match to dry run collections * @param {Map} matchedDomains - The matched domains map * @param {Object} matchData - Match data object * @throws {Error} If parameters are invalid */ function addDryRunMatch(matchedDomains, matchData) { if (!(matchedDomains instanceof Map)) { throw new Error('matchedDomains must be a Map instance'); } validateMatchData(matchData); if (!matchedDomains.has(DRY_RUN_KEYS.MATCHES)) { throw new Error('Dry run collections not initialized. Call initializeDryRunCollections first.'); } matchedDomains.get(DRY_RUN_KEYS.MATCHES).push({ ...matchData, timestamp: new Date().toISOString() }); } /** * Add a nettools result to dry run collections * @param {Map} matchedDomains - The matched domains map * @param {Object} netToolsData - NetTools result data * @throws {Error} If parameters are invalid */ function addDryRunNetTools(matchedDomains, netToolsData) { if (!(matchedDomains instanceof Map)) { throw new Error('matchedDomains must be a Map instance'); } validateNetToolsData(netToolsData); if (!matchedDomains.has(DRY_RUN_KEYS.NET_TOOLS)) { throw new Error('Dry run collections not initialized. Call initializeDryRunCollections first.'); } matchedDomains.get(DRY_RUN_KEYS.NET_TOOLS).push({ ...netToolsData, timestamp: new Date().toISOString() }); } /** * Safely truncate long URLs for display * @param {string} url - URL to truncate * @param {number} maxLength - Maximum length to display * @returns {string} Truncated URL with ellipsis if needed */ function truncateUrl(url, maxLength = 80) { if (!url || url.length <= maxLength) { return url; } return url.substring(0, maxLength - 3) + '...'; } /** * Generate adblock rule from domain and resource type * @param {string} domain - Domain name * @param {string} resourceType - Resource type (optional) * @returns {string} Formatted adblock rule */ function generateAdblockRule(domain, resourceType = null) { if (!domain) return ''; if (resourceType && resourceType !== 'other') { return `||${domain}^$${resourceType}`; } return `||${domain}^`; } /** * Outputs dry run results to console with formatted display * If outputFile is specified, also captures output for file writing * @param {string} url - The URL being processed * @param {Array} matchedItems - Array of matched items with regex, domain, and resource type * @param {Array} netToolsResults - Array of whois/dig results * @param {string} pageTitle - Title of the page (if available) * @param {string} outputFile - Output file path (optional) * @param {Array} dryRunOutput - Array to collect output lines for file writing */ function outputDryRunResults(url, matchedItems = [], netToolsResults = [], pageTitle = '', outputFile = null, dryRunOutput = []) { try { const lines = []; // emit() — single source of truth for output. Writes the plain // version to the file-output array AND the (possibly colored) // version to the console. Previously every output line was a // paired lines.push(...) + console.log(...) statement, often in // separate blocks (file pushes first, then console logs), so // drift between file and terminal output was a real risk every // time someone edited only one half of a pair. const emit = (plain, colored = plain) => { lines.push(plain); console.log(colored); }; const truncatedUrl = truncateUrl(url); emit( `\n=== DRY RUN RESULTS === ${truncatedUrl}`, `\n${messageColors.scanning('=== DRY RUN RESULTS ===')} ${truncatedUrl}` ); if (pageTitle && pageTitle.trim()) { const cleanTitle = pageTitle.trim().substring(0, 200); // Limit title length emit( `Title: ${cleanTitle}`, `${messageColors.info('Title:')} ${cleanTitle}` ); } const totalMatches = matchedItems.length + netToolsResults.length; if (totalMatches === 0) { const noMatchMsg = `No matching rules found on ${truncatedUrl}`; emit(noMatchMsg, messageColors.warn(noMatchMsg)); if (outputFile) { dryRunOutput.push(...lines); dryRunOutput.push(''); // Add empty line } return; } emit( `Matches found: ${totalMatches}`, `${messageColors.success('Matches found:')} ${totalMatches}` ); // Process regex matches matchedItems.forEach((item, index) => { try { emit(''); // blank separator before each match item emit( `[${index + 1}] Regex Match:`, `${messageColors.highlight(`[${index + 1}]`)} ${messageColors.match('Regex Match:')}` ); emit(` Pattern: ${item.regex || 'unknown'}`); emit(` Domain: ${item.domain || 'unknown'}`); emit(` Resource Type: ${item.resourceType || 'unknown'}`); emit(` Full URL: ${truncateUrl(item.fullUrl || '')}`); if (item.wasBlocked) { emit( ` Status: BLOCKED (even_blocked enabled)`, ` ${messageColors.warn('Status:')} BLOCKED (even_blocked enabled)` ); } // Searchstring "not found" — see processDryRunResults comment // for why the positive-match branch was removed. if (item.searchStringChecked) { emit( ` ✗ Searchstring: No matches found in content`, ` ${messageColors.warn('✗ Searchstring:')} No matches found in content` ); } const adblockRule = generateAdblockRule(item.domain, item.resourceType); emit( ` Adblock Rule: ${adblockRule}`, ` ${messageColors.info('Adblock Rule:')} ${adblockRule}` ); } catch (itemErr) { const errorMsg = `Error processing match item ${index + 1}: ${itemErr.message}`; emit( ` Error: ${errorMsg}`, ` ${messageColors.warn('Error:')} ${errorMsg}` ); } }); // Process nettools results netToolsResults.forEach((result, index) => { try { const resultIndex = matchedItems.length + index + 1; emit(''); // blank separator before each nettools item emit( `[${resultIndex}] NetTools Match:`, `${messageColors.highlight(`[${resultIndex}]`)} ${messageColors.match('NetTools Match:')}` ); emit(` Domain: ${result.domain || 'unknown'}`); emit(` Tool: ${(result.tool || 'unknown').toUpperCase()}`); const matchDesc = `${result.matchType || 'unknown'} - "${result.matchedTerm || 'unknown'}"`; emit( ` ✓ Match: ${matchDesc}`, ` ${messageColors.success('✓ Match:')} ${matchDesc}` ); if (result.details) { emit(` Details: ${result.details}`); } const adblockRule = generateAdblockRule(result.domain); emit( ` Adblock Rule: ${adblockRule}`, ` ${messageColors.info('Adblock Rule:')} ${adblockRule}` ); } catch (resultErr) { const errorMsg = `Error processing nettools result ${index + 1}: ${resultErr.message}`; emit( ` Error: ${errorMsg}`, ` ${messageColors.warn('Error:')} ${errorMsg}` ); } }); // Store output for file writing if outputFile is specified if (outputFile) { dryRunOutput.push(...lines); dryRunOutput.push(''); // Add empty line between sites } } catch (outputErr) { const errorMsg = `Error in outputDryRunResults: ${outputErr.message}`; console.error(messageColors.error(errorMsg)); if (outputFile) { dryRunOutput.push(`Error: ${errorMsg}`); } } } /** * Process dry run results for a URL and output them * @param {string} currentUrl - The URL being processed * @param {Map} matchedDomains - The matched domains map with dry run collections * @param {Object} page - Puppeteer page object for getting title * @param {string} outputFile - Output file path (optional) * @param {Array} dryRunOutput - Array to collect output lines for file writing * @param {boolean} forceDebug - Debug logging flag * @returns {Object} Dry run result summary */ async function processDryRunResults(currentUrl, matchedDomains, page, outputFile = null, dryRunOutput = [], forceDebug = false) { try { // Validate inputs if (!currentUrl || typeof currentUrl !== 'string') { throw new Error('currentUrl must be a non-empty string'); } if (!(matchedDomains instanceof Map)) { throw new Error('matchedDomains must be a Map instance'); } // Get page title for dry run output with error handling let pageTitle = ''; try { if (page && typeof page.title === 'function') { pageTitle = await page.title(); } } catch (titleErr) { if (forceDebug) { console.log(formatLogMessage('debug', `Failed to get page title for ${currentUrl}: ${titleErr.message}`)); } // Leave pageTitle as '' (its initial value) on failure — the // truthy check in outputDryRunResults then skips the Title line // entirely. Previously we set 'Title unavailable' here, which // was truthy and got printed as if it were the page's real // title: 'Title: Title unavailable'. } // Get collected matches with safe fallbacks const dryRunMatches = matchedDomains.get(DRY_RUN_KEYS.MATCHES) || []; const dryRunNetTools = matchedDomains.get(DRY_RUN_KEYS.NET_TOOLS) || []; // Enhance matches with the searchstring-checked flag from the // incoming match data. Previously this also looked up positive // searchstring results in a `dryRunSearchString` Map — but // `addDryRunSearchString` was never wired to any caller, so the // map was always empty and `searchStringMatch` was always null. // Removed that dead lookup and the per-item try/catch it required. const enhancedMatches = dryRunMatches.map((match) => ({ ...match, searchStringChecked: Boolean(match.needsSearchStringCheck) })); outputDryRunResults(currentUrl, enhancedMatches, dryRunNetTools, pageTitle, outputFile, dryRunOutput); const totalMatches = enhancedMatches.length + dryRunNetTools.length; return { success: true, matchCount: totalMatches, enhancedMatches, netToolsResults: dryRunNetTools, pageTitle, regexMatches: enhancedMatches.length, netToolsMatches: dryRunNetTools.length }; } catch (processErr) { const errorMsg = `Error processing dry run results for ${currentUrl}: ${processErr.message}`; console.error(messageColors.error(errorMsg)); if (forceDebug) { console.log(formatLogMessage('debug', `Stack trace: ${processErr.stack}`)); } return { success: false, error: errorMsg, matchCount: 0, enhancedMatches: [], netToolsResults: [], pageTitle: '', regexMatches: 0, netToolsMatches: 0 }; } } /** * Write dry run output to file with enhanced error handling * @param {string} outputFile - Output file path * @param {Array} dryRunOutput - Array of output lines * @param {boolean} silentMode - Silent mode flag * @returns {Object} Operation result with details */ function writeDryRunOutput(outputFile, dryRunOutput, silentMode = false) { try { if (!outputFile || typeof outputFile !== 'string') { return { success: false, error: 'Invalid output file path' }; } if (!Array.isArray(dryRunOutput) || dryRunOutput.length === 0) { if (!silentMode) { console.log(messageColors.info('No dry run output to write')); } return { success: true, written: false, reason: 'No output to write' }; } const dryRunContent = dryRunOutput.join('\n'); // Ensure output directory exists const path = require('path'); const outputDir = path.dirname(outputFile); if (outputDir !== '.') { fs.mkdirSync(outputDir, { recursive: true }); } fs.writeFileSync(outputFile, dryRunContent); if (!silentMode) { console.log(`${messageColors.fileOp('📄 Dry run results saved to:')} ${outputFile}`); } return { success: true, written: true, file: outputFile, lines: dryRunOutput.length, bytes: Buffer.byteLength(dryRunContent, 'utf8') }; } catch (writeErr) { const errorMsg = `Failed to write dry run output to ${outputFile}: ${writeErr.message}`; // Matches outputDryRunResults / processDryRunResults error format — // was bare `console.error('✗ ${errorMsg}')` here, the odd one out // among the three error paths in this module. console.error(messageColors.error(errorMsg)); return { success: false, error: errorMsg, written: false }; } } // Public surface used by nwss.js. Internal helpers (truncateUrl, // generateAdblockRule, validateMatchData, validateNetToolsData, // outputDryRunResults) stay module-private. DRY_RUN_KEYS, getDryRunStats, // addDryRunSearchString, and formatSearchStringMatch were removed — // see comments at their original sites for details. module.exports = { initializeDryRunCollections, addDryRunMatch, addDryRunNetTools, processDryRunResults, writeDryRunOutput };