UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

github.com/ryanbr/network-scanner

ryanbr/network-scanner

524 lines (461 loc) • 20.8 kB

JavaScript

/** * Browser exit and cleanup handler module * Provides graceful and forced browser closure functionality with comprehensive temp file cleanup */ const fs = require('fs'); const path = require('path'); const { execSync } = require('child_process'); const { formatLogMessage, messageColors } = require('./colorize'); // Constants for temp file cleanup const CHROME_TEMP_PATHS = [ '/tmp', '/dev/shm', '/tmp/snap-private-tmp/snap.chromium/tmp' ]; const CHROME_TEMP_PATTERNS = [ /^\.?com\.google\.Chrome\./, /^\.?org\.chromium\.Chromium\./, /^puppeteer-/ ]; // Precomputed colored subsystem prefixes — used so debug/temp-cleanup/user-data // log lines look the same as the rest of the codebase's colorized output. // Previously these were raw template literals like `[debug] [browser] ...` // which printed uncolored, while formatLogMessage-routed messages elsewhere // in the codebase had colored [debug] tags. That produced inconsistent // 'sometimes colored, sometimes not' output in scan logs. const BROWSER_TAG = messageColors.fileOp('[browser]'); const TEMP_CLEANUP_TAG = messageColors.cleanup('[temp-cleanup]'); const USER_DATA_TAG = messageColors.fileOp('[user-data]'); /** * Count and remove matching Chrome/Puppeteer temp entries from a directory using fs * @param {string} basePath - Directory to scan * @param {boolean} forceDebug - Whether to output debug logs * @returns {number} Number of items cleaned */ function cleanTempDir(basePath, forceDebug) { let entries; try { entries = fs.readdirSync(basePath); } catch { if (forceDebug) console.log(formatLogMessage('debug', `${TEMP_CLEANUP_TAG} Cannot read ${basePath}`)); return 0; } let cleaned = 0; for (const entry of entries) { let matched = false; for (const re of CHROME_TEMP_PATTERNS) { if (re.test(entry)) { matched = true; break; } } if (!matched) continue; try { fs.rmSync(path.join(basePath, entry), { recursive: true, force: true }); cleaned++; if (forceDebug) console.log(formatLogMessage('debug', `${TEMP_CLEANUP_TAG} Removed ${basePath}/${entry}`)); } catch (rmErr) { if (forceDebug) console.log(formatLogMessage('debug', `${TEMP_CLEANUP_TAG} Failed to remove ${basePath}/${entry}: ${rmErr.message}`)); } } return cleaned; } /** * Clean Chrome temporary files and directories * @param {Object} options - Cleanup options * @param {boolean} options.includeSnapTemp - Whether to clean snap temp directories * @param {boolean} options.forceDebug - Whether to output debug logs * @param {boolean} options.comprehensive - Equivalent to includeSnapTemp; kept * for source compatibility with prior callers that distinguished the two. * @param {boolean} options.verbose - Whether to print a user-facing summary * (in addition to forceDebug's developer logs) * @returns {Object} Cleanup results */ function cleanupChromeTempFiles(options = {}) { const { includeSnapTemp = false, forceDebug = false, comprehensive = false, verbose = false } = options; try { if (verbose && !forceDebug) { console.log(`${TEMP_CLEANUP_TAG} Scanning Chrome/Puppeteer temporary files...`); } const paths = comprehensive || includeSnapTemp ? CHROME_TEMP_PATHS : CHROME_TEMP_PATHS.slice(0, 2); // /tmp and /dev/shm only let totalCleaned = 0; for (const basePath of paths) { totalCleaned += cleanTempDir(basePath, forceDebug); } if (verbose) { console.log(totalCleaned > 0 ? `${TEMP_CLEANUP_TAG} Removed ${totalCleaned} temporary file(s)/folder(s)` : `${TEMP_CLEANUP_TAG} Clean - no remaining temporary files`); } else if (forceDebug) { console.log(formatLogMessage('debug', `${TEMP_CLEANUP_TAG} Cleanup completed (${totalCleaned} items)`)); } return { success: true, itemsCleaned: totalCleaned }; } catch (cleanupErr) { const errorMsg = `Chrome temp cleanup failed: ${cleanupErr.message}`; if (verbose) { console.warn(`${TEMP_CLEANUP_TAG} ${errorMsg}`); } else if (forceDebug) { console.log(formatLogMessage('debug', `${TEMP_CLEANUP_TAG} ${errorMsg}`)); } return { success: false, error: cleanupErr.message, itemsCleaned: 0 }; } } /** * Cleanup specific user data directory (for browser instances) * @param {string} userDataDir - Path to user data directory to clean * @param {boolean} forceDebug - Whether to output debug logs * @returns {Promise<Object>} Cleanup results */ async function cleanupUserDataDir(userDataDir, forceDebug = false) { if (!userDataDir) { return { success: true, cleaned: false, reason: 'No user data directory specified' }; } // fs.rmSync({force: true}) treats ENOENT as a no-op, so an existsSync // pre-check is two syscalls where one would do (and a TOCTOU besides). // If the dir was already gone we just report cleaned:true without drama. try { fs.rmSync(userDataDir, { recursive: true, force: true }); if (forceDebug) { console.log(formatLogMessage('debug', `${USER_DATA_TAG} Cleaned user data directory: ${userDataDir}`)); } return { success: true, cleaned: true }; } catch (rmErr) { if (forceDebug) { console.log(formatLogMessage('debug', `${USER_DATA_TAG} Failed to remove user data directory ${userDataDir}: ${rmErr.message}`)); } return { success: false, error: rmErr.message, cleaned: false }; } } /** * Attempts to gracefully close all browser pages and the browser instance * @param {import('puppeteer').Browser} browser - The Puppeteer browser instance * @param {boolean} forceDebug - Whether to output debug logs * @returns {Promise<void>} */ async function gracefulBrowserCleanup(browser, forceDebug = false) { // FIX: Check browser connection before operations if (!browser || !browser.isConnected()) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Browser not connected, skipping cleanup`)); return; } if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Getting all browser pages...`)); let pages; try { pages = await browser.pages(); } catch (pagesErr) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Failed to get pages: ${pagesErr.message}`)); return; } if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Found ${pages.length} pages to close`)); await Promise.all(pages.map(async (page) => { if (!page.isClosed()) { try { // FIX: Wrap page.url() in try-catch to handle race condition let pageUrl = 'unknown'; try { pageUrl = page.url(); } catch (urlErr) { // Page closed between check and url call } if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Closing page: ${pageUrl}`)); await page.close(); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Page closed successfully`)); } catch (err) { // Force close if normal close fails if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Force closing page: ${err.message}`)); } } })); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} All pages closed, closing browser...`)); // FIX: Check browser is still connected before closing try { if (browser.isConnected()) { await browser.close(); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Browser closed successfully`)); } else { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Browser already disconnected`)); } } catch (closeErr) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Browser close failed: ${closeErr.message}`)); } } /** * Force kills the browser process using system signals * @param {import('puppeteer').Browser} browser - The Puppeteer browser instance * @param {boolean} forceDebug - Whether to output debug logs * @returns {Promise<void>} */ async function forceBrowserKill(browser, forceDebug = false) { try { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Attempting force closure of browser process...`)); const browserProcess = browser.process(); if (!browserProcess || !browserProcess.pid) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} No browser process available`)); return; } const mainPid = browserProcess.pid; if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Main Chrome PID: ${mainPid}`)); // PRIMARY PATH: kill OUR browser process tree only. // // The previous primary path ran `ps | grep "puppeteer.*chrome"` and // SIGTERM'd every match, which kills puppeteer-chrome processes spawned // by ANY other process on the machine (concurrent nwss runs, automate // scripts, other tools). The fix is to walk the live process table once // and filter to PIDs whose ancestor chain leads back to OUR mainPid. // The broad sweep stays as the fallback only if ps fails or the targeted // kill doesn't take down the main PID. let killedTargeted = false; try { const psOutput = execSync(`ps -eo pid,ppid,cmd`, { encoding: 'utf8', timeout: 5000 }); const psLines = psOutput.trim().split('\n').slice(1); // drop header // pid -> ppid map for ancestry walks; collect chrome-ish candidates. const ppidOf = new Map(); const chromeCandidates = new Set(); for (const line of psLines) { const m = line.trim().match(/^\s*(\d+)\s+(\d+)\s+(.*)$/); if (!m) continue; const pid = parseInt(m[1], 10); const ppid = parseInt(m[2], 10); if (Number.isNaN(pid) || Number.isNaN(ppid)) continue; ppidOf.set(pid, ppid); // Chrome's helpers (gpu, renderer, utility) don't all carry the // 'puppeteer' substring; rely on ancestry instead of cmd matching. // 'chrom' covers both 'chrome' and 'chromium' in one substring scan. if (m[3].includes('chrom')) { chromeCandidates.add(pid); } } // Filter candidates to descendants of (or equal to) mainPid. const ourPids = [mainPid]; for (const pid of chromeCandidates) { if (pid === mainPid) continue; let cur = ppidOf.get(pid); let hops = 0; while (cur && cur > 1 && hops < 128) { if (cur === mainPid) { ourPids.push(pid); break; } cur = ppidOf.get(cur); hops++; } } if (forceDebug) { console.log(formatLogMessage('debug', `${BROWSER_TAG} Targeted kill: ${ourPids.length} PIDs in mainPid=${mainPid}'s tree: [${ourPids.join(', ')}]`)); } // SIGTERM the tree gracefully. for (const pid of ourPids) { try { process.kill(pid, 'SIGTERM'); } catch (killErr) { if (forceDebug && killErr.code !== 'ESRCH') { console.log(formatLogMessage('debug', `${BROWSER_TAG} SIGTERM to PID ${pid} failed: ${killErr.message}`)); } } } await new Promise(resolve => setTimeout(resolve, 2000)); // SIGKILL stragglers. for (const pid of ourPids) { try { process.kill(pid, 0); // existence probe process.kill(pid, 'SIGKILL'); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Force-killed PID ${pid}`)); } catch (checkErr) { if (forceDebug && checkErr.code !== 'ESRCH') { console.log(formatLogMessage('debug', `${BROWSER_TAG} Probe/kill PID ${pid} error: ${checkErr.message}`)); } } } // Confirm mainPid is gone — if not, the targeted kill is considered // not-effective and we fall through to the broad sweep below. try { process.kill(mainPid, 0); } catch (e) { if (e.code === 'ESRCH') killedTargeted = true; } } catch (psErr) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} ps -eo pid,ppid,cmd failed: ${psErr.message}`)); } // FALLBACK PATH: targeted kill failed or ps wasn't available. Try the // spawned-process handle directly, then last-resort the broad pkill. // (killAllPuppeteerChrome in the next module is the truly nuclear option.) if (!killedTargeted) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Targeted kill did not confirm mainPid death; trying browserProcess handle`)); try { browserProcess.kill('SIGTERM'); await new Promise(resolve => setTimeout(resolve, 2000)); try { process.kill(mainPid, 0); browserProcess.kill('SIGKILL'); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Fallback: Force-killed main PID ${mainPid}`)); } catch (checkErr) { if (forceDebug && checkErr.code !== 'ESRCH') { console.log(formatLogMessage('debug', `${BROWSER_TAG} Fallback probe PID ${mainPid} error: ${checkErr.message}`)); } } } catch (fallbackErr) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Fallback kill failed: ${fallbackErr.message}`)); } } } catch (forceKillErr) { console.error(formatLogMessage('error', `${BROWSER_TAG} Failed to force kill browser: ${forceKillErr.message}`)); } try { if (browser.isConnected()) { browser.disconnect(); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Browser connection disconnected`)); } } catch (disconnectErr) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Failed to disconnect browser: ${disconnectErr.message}`)); } } /** * Kill all Chrome processes by command line pattern (nuclear option) * @param {boolean} forceDebug - Whether to output debug logs * @returns {Promise<void>} */ async function killAllPuppeteerChrome(forceDebug = false) { try { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Nuclear option: killing all puppeteer Chrome processes...`)); try { execSync(`pkill -f "puppeteer.*chrome"`, { stdio: 'ignore', timeout: 5000 }); if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} pkill completed`)); } catch (pkillErr) { if (forceDebug && pkillErr.status !== 1) { console.log(formatLogMessage('debug', `${BROWSER_TAG} pkill failed with status ${pkillErr.status}: ${pkillErr.message}`)); } } await new Promise(resolve => setTimeout(resolve, 2000)); } catch (nuclearErr) { console.error(formatLogMessage('error', `${BROWSER_TAG} Nuclear Chrome kill failed: ${nuclearErr.message}`)); } } /** * Handles comprehensive browser cleanup including processes, temp files, and user data * @param {import('puppeteer').Browser} browser - The Puppeteer browser instance * @param {Object} options - Cleanup options * @param {boolean} options.forceDebug - Whether to output debug logs * @param {number} options.timeout - Timeout in milliseconds before force closure (default: 10000) * @param {boolean} options.exitOnFailure - Whether to exit process on cleanup failure (default: true) * @param {boolean} options.cleanTempFiles - Whether to clean standard temp files (default: true) * @param {boolean} options.comprehensiveCleanup - Whether to perform comprehensive temp file cleanup (default: false) * @param {string} options.userDataDir - User data directory to clean (optional) * @param {boolean} options.verbose - Whether to show verbose cleanup output (default: false) * @returns {Promise<Object>} - Returns cleanup results object */ async function handleBrowserExit(browser, options = {}) { const { forceDebug = false, timeout = 10000, exitOnFailure = true, cleanTempFiles = true, comprehensiveCleanup = false, userDataDir = null, verbose = false } = options; if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Starting comprehensive browser cleanup...`)); // All fields declared upfront so step 3 doesn't extend the object shape at // runtime (V8 hidden-class transition); the result shape is also fully // documented in one place this way. const results = { browserClosed: false, tempFilesCleanedCount: 0, tempFilesCleanedSuccess: false, tempFilesCleanedComprehensive: false, userDataCleaned: false, success: false, errors: [] }; try { // Step 1: Browser process cleanup try { // Race cleanup against a timeout. Attach a no-op .catch to the racing // cleanup promise so that when the timeout wins the eventual rejection // from the still-running graceful cleanup (page.close / browser.close // failing after we move on to forceBrowserKill) doesn't surface as an // unhandledRejection warning. const cleanupPromise = gracefulBrowserCleanup(browser, forceDebug); cleanupPromise.catch(() => {}); await Promise.race([ cleanupPromise, new Promise((_, reject) => setTimeout(() => reject(new Error('Browser cleanup timeout')), timeout) ) ]); results.browserClosed = true; } catch (browserCloseErr) { results.errors.push(`Browser cleanup failed: ${browserCloseErr.message}`); if (forceDebug || verbose) { console.warn(formatLogMessage('warn', `${BROWSER_TAG} Browser cleanup had issues: ${browserCloseErr.message}`)); } // Attempt targeted force kill of OUR process tree. await forceBrowserKill(browser, forceDebug); // Only escalate to the broad pkill if our browser is still up. A // successful targeted kill breaks the CDP WebSocket, which flips // isConnected() to false — in that case the nuclear path would just // murder other people's puppeteer-chrome instances for no gain. let stillConnected = false; try { stillConnected = browser.isConnected(); } catch (_) {} if (stillConnected) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Targeted force kill didn't take — escalating to nuclear cleanup`)); await killAllPuppeteerChrome(forceDebug); } else if (forceDebug) { console.log(formatLogMessage('debug', `${BROWSER_TAG} Targeted force kill succeeded; skipping nuclear cleanup`)); } results.browserClosed = true; // Assume success after force/nuclear path } // Step 2: User data directory cleanup if (userDataDir) { const userDataResult = await cleanupUserDataDir(userDataDir, forceDebug); results.userDataCleaned = userDataResult.cleaned; if (!userDataResult.success) { results.errors.push(`User data cleanup failed: ${userDataResult.error}`); } } // Step 3: Temp file cleanup. Both branches of the prior code ended up // walking all three CHROME_TEMP_PATHS (comprehensive used all 3 directly; // standard set includeSnapTemp:true which expands to all 3 too) — the // only meaningful difference was the verbose summary log. One call now. if (cleanTempFiles) { const tempResult = await cleanupChromeTempFiles({ includeSnapTemp: true, comprehensive: comprehensiveCleanup, forceDebug, verbose }); results.tempFilesCleanedSuccess = tempResult.success; results.tempFilesCleanedComprehensive = comprehensiveCleanup; if (tempResult.success) { results.tempFilesCleanedCount = tempResult.itemsCleaned; } else { results.errors.push(`${comprehensiveCleanup ? 'Comprehensive' : 'Standard'} temp cleanup failed: ${tempResult.error}`); } } // Determine overall success results.success = results.browserClosed && (results.errors.length === 0 || !exitOnFailure); if (forceDebug) { console.log(formatLogMessage('debug', `${BROWSER_TAG} Cleanup completed - Browser: ${results.browserClosed}, ` + `Temp files: ${results.tempFilesCleanedCount || 0}, ` + `User data: ${results.userDataCleaned}, ` + `Errors: ${results.errors.length}`)); } return results; } catch (overallErr) { results.errors.push(`Overall cleanup failed: ${overallErr.message}`); results.success = false; if (exitOnFailure) { if (forceDebug) console.log(formatLogMessage('debug', `${BROWSER_TAG} Forcing process exit due to cleanup failure`)); process.exit(1); } return results; } } module.exports = { handleBrowserExit, gracefulBrowserCleanup, forceBrowserKill, killAllPuppeteerChrome, cleanupChromeTempFiles, cleanupUserDataDir, CHROME_TEMP_PATHS, CHROME_TEMP_PATTERNS };