UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

1,267 lines (1,125 loc) 46.4 kB
/**
 * Browser health monitoring module for nwss.js
 * Provides health checks and recovery mechanisms to prevent protocol timeouts
 */

const { formatLogMessage, messageColors } = require('./colorize');

// Log prefixes, passed through messageColors.processing once at module load
// so the debug paths below only interpolate a ready-made string.
const IS_PAGE_FROM_PREVIOUS_SCAN_TAG = messageColors.processing('[isPageFromPreviousScan]');
const REALTIME_CLEANUP_TAG = messageColors.processing('[realtime_cleanup]');
const GROUP_WINDOW_CLEANUP_TAG = messageColors.processing('[group_window_cleanup]');

const { execSync, execFile } = require('child_process');

// Window cleanup delay constant
const WINDOW_CLEANUP_DELAY_MS = 15000;

// window_clean REALTIME
const REALTIME_CLEANUP_BUFFER_MS = 25000; // Additional buffer time after site delay (increased for Cloudflare)
const REALTIME_CLEANUP_THRESHOLD = 12; // Default number of pages to keep
const REALTIME_CLEANUP_MIN_PAGES = 6; // Minimum pages before cleanup kicks in

// Page-count thresholds — soft warn vs hard restart trigger. The two used to
// live as bare 30/40 literals in different functions with no visible link.
const PAGE_COUNT_WARN_THRESHOLD = 30; // checkBrowserHealth: warn-only recommendation
const PAGE_COUNT_RESTART_THRESHOLD = 40; // monitorBrowserHealth: trigger restart

// Browser response-time threshold above which monitorBrowserHealth triggers a restart.
const SLOW_RESPONSE_RESTART_MS = 6000;

// Heuristics for isPageFromPreviousScan: pages older than CREATION_AGE_MS or
// idle longer than IDLE_AGE_MS are treated as leftover from a prior scan.
const PREVIOUS_SCAN_CREATION_AGE_MS = 120000; // 2 minutes
const PREVIOUS_SCAN_IDLE_AGE_MS = 60000; // 60 seconds

// Track page creation order for realtime cleanup. WeakMap so closed pages
// drop out of tracking automatically when Puppeteer releases its internal
// references — no manual purge needed to prevent the leak class that
// purgeStaleTrackers() used to mitigate. The only API loss from Map ->
// WeakMap is iteration (for...of), which only purgeStaleTrackers used.
const pageCreationTracker = new WeakMap(); // Page -> creation timestamp // Track page usage for realtime cleanup safety. Same WeakMap rationale. const pageUsageTracker = new WeakMap(); // Page -> { lastActivity, isProcessing } const PAGE_IDLE_THRESHOLD = 25000; // 25 seconds of inactivity before considering page safe to clean /** * Race a promise against a timeout, clearing the timer when the promise resolves/rejects. * Prevents leaked setTimeout handles that hold closure references until they fire. * @param {Promise} promise - The operation to race * @param {number} ms - Timeout in milliseconds * @param {string} msg - Error message on timeout * @returns {Promise} Resolves/rejects with the operation result, or rejects on timeout */ function raceWithTimeout(promise, ms, msg) { let timeoutId; const timeoutPromise = new Promise((_, reject) => { timeoutId = setTimeout(() => reject(new Error(msg)), ms); }); return Promise.race([promise, timeoutPromise]).finally(() => clearTimeout(timeoutId)); } const BYTES_GB = 1073741824; // 1024^3 const BYTES_MB = 1048576; // 1024^2 const BYTES_KB = 1024; /** * Format bytes to human readable string * @param {number} bytes * @returns {string} */ function formatMemory(bytes) { if (bytes >= BYTES_GB) return `${(bytes / BYTES_GB).toFixed(1)}GB`; if (bytes >= BYTES_MB) return `${(bytes / BYTES_MB).toFixed(1)}MB`; if (bytes >= BYTES_KB) return `${(bytes / BYTES_KB).toFixed(1)}KB`; return `${bytes}B`; } /** * Performs group-level window cleanup after all URLs in a site group complete * Closes all extra windows except the main browser window * @param {import('puppeteer').Browser} browserInstance - Browser instance * @param {string} groupDescription - Description of the group for logging * @param {boolean} forceDebug - Debug logging flag * @param {string|boolean} cleanupMode - Cleanup mode: true/"default" (conservative), "all" (aggressive) * @returns {Promise<Object>} Cleanup results */ async function 
performGroupWindowCleanup(browserInstance, groupDescription, forceDebug, cleanupMode = true) { try { // Wait before cleanup to allow any final operations to complete // Initialize result object with ALL possible properties upfront for V8 optimization const result = { success: false, closedCount: 0, totalPages: 0, mainPagePreserved: false, delayUsed: 0, estimatedMemoryFreed: 0, estimatedMemoryFreedFormatted: '', cleanupMode: '', error: null }; const modeText = cleanupMode === "all" ? "aggressive cleanup of old windows" : "conservative cleanup of extra windows" if (forceDebug) { console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Waiting ${WINDOW_CLEANUP_DELAY_MS}ms before ${modeText} for group: ${groupDescription}`)); } await new Promise(resolve => setTimeout(resolve, WINDOW_CLEANUP_DELAY_MS)); const allPages = await browserInstance.pages(); // Identify the main Puppeteer window (should be about:blank or the initial page) let mainPuppeteerPage = null; let pagesToClose = []; // Find the main page - typically the first page that's about:blank or has been there longest for (const page of allPages) { // Cache page.url() call to avoid repeated DOM/browser communication const pageUrl = page.url(); if (pageUrl === 'about:blank' || pageUrl === '' || pageUrl.startsWith('chrome://')) { if (!mainPuppeteerPage) { mainPuppeteerPage = page; // First blank page is likely the main window } else { pagesToClose.push(page); // Additional blank pages can be closed } } else { // Any page with actual content should be evaluated for closure if (cleanupMode === "all") { // Aggressive mode: close all content pages pagesToClose.push(page); } else { // Conservative mode: only close pages that look like leftovers from previous scans // Keep pages that might still be actively used const isOldPage = await isPageFromPreviousScan(page, forceDebug); if (isOldPage) { pagesToClose.push(page); } } } } // Ensure we always have a main page if (!mainPuppeteerPage && allPages.length > 0) { 
mainPuppeteerPage = allPages[0]; // Fallback to first page pagesToClose = allPages.slice(1); if (forceDebug) { // Cache URL call for logging const mainPageUrl = mainPuppeteerPage.url(); console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} No blank page found, using first page as main: ${mainPageUrl}`)); } } if (pagesToClose.length === 0) { if (forceDebug) { console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} No windows to close for group: ${groupDescription}`)); } result.success = true; result.totalPages = allPages.length; result.mainPagePreserved = true; result.cleanupMode = cleanupMode === "all" ? "all" : "default"; return result; } // Estimate memory usage before closing (parallel for performance) let totalEstimatedMemory = 0; const DEFAULT_PAGE_MEMORY = 8 * 1024 * 1024; // 8MB default estimate const pageMemoryEstimates = await Promise.all(pagesToClose.map(async (page) => { try { if (page.isClosed()) return 0; const metrics = await raceWithTimeout( page.metrics(), 1000, 'metrics timeout' ); if (metrics) { return ( (metrics.JSHeapUsedSize || 0) + (metrics.JSHeapTotalSize || 0) * 0.1 + (metrics.Nodes || 0) * 100 + (metrics.JSEventListeners || 0) * 50 ); } return DEFAULT_PAGE_MEMORY; } catch (metricsErr) { return DEFAULT_PAGE_MEMORY; } })); totalEstimatedMemory = pageMemoryEstimates.reduce((sum, mem) => sum + mem, 0); // Close identified old/unused pages const closePromises = pagesToClose.map(async (page, index) => { try { // Cache page state and URL for this operation const isPageClosed = page.isClosed(); const pageUrl = page.url(); if (!isPageClosed) { if (forceDebug) { console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Closing page: ${pageUrl}`)); } pageCreationTracker.delete(page); pageUsageTracker.delete(page); await page.close(); return { success: true, url: pageUrl || `page-${index}`, estimatedMemory: pageMemoryEstimates[index] }; } return { success: false, reason: 'already_closed', estimatedMemory: 0 }; } catch 
(closeErr) { if (forceDebug) { console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Failed to close old page ${index + 1}: ${closeErr.message}`)); } return { success: false, error: closeErr.message, estimatedMemory: 0 }; } }); const closeResults = await Promise.all(closePromises); // Single-pass count + sum (avoids 2 intermediate array allocations from .filter()) let successfulCloses = 0; let actualMemoryFreed = 0; for (let i = 0; i < closeResults.length; i++) { if (closeResults[i].success === true) { successfulCloses++; actualMemoryFreed += closeResults[i].estimatedMemory || 0; } } if (forceDebug) { console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Closed ${successfulCloses}/${pagesToClose.length} old windows for completed group: ${groupDescription} after ${WINDOW_CLEANUP_DELAY_MS}ms delay`)); console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Estimated memory freed: ${formatMemory(actualMemoryFreed)}`)); if (mainPuppeteerPage) { // Cache URL for final logging const mainPageUrl = mainPuppeteerPage.url(); console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Main Puppeteer window preserved: ${mainPageUrl}`)); } } // Update result object instead of creating new one result.success = true; result.closedCount = successfulCloses; result.totalPages = allPages.length; result.mainPagePreserved = mainPuppeteerPage && !mainPuppeteerPage.isClosed(); result.delayUsed = WINDOW_CLEANUP_DELAY_MS; result.estimatedMemoryFreed = actualMemoryFreed; result.estimatedMemoryFreedFormatted = formatMemory(actualMemoryFreed); result.cleanupMode = cleanupMode === "all" ? 
"all" : "default"; return result; } catch (cleanupErr) { if (forceDebug) { console.log(formatLogMessage('debug', `${GROUP_WINDOW_CLEANUP_TAG} Group cleanup failed for ${groupDescription}: ${cleanupErr.message}`)); } // Initialize result object with consistent shape for error case const result = { success: false, closedCount: 0, totalPages: 0, mainPagePreserved: false, delayUsed: 0, estimatedMemoryFreed: 0, estimatedMemoryFreedFormatted: '', cleanupMode: '', error: cleanupErr.message }; return result; } } /** * Checks if a page is safe to close (not actively processing) * @param {import('puppeteer').Page} page - Page to check * @param {boolean} forceDebug - Debug logging flag * @returns {Promise<boolean>} True if page is safe to close */ async function isPageSafeToClose(page, forceDebug) { try { if (page.isClosed()) { return true; // Already closed } // EXTRA SAFETY: Never close pages that might be in injection process const now = Date.now(); try { const pageUrl = page.url(); if (pageUrl && pageUrl !== 'about:blank' && now - (pageCreationTracker.get(page) || 0) < 30000) { return false; // Don't close recently created pages (within 30 seconds) } } catch (err) { /* ignore */ } const usage = pageUsageTracker.get(page); if (!usage) { return true; } if (usage.isProcessing) { if (forceDebug) { const pageUrl = page.url(); console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Page still processing: ${pageUrl.substring(0, 50)}...`)); } return false; } const idleTime = now - usage.lastActivity; const isSafe = idleTime >= PAGE_IDLE_THRESHOLD; if (!isSafe && forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Page not idle long enough: ${Math.round(idleTime/1000)}s < ${PAGE_IDLE_THRESHOLD/1000}s`)); } return isSafe; } catch (err) { if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Error checking page safety: ${err.message}`)); } return true; // Assume safe if we can't check } } /** * Updates page usage tracking * 
@param {import('puppeteer').Page} page - Page to update * @param {boolean} isProcessing - Whether page is actively processing */ function updatePageUsage(page, isProcessing = false) { try { if (!page.isClosed()) { const existing = pageUsageTracker.get(page); if (existing) { // Mutate in place -- same hidden class, no GC pressure existing.lastActivity = Date.now(); existing.isProcessing = isProcessing; } else { pageUsageTracker.set(page, { lastActivity: Date.now(), isProcessing: isProcessing }); } } } catch (err) { // Ignore errors in usage tracking } } /** * Performs realtime window cleanup - removes oldest pages when threshold is exceeded * Waits for site delay + buffer before cleanup, with extended buffer for Cloudflare sites * @param {import('puppeteer').Browser} browserInstance - Browser instance * @param {number} threshold - Maximum number of pages to keep (default: 8) * @param {boolean} forceDebug - Debug logging flag * @param {number} totalDelay - Total delay including site delay and appropriate buffer (default: 4000 + 15000) * @returns {Promise<Object>} Cleanup results */ async function performRealtimeWindowCleanup(browserInstance, threshold = REALTIME_CLEANUP_THRESHOLD, forceDebug, totalDelay = 19000) { try { // Initialize result object with consistent shape const result = { success: false, closedCount: 0, totalPages: 0, remainingPages: 0, threshold: 0, cleanupDelay: 0, reason: '', error: null }; // Quick count check before waiting (avoid the expensive delay if unnecessary) let quickPages; try { quickPages = await browserInstance.pages(); } catch (e) { result.error = e.message; return result; } // Skip cleanup if we don't have enough pages to warrant it if (quickPages.length <= Math.max(threshold, REALTIME_CLEANUP_MIN_PAGES)) { if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Only ${quickPages.length} pages open, threshold is ${threshold} - no cleanup needed`)); } result.success = true; result.totalPages = quickPages.length; 
result.reason = 'below_threshold'; return result; } // Use the provided total delay (already includes appropriate buffer) const cleanupDelay = totalDelay; if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Waiting ${cleanupDelay}ms before cleanup (threshold: ${threshold})`)); } await new Promise(resolve => setTimeout(resolve, cleanupDelay)); const allPagesAfterDelay = await browserInstance.pages(); // Also check for and close any popup contexts try { const contexts = await browserInstance.browserContexts(); for (const context of contexts) { if (context.isIncognito && context !== browserInstance.defaultBrowserContext()) { const contextPages = await context.pages(); if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Found ${contextPages.length} pages in popup context`)); } // Close popup context pages for (const page of contextPages) { if (!page.isClosed()) { await page.close(); } } } } } catch (contextErr) { if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Context cleanup error: ${contextErr.message}`)); } } // Find main Puppeteer page (usually about:blank) let mainPage = allPagesAfterDelay.find(page => { // Cache page.url() for main page detection const pageUrl = page.url(); return pageUrl === 'about:blank' || pageUrl === '' || pageUrl.startsWith('chrome://'); }) || allPagesAfterDelay[0]; // Fallback to first page // Get pages sorted by creation time (oldest first) const sortedPages = allPagesAfterDelay .filter(page => { // Cache page.isClosed() for filtering const isPageClosed = page.isClosed(); return page !== mainPage && !isPageClosed; }) .sort((a, b) => { const timeA = pageCreationTracker.get(a) || 0; const timeB = pageCreationTracker.get(b) || 0; return timeA - timeB; // Oldest first }); // Calculate how many pages to close const pagesToKeep = threshold - 1; // -1 for main page const pagesToClose = sortedPages.slice(0, Math.max(0, sortedPages.length - pagesToKeep)); // 
Filter out pages that are still being used const safetyChecks = await Promise.all( pagesToClose.map(page => isPageSafeToClose(page, forceDebug)) ); const safePagesToClose = pagesToClose.filter((page, index) => safetyChecks[index]); const unsafePagesCount = pagesToClose.length - safePagesToClose.length; if (unsafePagesCount > 0 && forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Skipping ${unsafePagesCount} active pages for safety`)); } if (safePagesToClose.length === 0) { if (forceDebug) { const reason = pagesToClose.length === 0 ? `${sortedPages.length} content pages, keeping ${pagesToKeep}` : `${pagesToClose.length} pages still active`; console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} No pages need closing (${reason})`)); } result.success = true; result.totalPages = allPagesAfterDelay.length; result.reason = 'no_cleanup_needed'; return result; } // Close oldest pages let closedCount = 0; for (const page of safePagesToClose) { try { const isPageClosed = page.isClosed(); // Re-check processing state — may have changed since safety check const usage = pageUsageTracker.get(page); if (!isPageClosed && !(usage && usage.isProcessing)) { const pageUrl = page.url(); await page.close(); pageCreationTracker.delete(page); // Remove from tracker pageUsageTracker.delete(page); closedCount++; if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Closed old page: ${pageUrl.substring(0, 50)}...`)); } } } catch (closeErr) { if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Failed to close page: ${closeErr.message}`)); } } } const remainingPages = allPagesAfterDelay.length - closedCount; if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Closed ${closedCount}/${pagesToClose.length} oldest pages (${unsafePagesCount} skipped for safety), ${remainingPages} pages remaining`)); } result.success = true; result.closedCount = closedCount; result.totalPages = 
allPagesAfterDelay.length; result.remainingPages = remainingPages; result.threshold = threshold; result.cleanupDelay = cleanupDelay; result.reason = 'cleanup_completed'; return result; } catch (cleanupErr) { if (forceDebug) { console.log(formatLogMessage('debug', `${REALTIME_CLEANUP_TAG} Cleanup failed: ${cleanupErr.message}`)); } // Initialize result object with consistent shape for error case const result = { success: false, closedCount: 0, totalPages: 0, remainingPages: 0, threshold: 0, cleanupDelay: 0, reason: '', error: cleanupErr.message }; return result; } } /** * Determines if a page appears to be from a previous scan and can be safely closed * @param {import('puppeteer').Page} page - Page to evaluate * @param {boolean} forceDebug - Debug logging flag * @returns {Promise<boolean>} True if page appears to be from previous scan */ async function isPageFromPreviousScan(page, forceDebug) { try { // FIX: Check page state first before any operations if (page.isClosed()) { return true; // Closed pages should be cleaned up } // Cache page.url() for all checks in this function const pageUrl = page.url(); // Always consider these as old/closeable if (pageUrl === 'about:blank' || pageUrl === '' || pageUrl.startsWith('chrome://') || pageUrl.startsWith('chrome-error://') || pageUrl.startsWith('data:')) { return false; // Don't close blank pages here, handled separately } // Use tracker timestamp instead of expensive page.title() CDP call const now = Date.now(); const createdAt = pageCreationTracker.get(page); if (createdAt && now - createdAt > PREVIOUS_SCAN_CREATION_AGE_MS) { // Page older than the creation-age threshold — likely from a previous scan return true; } // Check usage tracker -- idle pages are likely old const usage = pageUsageTracker.get(page); if (usage && !usage.isProcessing && now - usage.lastActivity > PREVIOUS_SCAN_IDLE_AGE_MS) { return true; // Idle beyond the idle-age threshold } // Fallback: only use page.title() if trackers have no data if 
(!createdAt && !usage) { try { const title = await page.title(); if (title.includes('404') || title.includes('Error') || title.includes('Not Found') || title === '') { return true; } } catch (titleErr) { return true; // Can't get title = bad state } } return false; // Conservative - don't close unless we're sure } catch (err) { if (forceDebug) { try { // Cache URL for error logging - wrap in try-catch as page might be closed const pageUrl = page.url(); console.log(formatLogMessage('debug', `${IS_PAGE_FROM_PREVIOUS_SCAN_TAG} Error evaluating page ${pageUrl}: ${err.message}`)); } catch (urlErr) { console.log(formatLogMessage('debug', `${IS_PAGE_FROM_PREVIOUS_SCAN_TAG} Error evaluating page: ${err.message}`)); } } return false; // Conservative - don't close if we can't evaluate } } /** * Tracks a new page for realtime cleanup purposes * @param {import('puppeteer').Page} page - Page to track */ function trackPageForRealtime(page) { pageCreationTracker.set(page, Date.now()); updatePageUsage(page, false); // Initialize usage tracking } /** * Removes a page from all tracking Maps immediately. * Call this before page.close() to prevent stale entries during concurrent execution. * @param {import('puppeteer').Page} page - Page to untrack */ function untrackPage(page) { pageCreationTracker.delete(page); pageUsageTracker.delete(page); } /** * No-op since the trackers were migrated to WeakMap — GC reclaims dead-page * entries automatically when Puppeteer drops its internal references. Kept * exported so the ~7 callers in nwss.js continue to compile; safe to delete * entirely once those callsites are scrubbed. 
*/ function purgeStaleTrackers() { // intentionally empty } /** * Quick browser responsiveness test for use during page setup * Designed to catch browser degradation between operations * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @param {number} timeout - Timeout in milliseconds (default: 3000) * @returns {Promise<boolean>} True if browser responds quickly, false otherwise */ async function isQuicklyResponsive(browserInstance, timeout = 3000) { try { await raceWithTimeout( browserInstance.version(), timeout, 'Quick responsiveness timeout' ); return true; } catch (error) { return false; } } /** * Tests if browser can handle network operations (like Network.enable) * Creates a test page and attempts basic network setup * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @param {number} timeout - Timeout in milliseconds (default: 10000) * @returns {Promise<object>} Network capability test result */ async function testNetworkCapability(browserInstance, timeout = 10000) { const result = { capable: false, error: null, responseTime: 0 }; const startTime = Date.now(); let testPage = null; try { // Create test page testPage = await raceWithTimeout( browserInstance.newPage(), timeout, 'Test page creation timeout' ); // Test network operations (the critical operation that's failing) await raceWithTimeout( testPage.setRequestInterception(true), timeout, 'Network.enable test timeout' ); // Turn off interception. Symmetric to the enable above — Network.disable // can hang for the same CDP reasons, so it needs the same watchdog. 
await raceWithTimeout( testPage.setRequestInterception(false), timeout, 'Network.disable test timeout' ); result.capable = true; result.responseTime = Date.now() - startTime; } catch (error) { result.error = error.message; result.responseTime = Date.now() - startTime; // Classify the error type if (error.message.includes('Network.enable') || error.message.includes('timed out') || error.message.includes('Protocol error')) { result.error = `Network capability test failed: ${error.message}`; } } finally { if (testPage && !testPage.isClosed()) { try { await testPage.close(); } catch (closeErr) { /* ignore cleanup errors */ } } } return result; } /** * Checks if browser instance is still responsive * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @param {number} timeout - Timeout in milliseconds (default: 5000) * @returns {Promise<object>} Health check result */ async function checkBrowserHealth(browserInstance, timeout = 8000) { const healthResult = { healthy: false, pageCount: 0, error: null, responseTime: 0, recommendations: [], criticalError: false, networkCapable: false }; const startTime = Date.now(); try { // Test 1: Check if browser is connected if (!browserInstance || browserInstance.process() === null) { healthResult.error = 'Browser process not running'; healthResult.recommendations.push('Create new browser instance'); healthResult.criticalError = true; return healthResult; } // Test 2: Try to get pages list with timeout const pages = await raceWithTimeout( browserInstance.pages(), timeout, 'Browser unresponsive - pages() timeout' ); healthResult.pageCount = pages.length; healthResult.responseTime = Date.now() - startTime; // Test 3: Check for excessive pages (memory leak indicator) if (pages.length > PAGE_COUNT_WARN_THRESHOLD) { healthResult.recommendations.push('Too many open pages - consider browser restart'); } // Test 4: Create a single test page to verify both browser functionality AND network capability let testPage 
= null; try { testPage = await raceWithTimeout( browserInstance.newPage(), timeout, 'Page creation timeout' ); // Quick test navigation to about:blank await raceWithTimeout( testPage.goto('about:blank'), timeout, 'Navigation timeout' ); // Test 5: Network capability test on the same page (avoids creating a second test page) try { const netTimeout = Math.min(timeout, 5000); await raceWithTimeout( testPage.setRequestInterception(true), netTimeout, 'Network.enable test timeout' ); // Disable can hang for the same CDP reasons enable can; mirror the watchdog. await raceWithTimeout( testPage.setRequestInterception(false), netTimeout, 'Network.disable test timeout' ); healthResult.networkCapable = true; } catch (networkErr) { healthResult.networkCapable = false; healthResult.recommendations.push(`Network operations failing: ${networkErr.message}`); if (networkErr.message.includes('Network.enable')) { healthResult.criticalError = true; } } await testPage.close(); } catch (pageTestError) { if (testPage && !testPage.isClosed()) { try { await testPage.close(); } catch (e) { /* ignore */ } } healthResult.error = `Page creation/navigation failed: ${pageTestError.message}`; if (isCriticalProtocolError(pageTestError)) { healthResult.recommendations.push('Browser restart required - critical protocol error'); healthResult.criticalError = true; } else { healthResult.recommendations.push('Browser restart recommended'); } return healthResult; } // Test 6: Check response time performance if (healthResult.responseTime > 5000) { healthResult.recommendations.push('Slow browser response - consider restart'); } // If all tests pass (including network capability) healthResult.healthy = healthResult.networkCapable; // Network capability is now critical for health } catch (error) { healthResult.error = error.message; healthResult.responseTime = Date.now() - startTime; // Categorize error types for better recommendations // Enhanced error categorization for Puppeteer 23.x if 
(isCriticalProtocolError(error)) { healthResult.recommendations.push('Browser restart required - critical protocol error'); healthResult.criticalError = true; } else if (error.message.includes('WebSocket') || error.message.includes('Connection terminated') || error.message.includes('Network service crashed')) { // New error types more common in Puppeteer 23.x healthResult.recommendations.push('Browser restart required - connection error'); healthResult.criticalError = true; } else if (error.message.includes('AbortError') || error.message.includes('Operation was aborted')) { healthResult.recommendations.push('Browser restart recommended - operation aborted'); } else if (error.message.includes('timeout') || error.message.includes('unresponsive')) { healthResult.recommendations.push('Browser restart required - unresponsive'); healthResult.criticalError = true; } else { healthResult.recommendations.push('Browser restart recommended - unknown error'); } } return healthResult; } /** * Checks memory usage of browser process (if available) * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @returns {Promise<object>} Memory usage information */ async function checkBrowserMemory(browserInstance) { const memoryResult = { available: false, usage: null, error: null, recommendations: [] }; try { const browserProcess = browserInstance.process(); if (!browserProcess || !browserProcess.pid) { memoryResult.error = 'No browser process available'; return memoryResult; } // Try to get process memory info (Linux/Unix) try { const memInfo = await new Promise((resolve, reject) => { execFile('ps', ['-p', String(browserProcess.pid), '-o', 'rss='], { encoding: 'utf8', timeout: 2000 }, (err, stdout) => { if (err) reject(err); else resolve(stdout); }); }); const memoryKB = parseInt(memInfo.trim(), 10); if (!Number.isNaN(memoryKB)) { const memoryMB = Math.round(memoryKB / 1024); memoryResult.available = true; memoryResult.usage = { rss: memoryKB, rssMB: 
memoryMB }; // Memory usage recommendations if (memoryMB > 1000) { memoryResult.recommendations.push(`High memory usage: ${memoryMB}MB - restart recommended`); } else if (memoryMB > 500) { memoryResult.recommendations.push(`Elevated memory usage: ${memoryMB}MB - monitor closely`); } } } catch (psError) { memoryResult.error = `Memory check failed: ${psError.message}`; } } catch (error) { memoryResult.error = error.message; } return memoryResult; } /** * Precompiled regex for critical protocol error detection (avoids array allocation per call) */ const CRITICAL_ERROR_REGEX = /Runtime\.callFunctionOn timed out|Protocol error|Target closed|Session closed|Connection closed|Browser has been closed|Runtime\.evaluate timed out|WebSocket is not open|WebSocket connection lost|Connection terminated|Network service crashed|Browser disconnected|CDP session invalid|Browser process exited|Navigation timeout of|Page crashed|Renderer process crashed|Network\.enable timed out|Network\.disable timed out|Network service not available/; /** * Precompiled regex for restart recommendation detection */ const RESTART_RECOMMENDATION_REGEX = /restart required|High memory usage/; /** * Detects critical protocol errors that require immediate browser restart */ function isCriticalProtocolError(error) { if (!error || !error.message) return false; return CRITICAL_ERROR_REGEX.test(error.message); } /** * Enhanced browser connectivity test for Puppeteer 23.x * Tests WebSocket connection and CDP session validity */ async function testBrowserConnectivity(browserInstance, timeout = 2500) { const connectivityResult = { connected: false, cdpResponsive: false, websocketHealthy: false, error: null }; try { // Test 1: Basic browser connection const isConnected = browserInstance.isConnected(); connectivityResult.connected = isConnected; if (!isConnected) { connectivityResult.error = 'Browser is not connected'; return connectivityResult; } // Test 2: CDP responsiveness with version check try { const version 
= await raceWithTimeout( browserInstance.version(), timeout, 'CDP version check timeout' ); connectivityResult.cdpResponsive = true; connectivityResult.websocketHealthy = true; // If version works, WebSocket is healthy } catch (cdpError) { connectivityResult.error = `CDP not responsive: ${cdpError.message}`; if (cdpError.message.includes('WebSocket')) { connectivityResult.websocketHealthy = false; } } } catch (error) { connectivityResult.error = error.message; } return connectivityResult; } /** * Performs comprehensive browser health assessment * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @param {object} options - Health check options * @returns {Promise<object>} Comprehensive health report */ async function performHealthAssessment(browserInstance, options = {}) { const { timeout = 8000, checkMemory = true, testConnectivity = true, forceDebug = false } = options; const assessment = { overall: 'unknown', timestamp: new Date().toISOString(), browser: {}, memory: {}, connectivity: {}, recommendations: [], needsRestart: false }; if (forceDebug) { console.log(formatLogMessage('debug', 'Starting browser health assessment...')); } // Browser responsiveness check assessment.browser = await checkBrowserHealth(browserInstance, timeout); // Enhanced connectivity check for Puppeteer 23.x if (testConnectivity) { assessment.connectivity = await testBrowserConnectivity(browserInstance, timeout); } // Memory usage check (if enabled and available) if (checkMemory) { assessment.memory = await checkBrowserMemory(browserInstance); } // Combine recommendations (push avoids spread operator intermediate arrays) assessment.recommendations = assessment.browser.recommendations.slice(); if (assessment.connectivity.error) { assessment.recommendations.push(`Connectivity issue: ${assessment.connectivity.error}`); } if (assessment.memory.recommendations) { for (let i = 0; i < assessment.memory.recommendations.length; i++) { 
assessment.recommendations.push(assessment.memory.recommendations[i]); } } // Determine overall health and restart necessity if (!assessment.browser.healthy) { assessment.overall = 'unhealthy'; assessment.needsRestart = true; } else if (assessment.browser.criticalError) { assessment.overall = 'critical'; assessment.needsRestart = true; } else if (testConnectivity && (!assessment.connectivity.connected || !assessment.connectivity.cdpResponsive)) { assessment.overall = 'disconnected'; assessment.needsRestart = true; } else if (assessment.recommendations.length > 0) { assessment.overall = 'degraded'; // Test each recommendation independently — avoids allocating a joined // string just to feed one regex test against it. assessment.needsRestart = assessment.recommendations.some(r => RESTART_RECOMMENDATION_REGEX.test(r)); } else { assessment.overall = 'healthy'; assessment.needsRestart = false; } if (forceDebug) { console.log(formatLogMessage('debug', `Health assessment complete: ${assessment.overall}`)); if (assessment.recommendations.length > 0) { console.log(formatLogMessage('debug', `Recommendations: ${assessment.recommendations.join(', ')}`)); } } return assessment; } /** * Monitors browser health and suggests actions for nwss.js integration * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @param {object} context - Context information for logging * @param {object} options - Monitoring options * @returns {Promise<object>} Monitoring result with action suggestions */ async function monitorBrowserHealth(browserInstance, context = {}, options = {}) { const { siteIndex = 0, totalSites = 0, urlsSinceCleanup = 0, cleanupInterval = 40, forceDebug = false, silentMode = false } = options; const result = { shouldRestart: false, shouldContinue: true, reason: null, assessment: null }; try { // Perform health assessment const assessment = await performHealthAssessment(browserInstance, { timeout: 8000, checkMemory: true, testConnectivity: true, 
// Enable enhanced connectivity testing forceDebug }); result.assessment = assessment; // Decision logic for restart if (assessment.browser.criticalError) { result.shouldRestart = true; result.reason = `Critical protocol error detected - immediate restart required`; } else if (assessment.connectivity && (!assessment.connectivity.connected || !assessment.connectivity.cdpResponsive)) { result.shouldRestart = true; result.reason = `Browser connectivity lost - WebSocket/CDP failure`; } else if (assessment.needsRestart) { result.shouldRestart = true; result.reason = `Browser health: ${assessment.overall} - ${assessment.recommendations[0] || 'restart needed'}`; } else if (urlsSinceCleanup >= cleanupInterval) { result.shouldRestart = true; result.reason = `Scheduled cleanup after ${urlsSinceCleanup} URLs`; } else if (assessment.browser.responseTime > SLOW_RESPONSE_RESTART_MS) { result.shouldRestart = true; result.reason = `Slow browser response: ${assessment.browser.responseTime}ms (threshold: ${SLOW_RESPONSE_RESTART_MS}ms)`; } else if (assessment.browser.pageCount > PAGE_COUNT_RESTART_THRESHOLD) { // More aggressive page count monitoring for Puppeteer 23.x result.shouldRestart = true; result.reason = `Too many open pages: ${assessment.browser.pageCount} (memory leak protection)`; } // Logging if (!silentMode && result.shouldRestart) { const progress = totalSites > 0 ? ` (${siteIndex + 1}/${totalSites})` : ''; console.log(`\n${messageColors.fileOp('Browser restart needed')} before site${progress}: ${result.reason}`); } if (forceDebug && !result.shouldRestart) { const connectivity = assessment.connectivity.connected ? 'connected' : 'disconnected'; const cdp = assessment.connectivity.cdpResponsive ? 
'responsive' : 'unresponsive'; console.log(formatLogMessage('debug', `Browser health OK - continuing (pages: ${assessment.browser.pageCount}, response: ${assessment.browser.responseTime}ms, ${connectivity}, CDP: ${cdp})`)); } } catch (monitorError) { result.shouldRestart = true; result.reason = `Health monitoring failed: ${monitorError.message}`; if (forceDebug) { console.log(formatLogMessage('debug', `Browser health monitoring error: ${monitorError.message}`)); } } return result; } /** * Simple health check function for quick integration * Enhanced version that includes network capability testing * @param {import('puppeteer').Browser} browserInstance - Puppeteer browser instance * @param {boolean} includeNetworkTest - Whether to test network capabilities (default: true) * @returns {Promise<boolean>} True if browser is healthy, false otherwise */ async function isBrowserHealthy(browserInstance, includeNetworkTest = true) { try { // Quick responsiveness test first (fastest check) const quickCheck = await isQuicklyResponsive(browserInstance, 2500); if (!quickCheck) return false; // More comprehensive health check if quick test passes const health = await checkBrowserHealth(browserInstance, includeNetworkTest ? 8000 : 5000); const connectivity = await testBrowserConnectivity(browserInstance, 3000); const baseHealth = health.healthy && connectivity.connected && connectivity.cdpResponsive; // Include network capability in health assessment if requested return includeNetworkTest ? 
(baseHealth && health.networkCapable) : baseHealth; } catch (error) { return false; } } /** * Performs comprehensive cleanup of page resources before operations that might cause detached frames * Also attempts to stop any pending navigations that might interfere * Used before reloads, navigations, and other operations that can trigger frame detachment * @param {import('puppeteer').Page} page - Page to clean up * @param {boolean} forceDebug - Debug logging flag * @returns {Promise<boolean>} True if cleanup succeeded */ async function cleanupPageBeforeReload(page, forceDebug = false) { try { // Cache page.isClosed() to avoid repeated browser calls const isPageClosed = page.isClosed(); if (isPageClosed) { return false; } // First, try to stop any pending navigation try { await page.evaluate(() => { // Stop any ongoing navigation if (window.stop) { window.stop(); } }); } catch (e) { // Page might be mid-navigation, that's ok } // Wait a bit for navigation to stop await new Promise(resolve => setTimeout(resolve, 500)); // FIX: Check if page is still open after delay before cleanup if (page.isClosed()) { if (forceDebug) { console.log(formatLogMessage('debug', 'Page closed during cleanup delay')); } return false; } // Now do the full cleanup try { await page.evaluate(() => { // Stop all media elements document.querySelectorAll('video, audio').forEach(media => { try { media.pause(); media.src = ''; media.load(); } catch(e) {} }); // Clear recent timers and intervals (cap to last 1000 to avoid massive loops) const highestId = setTimeout(() => {}, 0); const clearFrom = Math.max(0, highestId - 1000); for (let i = highestId; i >= clearFrom; i--) { clearTimeout(i); clearInterval(i); } // Stop recent animations if (typeof cancelAnimationFrame !== 'undefined') { const highestRAF = requestAnimationFrame(() => {}); const clearRAFFrom = Math.max(0, highestRAF - 200); for (let i = highestRAF; i >= clearRAFFrom; i--) { cancelAnimationFrame(i); } } // Clear all iframes properly 
document.querySelectorAll('iframe').forEach(iframe => { try { // Stop iframe content first if (iframe.contentWindow) { iframe.contentWindow.stop(); } iframe.src = 'about:blank'; iframe.remove(); } catch(e) {} }); // Force garbage collection if available if (window.gc) window.gc(); }); } catch (evalErr) { // Page closed during cleanup if (forceDebug) { console.log(formatLogMessage('debug', `Page cleanup evaluation failed: ${evalErr.message}`)); } return false; } if (forceDebug) { console.log(formatLogMessage('debug', 'Page resources cleaned before reload')); } return true; } catch (err) { if (forceDebug) { console.log(formatLogMessage('debug', `Page cleanup error: ${err.message}`)); } return false; } } module.exports = { checkBrowserHealth, checkBrowserMemory, testBrowserConnectivity, performGroupWindowCleanup, performRealtimeWindowCleanup, trackPageForRealtime, testNetworkCapability, isQuicklyResponsive, performHealthAssessment, monitorBrowserHealth, isBrowserHealthy, isCriticalProtocolError, updatePageUsage, untrackPage, cleanupPageBeforeReload, purgeStaleTrackers };