UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

726 lines (661 loc) 33.1 kB
/** * FlowProxy protection detection and handling module * Version: 1.0.0 - Enhanced with comprehensive documentation and smart detection * Detects flowProxy DDoS protection and handles it appropriately for security scanning * * FlowProxy (by Aurologic) is a DDoS protection service similar to Cloudflare that: * - Implements rate limiting and browser verification * - Uses JavaScript challenges to verify legitimate browsers * - Can block automated tools and scrapers * - Requires specific handling for security scanning tools */ const { formatLogMessage, messageColors } = require('./colorize'); // Precomputed colored '[flowproxy]' subsystem prefix. formatLogMessage only // colors the [severity] tag; this constant colors the subsystem prefix so // '[debug] [flowproxy] X' has both tags visually distinct. const FLOWPROXY_TAG = messageColors.processing('[flowproxy]'); /** * Timeout constants for FlowProxy operations (in milliseconds) * Optimized for Puppeteer 22.x performance while maintaining FlowProxy compatibility */ const TIMEOUTS = { PAGE_EVALUATION_SAFE: 10000, // Safe page evaluation timeout // FlowProxy-specific timeouts JS_CHALLENGE_DEFAULT: 15000, // Default JavaScript challenge timeout RATE_LIMIT_DEFAULT: 30000, // Default rate limit delay PAGE_TIMEOUT_DEFAULT: 45000, // Default page timeout NAVIGATION_TIMEOUT_DEFAULT: 45000 // Default navigation timeout }; // Default-false detection shape returned by the catch paths in // safePageEvaluate / analyzeFlowProxyProtection. Hoisted so the two // catch branches don't drift if a new detection flag is added. const DEFAULT_DETECTION = Object.freeze({ isFlowProxyDetected: false, hasSpecificSignal: false, hasFlowProxyDomain: false, hasFlowProxyElements: false, hasFlowProxyScripts: false, hasFlowProxyBrandText: false, hasFlowProxyHeaders: false, hasFlowProxyCookies: false, matchedHeader: null, matchedCookie: null, hasProtectionPage: false, hasChallengeElements: false, isRateLimited: false, hasJSChallenge: false, isProcessing: false }); // === HTTP RESPONSE HEADER / COOKIE DETECTION ================================= // Per-page accumulator for vendor-specific HTTP response signals. Populated // by the response listener attached via attachFlowProxyHeaderListener(); // read by analyzeFlowProxyProtection() to merge with the DOM/text scan. // // WeakMap so the entry is released when Puppeteer drops the page reference — // no manual cleanup needed. const pageHeaderState = new WeakMap(); // Header/cookie tokens that uniquely identify FlowProxy/Aurologic. Lowercase // for case-insensitive matching (response.headers() returns lowercase keys // but values keep their case). const VENDOR_TOKENS = ['flowproxy', 'aurologic']; // Header names where a vendor token in the VALUE is a strong signal. // (Server, Via, X-Powered-By, X-Cache, X-CDN are all places a CDN/proxy // commonly self-identifies.) const VENDOR_VALUE_HEADERS = ['server', 'via', 'x-powered-by', 'x-cache', 'x-cdn']; // Single source of truth for "is this cookie name vendor-namespaced?" // Used both by the Set-Cookie listener (parsing header text) and the jar // check in analyzeFlowProxyProtection (cookies()-API results), so the two // sides can't disagree about what counts. function isVendorCookieName(name) { if (!name) return false; const n = name.toLowerCase(); return n === 'flowproxy' || n === 'aurologic' || n.startsWith('flowproxy_') || n.startsWith('aurologic_') || n.startsWith('flowproxy-') || n.startsWith('aurologic-'); } // Default-empty listener state. One source of truth for the shape, used // at attach time (initial state) and at read time (fallback when the // listener was never attached). Add a new signal field once, applied // everywhere. function emptyHeaderState() { return { hasFlowProxyHeaders: false, hasFlowProxyCookies: false, matchedHeader: null, matchedCookie: null }; } /** * Attach a response listener to a page that watches for FlowProxy/Aurologic * HTTP response headers + cookies. Idempotent — safe to call multiple times. * * Headers are the most reliable signal: DOM scraping can be fooled by any * "Please wait" / "Loading" string, but a `Server: flowProxy` header is * uniquely the vendor's. Cookies likewise — `flowproxy_*` / `aurologic_*` * names don't collide with anything else in practice. * * Call BEFORE page.goto() so the navigation response itself is observed. * State is read later via analyzeFlowProxyProtection(). * * @param {import('puppeteer').Page} page - Puppeteer page instance */ function attachFlowProxyHeaderListener(page) { if (pageHeaderState.has(page)) return; // idempotent const state = emptyHeaderState(); pageHeaderState.set(page, state); page.on('response', (response) => { // Once both signals are found there's nothing more to learn — bail // immediately to keep per-response overhead near zero on long pages. if (state.hasFlowProxyHeaders && state.hasFlowProxyCookies) return; try { const headers = response.headers(); if (!headers) return; // 1) Vendor-token search across the well-known value-bearing headers. if (!state.hasFlowProxyHeaders) { for (const h of VENDOR_VALUE_HEADERS) { const v = headers[h]; if (!v) continue; const vl = v.toLowerCase(); for (const tok of VENDOR_TOKENS) { if (vl.includes(tok)) { state.hasFlowProxyHeaders = true; state.matchedHeader = `${h}: ${v}`; break; } } if (state.hasFlowProxyHeaders) break; } } // 2) Any X-FlowProxy-* or X-Aurologic-* custom header name — those // are vendor-namespaced by convention and don't collide. if (!state.hasFlowProxyHeaders) { for (const key of Object.keys(headers)) { // key is already lowercase per Puppeteer's headers() contract if (key.startsWith('x-flowproxy-') || key.startsWith('x-aurologic-')) { state.hasFlowProxyHeaders = true; state.matchedHeader = `${key}: ${headers[key]}`; break; } } } // 3) Set-Cookie parsing — extract each cookie's NAME (substring // before the first '=') and apply the shared vendor-name // predicate. The old substring-on-value match could false-fire // on names like `__flowproxy=` or `notaurologic_x=` that contain // the token without being vendor cookies; the name-level check // matches the jar inspection in analyzeFlowProxyProtection // exactly. Puppeteer joins multi-Set-Cookie with '\n'. if (!state.hasFlowProxyCookies) { const setCookie = headers['set-cookie']; if (setCookie) { const lines = setCookie.split('\n'); for (let i = 0; i < lines.length; i++) { const line = lines[i]; const eq = line.indexOf('='); if (eq <= 0) continue; const name = line.slice(0, eq).trim(); if (isVendorCookieName(name)) { state.hasFlowProxyCookies = true; state.matchedCookie = name; break; } } } } } catch (_) { // Observation-only — never let a header read throw into Puppeteer's // event-emitter chain. } }); } // Fast timeout constants - optimized for speed while respecting FlowProxy delays const FAST_TIMEOUTS = { PAGE_LOAD_WAIT: 1500, // Reduced from 2000ms ADDITIONAL_DELAY_DEFAULT: 3000 // Reduced from 5000ms }; // Protocols to skip — FlowProxy only protects web traffic const SKIP_PATTERNS = [ 'about:', 'chrome:', 'chrome-extension:', 'chrome-error:', 'chrome-search:', 'devtools:', 'edge:', 'moz-extension:', 'safari-extension:', 'webkit:', 'data:', 'blob:', 'javascript:', 'vbscript:', 'file:', 'ftp:', 'ftps:' ]; /** * Validates if a URL should be processed by FlowProxy protection * Only allows HTTP/HTTPS URLs, skips browser-internal and special protocols * * @param {string} url - URL to validate * @param {boolean} forceDebug - Debug logging flag * @returns {boolean} True if URL should be processed * * @example * // Valid URLs that will be processed * shouldProcessUrl('https://example.com') // => true * shouldProcessUrl('http://test.com') // => true * * // Invalid URLs that will be skipped * shouldProcessUrl('chrome://settings') // => false * shouldProcessUrl('about:blank') // => false * shouldProcessUrl('file:///local/file.html') // => false */ function shouldProcessUrl(url, forceDebug = false) { if (!url || typeof url !== 'string') { if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG}[url-validation] Skipping invalid URL: ${url}`)); return false; } // Skip browser-internal and special protocol URLs const urlLower = url.toLowerCase(); for (const pattern of SKIP_PATTERNS) { if (urlLower.startsWith(pattern)) { if (forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG}[url-validation] Skipping ${pattern} URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`)); } return false; } } // Only process HTTP/HTTPS URLs - FlowProxy only protects web traffic if (!urlLower.startsWith('http://') && !urlLower.startsWith('https://')) { if (forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG}[url-validation] Skipping non-HTTP(S) URL: ${url.substring(0, 100)}${url.length > 100 ? '...' : ''}`)); } return false; } return true; } /** * Fast timeout helper for Puppeteer 22.x compatibility * Replaces deprecated page.waitForTimeout() with standard Promise-based approach * * @param {import('puppeteer').Page} page - Puppeteer page instance * @param {number} timeout - Timeout in milliseconds * @returns {Promise<void>} */ async function waitForTimeout(page, timeout) { // Use fast Promise-based timeout for Puppeteer 22.x compatibility return new Promise(resolve => setTimeout(resolve, timeout)); } /** * Safe page evaluation with timeout protection for FlowProxy analysis */ async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE) { let timer; try { return await Promise.race([ page.evaluate(func), new Promise((_, reject) => { timer = setTimeout(() => reject(new Error('FlowProxy page evaluation timeout')), timeout); }) ]); } catch (error) { // Return full default-false shape so downstream `.hasProtectionPage` // etc. read as `false` instead of `undefined` — keeps debug logs // honest and conditional branches in handleFlowProxyProtection // deterministic. return { ...DEFAULT_DETECTION, error: error.message }; } finally { if (timer) clearTimeout(timer); } } /** * Analyzes the current page to detect flowProxy protection with comprehensive detection logic * * FlowProxy protection typically manifests as: * - DDoS protection pages with "Please wait" messages * - Rate limiting responses (429 errors) * - JavaScript challenges that must complete before access * - Aurologic branding and flowproxy-specific elements * - Browser verification processes * * @param {import('puppeteer').Page} page - Puppeteer page instance * @returns {Promise<object>} Detection information object with detailed analysis * * @example * const analysis = await analyzeFlowProxyProtection(page); * if (analysis.isFlowProxyDetected) { * console.log(`FlowProxy protection found: ${analysis.title}`); * if (analysis.isRateLimited) { * console.log('Rate limiting is active'); * } * } */ async function analyzeFlowProxyProtection(page) { try { // Get current page URL and validate it first. // page.url() is synchronous in Puppeteer 20+; no await needed. const currentPageUrl = page.url(); if (!shouldProcessUrl(currentPageUrl, false)) { return { isFlowProxyDetected: false, skippedInvalidUrl: true, url: currentPageUrl }; } // Pull HTTP-layer signals collected by the response listener (populated // by attachFlowProxyHeaderListener if the caller wired it up before // navigation). Falls back to empty-defaults if the listener was never // attached, so DOM-only detection still works. const httpState = pageHeaderState.get(page) || emptyHeaderState(); // Continue with comprehensive FlowProxy detection for valid HTTP(S) URLs const domResult = await safePageEvaluate(page, () => { const title = document.title || ''; const bodyText = document.body ? document.body.textContent : ''; const url = window.location.href; // === VENDOR-SPECIFIC SIGNALS (high-confidence FlowProxy markers) === // Anything here is unambiguous: it names FlowProxy or its parent // company Aurologic. At least ONE of these must be present for the // primary detection to fire — generic loaders / Cloudflare's // "Checking your browser" / SPA spinners alone do NOT count. // URL signals — note: 'ddos-protection' was moved out of this set; // it's too broad (matches docs/blog URLs about DDoS protection). const hasFlowProxyDomain = url.includes('aurologic') || url.includes('flowproxy'); // DOM signals tied to the vendor's class/id/data-attribute namespace // or its uniquely named challenge input. const hasFlowProxyElements = document.querySelector('[data-flowproxy]') !== null || document.querySelector('.flowproxy-challenge') !== null || document.querySelector('#flowproxy-container') !== null || document.querySelector('.aurologic-protection') !== null || document.querySelector('input[name="flowproxy-response"]') !== null; // Script src patterns from the vendor. const hasFlowProxyScripts = document.querySelector('script[src*="flowproxy"]') !== null || document.querySelector('script[src*="aurologic"]') !== null; // Brand-name strings — "flowProxy" (cased) and the canonical // Aurologic attribution line. const hasFlowProxyBrandText = bodyText.includes('DDoS protection by aurologic') || bodyText.includes('flowProxy'); // DOM-side specific signals only. The Node caller below merges this // with HTTP-header / cookie signals (which live outside the page // context) to produce the final hasSpecificSignal. const domSpecificSignal = hasFlowProxyDomain || hasFlowProxyElements || hasFlowProxyScripts || hasFlowProxyBrandText; // === GENERIC SIGNALS (low-confidence; used for sub-handling only) === // These flags help the handler decide WHICH delay to apply once // FlowProxy presence is already confirmed by a specific signal. // They are NOT inputs to isFlowProxyDetected — by themselves they // collide with Cloudflare, Sucuri, generic SPA loaders, etc. // Generic protection-page text (kept for verification-step semantics // and debug logging — exposed as `hasProtectionPage` for backward // compat with the rest of the module). const hasProtectionPage = hasFlowProxyBrandText || title.includes('DDoS Protection') || title.includes('Please wait') || title.includes('Checking your browser') || bodyText.includes('Verifying your browser') || url.includes('ddos-protection'); // Generic challenge-element markers (still exposed for the handler's // sub-decisions; hasFlowProxyElements above is the strong subset). const hasChallengeElements = hasFlowProxyElements || document.querySelector('.challenge-running') !== null || document.querySelector('.verification-container') !== null; const isRateLimited = bodyText.includes('Rate limited') || bodyText.includes('Too many requests') || bodyText.includes('Please try again later') || title.includes('429') || title.includes('Rate Limit'); // hasJSChallenge gates the wait-for-challenge-completion path. The // vendor script-src patterns are strong; the JS-required strings are // generic but only matter when hasSpecificSignal already gated us in. const hasJSChallenge = hasFlowProxyScripts || bodyText.includes('JavaScript is required') || bodyText.includes('Please enable JavaScript'); const isProcessing = bodyText.includes('Processing') || bodyText.includes('Loading') || document.querySelector('.loading-spinner') !== null || document.querySelector('.processing-indicator') !== null; // The Node-side caller merges this with HTTP signals to compute // the final hasSpecificSignal / isFlowProxyDetected. return { domSpecificSignal, hasFlowProxyDomain, hasFlowProxyElements, hasFlowProxyScripts, hasFlowProxyBrandText, hasProtectionPage, hasChallengeElements, isRateLimited, hasJSChallenge, isProcessing, title, url, bodySnippet: bodyText.substring(0, 200) // First 200 chars for debugging }; }); // Cookie-jar check: complements the Set-Cookie response-header listener // by reading what's ACTUALLY persisted in the browser jar. Catches: // - cookies set on prior visits (session-reuse scenarios) // - cookies set via document.cookie = '...' from page JS // - Set-Cookie emitted before the listener attached (defensive) // Uses isVendorCookieName so the predicate matches the listener. // Try/catch because page.cookies() throws on closed/detached pages. let hasJarCookie = false; let jarMatchedCookie = null; try { const cookies = await page.cookies(); if (Array.isArray(cookies)) { for (let i = 0; i < cookies.length; i++) { if (isVendorCookieName(cookies[i].name)) { hasJarCookie = true; jarMatchedCookie = cookies[i].name; break; } } } } catch (_) { // Observation-only — never fail detection because the jar read errored. } // Unified merge: works for both the success path AND the DOM-error // path. On error, domResult is `{...DEFAULT_DETECTION, error}` so // domSpecificSignal is undefined; isFlowProxyDetected still becomes // true if HTTP signals fired. Previously the error path returned // early before recomputing primary detection, silently dropping // header/cookie evidence whenever the DOM eval errored. const hasFlowProxyCookies = httpState.hasFlowProxyCookies || hasJarCookie; const hasSpecificSignal = (domResult && domResult.domSpecificSignal) || httpState.hasFlowProxyHeaders || hasFlowProxyCookies; return { ...domResult, // includes .error on the safePageEvaluate failure path hasFlowProxyHeaders: httpState.hasFlowProxyHeaders, hasFlowProxyCookies, matchedHeader: httpState.matchedHeader, // Listener-captured name wins over jar name when both fire — the // listener saw the cookie at the moment it was set, which is the // more informative time-of-event for debug output. matchedCookie: httpState.matchedCookie || jarMatchedCookie, hasSpecificSignal, // PRIMARY DETECTION: at least one vendor-specific signal across DOM // OR HTTP layer. Headers are the most reliable signal; cookies // close behind. DOM markers remain the fallback for sites where // the listener wasn't wired up before navigation. isFlowProxyDetected: hasSpecificSignal }; } catch (error) { // Return safe defaults if page evaluation fails return { ...DEFAULT_DETECTION, error: error.message }; } } /** * Handles flowProxy protection by implementing appropriate delays and retry logic * * FlowProxy handling strategy: * 1. Detect protection type (rate limiting, JS challenge, etc.) * 2. Implement appropriate delays based on protection type * 3. Wait for JavaScript challenges to complete * 4. Verify successful bypass before continuing * * @param {import('puppeteer').Page} page - Puppeteer page instance * @param {string} currentUrl - Current URL being processed * @param {object} siteConfig - Site configuration object with FlowProxy settings * @param {boolean} forceDebug - Debug mode flag for detailed logging * * @returns {Promise<object>} Result object with comprehensive handling details: * { * flowProxyDetection: { * attempted: boolean, // Whether detection was attempted * detected: boolean, // Whether FlowProxy protection was found * details: object|null // Detailed detection information * }, * handlingResult: { * attempted: boolean, // Whether handling was attempted * success: boolean // Whether handling succeeded * }, * overallSuccess: boolean, // True if no critical failures occurred * errors: string[], // Array of error messages * warnings: string[], // Array of warning messages * skippedInvalidUrl: boolean // True if URL was skipped due to invalid protocol * } * * @example * const config = { * flowproxy_delay: 45000, // Rate limit delay (45 seconds) * flowproxy_js_timeout: 20000, // JS challenge timeout (20 seconds) * flowproxy_additional_delay: 8000 // Additional processing delay (8 seconds) * }; * * const result = await handleFlowProxyProtection(page, url, config, true); * if (result.flowProxyDetection.detected) { * console.log('FlowProxy protection handled'); * if (result.warnings.length > 0) { * console.log('Warnings:', result.warnings); * } * } */ async function handleFlowProxyProtection(page, currentUrl, siteConfig, forceDebug = false) { // VALIDATE URL FIRST - Skip protection handling for non-HTTP(S) URLs // FlowProxy only protects web traffic, so other protocols should be skipped if (!shouldProcessUrl(currentUrl, forceDebug)) { if (forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Skipping protection handling for non-HTTP(S) URL: ${currentUrl}`)); } return { flowProxyDetection: { attempted: false, detected: false }, handlingResult: { attempted: false, success: true }, overallSuccess: true, errors: [], warnings: [], skippedInvalidUrl: true }; } // Initialize result structure for tracking all handling aspects const result = { flowProxyDetection: { attempted: false, detected: false }, handlingResult: { attempted: false, success: false }, overallSuccess: true, errors: [], warnings: [] }; try { if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Checking for flowProxy protection on ${currentUrl}`)); // Wait for initial page load before analyzing // FlowProxy protection pages need time to fully render their elements await waitForTimeout(page, FAST_TIMEOUTS.PAGE_LOAD_WAIT); // Perform comprehensive FlowProxy detection const detectionInfo = await analyzeFlowProxyProtection(page); result.flowProxyDetection = { attempted: true, detected: detectionInfo.isFlowProxyDetected, details: detectionInfo }; // Only proceed with handling if FlowProxy protection is detected if (detectionInfo.isFlowProxyDetected) { result.handlingResult.attempted = true; if (forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy protection detected on ${currentUrl}:`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Page Title: "${detectionInfo.title}"`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Current URL: ${detectionInfo.url}`)); // Specific-signal breakdown — which vendor-specific marker(s) fired console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Specific signals: domain=${detectionInfo.hasFlowProxyDomain} elements=${detectionInfo.hasFlowProxyElements} scripts=${detectionInfo.hasFlowProxyScripts} brandText=${detectionInfo.hasFlowProxyBrandText} headers=${detectionInfo.hasFlowProxyHeaders} cookies=${detectionInfo.hasFlowProxyCookies}`)); if (detectionInfo.matchedHeader) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Matched header: ${detectionInfo.matchedHeader}`)); } if (detectionInfo.matchedCookie) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Matched cookie: ${detectionInfo.matchedCookie}`)); } console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Has Protection Page: ${detectionInfo.hasProtectionPage}`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Has Challenge Elements: ${detectionInfo.hasChallengeElements}`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Is Rate Limited: ${detectionInfo.isRateLimited}`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Has JS Challenge: ${detectionInfo.hasJSChallenge}`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Is Processing: ${detectionInfo.isProcessing}`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Body Snippet: "${detectionInfo.bodySnippet}"`)); } // HANDLE RATE LIMITING - Highest priority as it blocks all requests // Rate limiting requires waiting before any other actions if (detectionInfo.isRateLimited) { const rateLimitDelay = siteConfig.flowproxy_delay || TIMEOUTS.RATE_LIMIT_DEFAULT; result.warnings.push(`Rate limiting detected - implementing ${rateLimitDelay}ms delay`); if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Rate limiting detected, waiting ${rateLimitDelay}ms`)); await waitForTimeout(page, rateLimitDelay); } // HANDLE JAVASCRIPT CHALLENGES - Second priority as they must complete // FlowProxy uses JS challenges to verify browser legitimacy if (detectionInfo.hasJSChallenge || detectionInfo.isProcessing) { const jsWaitTime = siteConfig.flowproxy_js_timeout || TIMEOUTS.JS_CHALLENGE_DEFAULT; if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} JavaScript challenge detected, waiting up to ${jsWaitTime}ms for completion`)); try { // Wait for challenge completion indicators to disappear. // page.waitForFunction has its own { timeout } — the previous // outer Promise.race added a setTimeout that fired 5s LATER, // leaked its timer on the success path, and never won the race // in practice. Dropped: waitForFunction's own timeout is the // single source of truth. await page.waitForFunction( () => { const bodyText = document.body ? document.body.textContent : ''; return !bodyText.includes('Processing') && !bodyText.includes('Checking your browser') && !bodyText.includes('Please wait') && !document.querySelector('.loading-spinner') && !document.querySelector('.processing-indicator'); }, { timeout: jsWaitTime } ); if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} JavaScript challenge appears to have completed`)); } catch (timeoutErr) { // Continue even if timeout occurs - some challenges may take longer result.warnings.push(`JavaScript challenge timeout after ${jsWaitTime}ms`); if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} JavaScript challenge timeout - continuing anyway`)); } } // IMPLEMENT ADDITIONAL DELAY - Final step to ensure all processing completes // FlowProxy may need extra time even after challenges complete const additionalDelay = siteConfig.flowproxy_additional_delay || FAST_TIMEOUTS.ADDITIONAL_DELAY_DEFAULT; if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Implementing additional ${additionalDelay}ms delay for flowProxy processing`)); await waitForTimeout(page, additionalDelay); // VERIFY SUCCESSFUL BYPASS - Check if we're still on a protection page // This helps identify if our handling was successful const finalCheck = await analyzeFlowProxyProtection(page); if (finalCheck.isFlowProxyDetected && finalCheck.hasProtectionPage) { result.warnings.push('Still on flowProxy protection page after handling attempts'); if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Warning: Still appears to be on protection page`)); // Don't mark as failure - protection page may persist but still allow access } else { result.handlingResult.success = true; if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Successfully handled flowProxy protection for ${currentUrl}`)); } } else { // No FlowProxy protection detected — nothing to handle. // result.overallSuccess is already true from initialization. if (forceDebug) console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} No flowProxy protection detected on ${currentUrl}`)); } } catch (error) { // Critical error occurred during handling result.errors.push(`FlowProxy handling error: ${error.message}`); result.overallSuccess = false; if (forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling failed for ${currentUrl}:`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Error: ${error.message}`)); console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} Stack: ${error.stack}`)); } } // LOG COMPREHENSIVE RESULTS for debugging and monitoring if (result.errors.length > 0 && forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling completed with errors for ${currentUrl}:`)); result.errors.forEach(error => { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} - ${error}`)); }); } else if (result.warnings.length > 0 && forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling completed with warnings for ${currentUrl}:`)); result.warnings.forEach(warning => { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} - ${warning}`)); }); } else if (result.flowProxyDetection.attempted && forceDebug) { console.log(formatLogMessage('debug', `${FLOWPROXY_TAG} FlowProxy handling completed successfully for ${currentUrl}`)); } return result; } /** * Gets page-level timeout values for flowProxy-protected sites. Used by * nwss.js to call page.setDefaultTimeout/setDefaultNavigationTimeout * before navigating. The handler itself reads challenge/rate-limit/ * additional-delay values directly from siteConfig (with TIMEOUTS * fallbacks), so those don't need to round-trip through this function. * * @param {object} siteConfig - Site configuration object * @returns {{ pageTimeout: number, navigationTimeout: number }} * * @example * const { pageTimeout, navigationTimeout } = getFlowProxyTimeouts(siteConfig); * page.setDefaultTimeout(pageTimeout); * page.setDefaultNavigationTimeout(navigationTimeout); */ function getFlowProxyTimeouts(siteConfig) { return { pageTimeout: siteConfig.flowproxy_page_timeout || TIMEOUTS.PAGE_TIMEOUT_DEFAULT, navigationTimeout: siteConfig.flowproxy_nav_timeout || TIMEOUTS.NAVIGATION_TIMEOUT_DEFAULT }; } // Public surface used by nwss.js. Internal helpers (waitForTimeout, // safePageEvaluate, analyzeFlowProxyProtection, shouldProcessUrl) stay // module-private — the old export list included several functions no // caller imported. // // attachFlowProxyHeaderListener should be called by the caller BEFORE // navigation so the response listener observes the document response's // own headers. Without it, header/cookie detection silently no-ops and // the module falls back to DOM-only detection. module.exports = { handleFlowProxyProtection, getFlowProxyTimeouts, attachFlowProxyHeaderListener };