@fanboynz/network-scanner
Version:
A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.
1,258 lines (1,105 loc) • 85.1 kB
JavaScript
/**
* Cloudflare bypass and challenge handling module - Optimized with smart detection and adaptive timeouts
* Version: 2.7.0 - Major fixes and performance overhaul
* - Fix: Challenge solvers had empty if-blocks (JS/Turnstile/Legacy never executed in non-debug mode)
* - Fix: a[href*="continue"] false positive removed (matched nearly every website)
* - Perf: Domain-level detection cache (was per-URL, now per-hostname)
* - Perf: Timeout outcome caching (domain times out once -> all subsequent URLs skip instantly)
* - Perf: Short-circuit quick detection (title/URL -> fast selectors -> slow text, early return at each stage)
* - Perf: Eliminated body.textContent in quick detection (was extracting entire DOM text tree)
* - Perf: Capped body.textContent to 2KB in analyzeCloudflareChallenge
* - Perf: No-indicator pages skip immediately regardless of config (was 10-15s wasted)
* - Perf: Quick detection timeout 4s->2s, retries 2->1
* - Perf: PAGE_EVALUATION timeout 12s->5s, detached frame delay 3s->1s
* - Perf: Inner timeouts tightened to fit within outer adaptive timeouts
* - Perf: CHALLENGE_SOLVING 30s->12s, TURNSTILE_COMPLETION 20s->10s, JS_CHALLENGE_BUFFER 26s->12s
* - Perf: MAX_RETRIES 3->2, baseDelay 1000->800ms, maxDelay 8000->5000ms
* - Perf: Parallel detection gated behind cloudflare config (was running on every URL)
* Version: 2.6.3 - Fixes Cannot read properties of undefined (reading 'hasIndicators')
* Version: 2.6.2 - Further detached Frame fixes
* Version: 2.6.1 - timeoutId is not defined & race condition fix
* Version: 2.6.0 - Memory leak fixes and timeout cleanup
* Version: 2.5.0 - Fix Frame Lifecycle issue, Timing and Race condition
* Version: 2.4.1 - Bump timeout values
* Version: 2.4.0 - Fix possible endless loops with retry logic and loop detection
* Version: 2.3.1 - Colorize CF
* Version: 2.3.0 - Support CF iframe challenges, and better error handling
* Version: 2.2.0 - Enhanced with retry logic, caching, and improved error handling
* Version: 2.1.0 - Enhanced with quick detection, adaptive timeouts, and comprehensive debug logging
* Handles phishing warnings, Turnstile challenges, and modern Cloudflare protections
*/
// Import color utilities
const { formatLogMessage } = require('./colorize');
/**
* Module version string, returned by getModuleInfo().
* Keep in sync with the newest "Version:" entry in the changelog header
* at the top of this file.
*/
const CLOUDFLARE_MODULE_VERSION = '2.7.0';
/**
* Timeout and retry constants for Cloudflare handling, tuned for Puppeteer 22.x.
* Durations are in milliseconds; MAX_RETRIES and CHALLENGE_MAX_POLLS are counts.
* Values favour scanning speed while keeping challenge handling functional.
*/
const TIMEOUTS = {
PAGE_EVALUATION: 5000, // Standard page evaluation timeout (DOM queries are instant)
PAGE_EVALUATION_SAFE: 5000, // Default timeout for safePageEvaluate (currently equal to PAGE_EVALUATION)
PHISHING_CLICK: 3000, // Timeout for clicking phishing continue button
PHISHING_NAVIGATION: 8000, // Wait for navigation after phishing bypass
JS_CHALLENGE_BUFFER: 12000, // JS challenge -- must fit within 15s adaptive outer timeout
TURNSTILE_COMPLETION: 10000, // Turnstile completion check -- fits within adaptive timeout
TURNSTILE_COMPLETION_BUFFER: 12000, // Turnstile completion with buffer
CLICK_TIMEOUT: 5000, // Default timeout for safeClick
CLICK_TIMEOUT_BUFFER: 1000, // Added to the click timeout for safeClick's outer watchdog
NAVIGATION_TIMEOUT: 15000, // Default timeout for safeWaitForNavigation
NAVIGATION_TIMEOUT_BUFFER: 2000, // Added to the navigation timeout for the outer watchdog
ADAPTIVE_TIMEOUT_WITH_INDICATORS: 25000, // Adaptive timeout when indicators found + explicit config
ADAPTIVE_TIMEOUT_WITHOUT_INDICATORS: 20000, // Adaptive timeout with explicit config only
ADAPTIVE_TIMEOUT_AUTO_WITH_INDICATORS: 15000, // Adaptive timeout for auto-detected with indicators
ADAPTIVE_TIMEOUT_AUTO_WITHOUT_INDICATORS: 10000, // Adaptive timeout for auto-detected without indicators
// Retry / polling settings.
// NOTE(review): RETRY_DELAY and MAX_RETRIES overlap with RETRY_CONFIG
// (baseDelay 800, maxAttempts 2) -- confirm which one callers actually read.
RETRY_DELAY: 1000, // Delay between retry attempts
MAX_RETRIES: 2, // Maximum retry attempts (only 2 fit within 25s outer timeout)
CHALLENGE_POLL_INTERVAL: 500, // Interval for polling challenge completion
CHALLENGE_MAX_POLLS: 20 // Maximum polling attempts (20 x 500ms = 10s of polling)
};
// Short waits and timeouts (milliseconds) used on the speed-optimized paths.
// QUICK_DETECTION bounds quickCloudflareDetection's page evaluation;
// PHISHING_WAIT and CHALLENGE_WAIT are the settle delays applied before the
// phishing and verification handlers analyze the page.
const FAST_TIMEOUTS = {
QUICK_DETECTION: 2000, // Fast Cloudflare detection (DOM check, instant on loaded pages)
PHISHING_WAIT: 1000, // Settle delay before the phishing-warning analysis
CHALLENGE_WAIT: 500, // Settle delay before the verification-challenge analysis
ELEMENT_INTERACTION_DELAY: 250, // Fast element interactions
SELECTOR_WAIT: 3000, // Fast selector waits
TURNSTILE_OPERATION: 6000, // Fast Turnstile operations
JS_CHALLENGE: 10000, // Fast JS challenge completion
CHALLENGE_SOLVING: 12000, // Overall challenge solving -- fits within 15s adaptive outer
CHALLENGE_COMPLETION: 8000 // Fast completion check
};
/**
 * Finds and clicks the first element matching one of `selectors`, looking
 * through shadow DOM boundaries.
 *
 * Strategy:
 *  1. For each selector, wait up to `waitMs` for Puppeteer's `pierce/` query
 *     handler to match, then click through the element handle.
 *  2. If no selector matched (or every attempt errored), fall back to a
 *     manual traversal of open shadow roots inside the page.
 *
 * @param {Object} context - Puppeteer Page or Frame (needs waitForSelector/evaluate)
 * @param {string[]} selectors - CSS selectors, tried in order
 * @param {boolean} [forceDebug=false] - Debug logging flag
 * @param {number} [waitMs=1500] - Per-selector wait for the pierce/ lookup
 * @returns {Promise<{found: boolean, clicked: boolean, selector: (string|null), x: number, y: number}>}
 *   x/y are the centre of the clicked element so callers can fall back to a
 *   coordinate-based mouse click; both are 0 when nothing was clicked.
 */
async function clickInShadowDOM(context, selectors, forceDebug = false, waitMs = 1500) {
  // Try Puppeteer's pierce/ selector first -- handles CLOSED shadow roots via CDP
  for (const selector of selectors) {
    let element = null;
    try {
      // Wait for element to appear (handles delayed rendering)
      const start = Date.now();
      element = await context.waitForSelector(`pierce/${selector}`, { timeout: waitMs });
      if (element) {
        const box = await element.boundingBox();
        if (box && box.width > 0 && box.height > 0) {
          if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} matched in ${Date.now() - start}ms -- box: ${box.width}x${box.height} at (${box.x},${box.y})`));
          await element.click();
          return { found: true, clicked: true, selector, x: box.x + box.width / 2, y: box.y + box.height / 2 };
        }
        if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} found but not visible (0x0)`));
        // Element found but not visible
        return { found: true, clicked: false, selector, x: 0, y: 0 };
      }
    } catch (e) {
      // Lookup timed out or the handle operation failed -- move on to the next selector
      if (forceDebug) console.log(formatLogMessage('cloudflare', `pierce/${selector} timeout after ${waitMs}ms`));
    } finally {
      // Always release the handle, even when boundingBox()/click() threw.
      // Disposal is best-effort: a click that triggers navigation can tear
      // down the handle's execution context, and that must not turn a
      // successful click into a reported failure.
      if (element) {
        try {
          await element.dispose();
        } catch {
          // Handle already gone -- nothing left to release.
        }
      }
    }
  }
  // Fallback: manual traversal for open shadow roots (runs in the page context)
  const result = await context.evaluate((sels) => {
    function deepQuery(root, selector) {
      // Try direct query first
      const el = root.querySelector(selector);
      if (el) return el;
      // Traverse shadow roots
      const allElements = root.querySelectorAll('*');
      for (const node of allElements) {
        if (node.shadowRoot) {
          const found = deepQuery(node.shadowRoot, selector);
          if (found) return found;
        }
      }
      return null;
    }
    for (const selector of sels) {
      const el = deepQuery(document, selector);
      if (el) {
        const rect = el.getBoundingClientRect();
        if (rect.width > 0 && rect.height > 0) {
          el.click();
          return { found: true, clicked: true, selector, x: rect.x + rect.width / 2, y: rect.y + rect.height / 2 };
        }
        return { found: true, clicked: false, selector, x: 0, y: 0 };
      }
    }
    return { found: false, clicked: false, selector: null, x: 0, y: 0 };
  }, selectors);
  return result;
}
/**
* Error category tags produced by categorizeError() and consulted by the
* retry logic (see RETRY_CONFIG.retryableErrors).
* NOTE(review): categorizeError() in this file never returns
* ELEMENT_NOT_FOUND -- presumably assigned by other callers; verify.
*/
const ERROR_TYPES = {
NETWORK: 'network',
TIMEOUT: 'timeout',
ELEMENT_NOT_FOUND: 'element_not_found',
EVALUATION_FAILED: 'evaluation_failed',
NAVIGATION_FAILED: 'navigation_failed',
DETACHED_FRAME: 'detached_frame',
UNKNOWN: 'unknown'
};
/**
* Retry configuration with exponential backoff.
* Defined before getRetryConfig so the reference order is structurally
* sound — previously getRetryConfig was hoisted above this const and only
* worked because the function was never called during module load.
*
* Fields:
* - maxAttempts: default attempt count (safePageEvaluate's default maxRetries)
* - baseDelay / backoffMultiplier / maxDelay: inputs to getRetryDelay(), in ms
* - retryableErrors: ERROR_TYPES values for which a retry is attempted
*/
const RETRY_CONFIG = {
maxAttempts: 2, // Only 2 attempts fit within 25s outer timeout
baseDelay: 800, // Delay before the first retry (ms)
maxDelay: 5000, // Upper bound for any single retry delay (ms)
backoffMultiplier: 2, // Delay doubles on each subsequent attempt
retryableErrors: [ERROR_TYPES.NETWORK, ERROR_TYPES.TIMEOUT, ERROR_TYPES.ELEMENT_NOT_FOUND, ERROR_TYPES.DETACHED_FRAME]
};
/**
 * Gets the retry configuration for a site, merging site-specific and global settings.
 *
 * `cloudflare_max_retries` is honoured only when it resolves to a finite
 * number >= 1 (fractions are floored); anything else — missing, 0, negative,
 * NaN, Infinity — falls back to RETRY_CONFIG.maxAttempts so retry loops can
 * never end up with zero or unbounded attempts. A missing/null siteConfig is
 * treated as "no overrides".
 *
 * @param {Object} [siteConfig] - Site configuration object
 * @param {number} [siteConfig.cloudflare_max_retries] - Per-site attempt limit
 * @param {boolean} [siteConfig.cloudflare_retry_on_error] - Set to false to disable retries
 * @returns {Object} Merged retry configuration
 */
function getRetryConfig(siteConfig) {
  const requested = Number(siteConfig?.cloudflare_max_retries);
  const maxAttempts = Number.isFinite(requested) && requested >= 1
    ? Math.floor(requested)
    : RETRY_CONFIG.maxAttempts;
  return {
    maxAttempts,
    baseDelay: RETRY_CONFIG.baseDelay,
    maxDelay: RETRY_CONFIG.maxDelay,
    backoffMultiplier: RETRY_CONFIG.backoffMultiplier,
    retryableErrors: RETRY_CONFIG.retryableErrors,
    retryOnError: siteConfig?.cloudflare_retry_on_error !== false // Default to true
  };
}
/**
 * Detects a Cloudflare challenge redirect loop from the URL history.
 *
 * Only URLs that look like challenge URLs can be part of a loop. A previous
 * URL counts as "similar" when it is identical to the current one, or when
 * both it and the current URL are challenge-platform URLs (which differ only
 * in ray IDs / tokens). Two or more similar entries mean a loop.
 *
 * @param {string} url - Current URL
 * @param {string[]} [previousUrls=[]] - URLs visited earlier in this attempt
 * @returns {boolean} True when a loop is detected
 */
function detectChallengeLoop(url, previousUrls = []) {
  const PLATFORM_PATH = '/cdn-cgi/challenge-platform/';
  const onPlatform = url.includes(PLATFORM_PATH);
  const looksLikeChallenge =
    onPlatform || url.includes('challenges.cloudflare.com') || url.includes('cf-ray');
  if (!looksLikeChallenge) {
    return false;
  }

  let similarCount = 0;
  for (const earlier of previousUrls) {
    const sameUrl = earlier === url;
    // Different ray IDs still count as the same challenge endpoint
    if (sameUrl || (earlier.includes(PLATFORM_PATH) && onPlatform)) {
      similarCount += 1;
    }
  }
  return similarCount >= 2;
}
/**
 * Performance cache for detection results.
 * Stores one entry per hostname (all URLs on a host share it) with a TTL,
 * so repeated URLs on the same domain skip redundant page evaluation.
 * Bounded in size: once `maxSize` is exceeded the least recently written
 * entry is evicted.
 */
class CloudflareDetectionCache {
  /**
   * @param {number} [ttl=300000] - Entry lifetime in ms (5 minutes by default)
   * @param {number} [maxSize=1000] - Maximum number of cached hosts
   */
  constructor(ttl = 300000, maxSize = 1000) {
    this.cache = new Map();
    this.ttl = ttl;
    this.maxSize = maxSize;
    this.hits = 0;
    this.misses = 0;
    // Prevent memory buildup in long-running processes. unref() so the
    // interval never prevents the Node process from exiting on its own —
    // nwss.js calls cleanup() explicitly on scan completion, but any other
    // consumer of this module that forgets to is still safe.
    this.cleanupInterval = setInterval(() => this.cleanupExpired(), ttl / 10);
    this.cleanupInterval.unref();
  }

  /**
   * Maps a URL to its cache key.
   * @param {string} url
   * @returns {string} Hostname, or the raw input when it is not a parseable URL
   */
  getCacheKey(url) {
    try {
      const urlObj = new URL(url);
      return urlObj.hostname; // Domain-level caching: all URLs from same host share one entry
    } catch {
      return url;
    }
  }

  /**
   * Looks up the cached detection result for a URL's host.
   * Updates the hit/miss counters and drops the entry if it has expired.
   * @param {string} url
   * @returns {*} Cached data, or null on miss/expiry
   */
  get(url) {
    const key = this.getCacheKey(url);
    const cached = this.cache.get(key);
    if (cached && Date.now() - cached.timestamp < this.ttl) {
      this.hits++;
      return cached.data;
    }
    if (cached) {
      this.cache.delete(key); // Remove expired entry
    }
    this.misses++;
    return null;
  }

  /**
   * Stores (or refreshes) the detection result for a URL's host.
   * @param {string} url
   * @param {*} data - Detection result to cache
   */
  set(url, data) {
    const key = this.getCacheKey(url);
    // Map keeps a key's ORIGINAL insertion position on overwrite. Delete
    // first so a refreshed entry moves to the back of the eviction order
    // instead of being thrown away as "oldest" right after being updated.
    this.cache.delete(key);
    this.cache.set(key, {
      data,
      timestamp: Date.now()
    });
    // Prevent cache from growing too large: evict the least recently written host
    if (this.cache.size > this.maxSize) {
      const firstKey = this.cache.keys().next().value;
      this.cache.delete(firstKey);
    }
  }

  /** Removes every entry whose TTL has elapsed (invoked by the cleanup interval). */
  cleanupExpired() {
    const now = Date.now();
    for (const [key, value] of this.cache.entries()) {
      if (now - value.timestamp >= this.ttl) {
        this.cache.delete(key);
      }
    }
  }

  /** Stops the background cleanup interval and empties the cache. */
  destroy() {
    if (this.cleanupInterval) clearInterval(this.cleanupInterval);
    this.clear();
  }

  /** Empties the cache and resets the hit/miss counters. */
  clear() {
    this.cache.clear();
    this.hits = 0;
    this.misses = 0;
  }

  /**
   * @returns {{hits: number, misses: number, hitRate: string, size: number}}
   *   hitRate is a percentage string with two decimals, e.g. "66.67%".
   */
  getStats() {
    const total = this.hits + this.misses;
    return {
      hits: this.hits,
      misses: this.misses,
      hitRate: total > 0 ? (this.hits / total * 100).toFixed(2) + '%' : '0%',
      size: this.cache.size
    };
  }
}
// Module-wide detection cache singleton, created with the class defaults
// (5-minute TTL). quickCloudflareDetection reads and writes it.
const detectionCache = new CloudflareDetectionCache();
// One-shot flag for the per-process module-version banner. Was previously
// logged once per URL in handleCloudflareProtection's debug header, which
// produces N=URL-count copies for no useful signal beyond the first.
let _moduleVersionLogged = false;
/**
 * Reports this module's identity.
 * @returns {{version: string, name: string}} Current module version and display name
 */
function getModuleInfo() {
  const version = CLOUDFLARE_MODULE_VERSION;
  const name = 'Cloudflare Protection Handler';
  return { version, name };
}
// Precompiled, start-anchored protocol matchers. SKIP_PROTO_RE recognizes the
// browser-internal / special schemes that are never processed (used to name
// the scheme in the debug log); HTTP_PROTO_RE is the allow-list check that
// only http:// and https:// URLs pass. One regex test each is cheaper than a
// chain of sequential startsWith comparisons per URL.
const SKIP_PROTO_RE = /^(?:about|chrome|chrome-extension|chrome-error|chrome-search|devtools|edge|moz-extension|safari-extension|webkit|data|blob|javascript|vbscript|file|ftp|ftps):/i;
const HTTP_PROTO_RE = /^https?:\/\//i;

/**
 * Validates if a URL should be processed by Cloudflare protection.
 * Only HTTP/HTTPS URLs are accepted; empty or non-string input, browser-internal
 * schemes and any other protocol are rejected.
 * @param {string} url - URL to validate
 * @param {boolean} forceDebug - Debug logging flag
 * @returns {boolean} True if URL should be processed
 */
function shouldProcessUrl(url, forceDebug = false) {
  const log = (message) => console.log(formatLogMessage('cloudflare', message));
  // First 100 characters of the URL, with an ellipsis when truncated
  const preview = () => (url.length > 100 ? `${url.substring(0, 100)}...` : url.substring(0, 100));

  if (typeof url !== 'string' || url.length === 0) {
    if (forceDebug) log(`[url-validation] Skipping invalid URL: ${url}`);
    return false;
  }

  const internalScheme = SKIP_PROTO_RE.exec(url);
  if (internalScheme !== null) {
    if (forceDebug) log(`[url-validation] Skipping ${internalScheme[0].toLowerCase()} URL: ${preview()}`);
    return false;
  }

  if (HTTP_PROTO_RE.test(url)) {
    return true;
  }

  if (forceDebug) log(`[url-validation] Skipping non-HTTP(S) URL: ${preview()}`);
  return false;
}
/**
 * Promise-based sleep, used in place of the deprecated page.waitForTimeout()
 * so the module has no dependency on that Puppeteer API.
 * @param {Object} page - Unused; kept so call sites mirror the old page API
 * @param {number} timeout - Delay in milliseconds
 * @returns {Promise<void>} Resolves once the delay has elapsed
 */
async function waitForTimeout(page, timeout) {
  await new Promise((resolve) => {
    setTimeout(resolve, timeout);
  });
}
/**
 * Reports which of Cloudflare's two key cookies the page currently holds.
 * cf_clearance is the post-challenge clearance token — the most reliable
 * "bypass actually succeeded" signal, stronger than any DOM-side check.
 * __cf_bm is the bot-mitigation cookie (typically set on every request that
 * passes through CF's edge).
 * Cookie read failures are swallowed on purpose: they must never affect the
 * bypass logic, so both flags simply come back false.
 * @param {Object} page - Puppeteer page instance
 * @returns {Promise<{cf_clearance: boolean, cf_bm: boolean}>}
 */
async function getCfCookieState(page) {
  try {
    const seenNames = new Set();
    for (const { name } of await page.cookies()) {
      seenNames.add(name);
    }
    return {
      cf_clearance: seenNames.has('cf_clearance'),
      cf_bm: seenNames.has('__cf_bm')
    };
  } catch {
    return { cf_clearance: false, cf_bm: false };
  }
}
/**
 * Condenses a handleCloudflareProtection result into a short outcome tag for
 * the per-URL summary log. Tags contain no spaces so scan logs can be grepped
 * or post-processed by outcome category. Rules are checked top to bottom and
 * the first match wins.
 * @param {Object} result - Result object from handleCloudflareProtection
 * @param {number|string} [errorCode] - Cloudflare 5xx code, used for error pages
 * @returns {string} Outcome tag
 */
function buildOutcomeString(result, errorCode) {
  if (!result) {
    return 'unknown';
  }

  // [condition, tag] pairs in priority order; both sides are evaluated lazily.
  const rules = [
    [() => result.skippedInvalidUrl, () => 'skipped(non-http)'],
    [() => result.quickDetectionFailed, () => 'detection_failed'],
    [() => result.cloudflareErrorPage, () => `error_page(${errorCode || '5xx'})`],
    [() => result.timedOut, () => 'timeout'],
    [() => result.verificationChallenge?.requiresHuman, () => 'captcha_required'],
    [
      () => result.verificationChallenge?.attempted && result.verificationChallenge?.success,
      () => `solved(${result.verificationChallenge.method || 'unknown'})`
    ],
    [
      () => result.phishingWarning?.attempted && result.phishingWarning?.success,
      () => 'solved(phishing_continue)'
    ],
    [() => result.skippedNoIndicators, () => 'no_indicators'],
    [() => !result.overallSuccess, () => 'failed']
  ];

  for (const [matches, tag] of rules) {
    if (matches()) {
      return tag();
    }
  }
  return 'ok';
}
/**
 * Categorizes an error into one of ERROR_TYPES so callers can decide whether
 * a retry makes sense.
 *
 * Checks run in priority order: detached frame, timeout, network/protocol,
 * evaluation, navigation. Timeouts are recognized both by message text and by
 * `error.name === 'TimeoutError'` — Puppeteer's TimeoutError messages do not
 * always contain the word "timeout" (e.g. "Waiting failed: 1500ms exceeded").
 *
 * @param {Error|string|null|undefined} error - Error object, or a thrown string
 * @returns {string} One of the ERROR_TYPES values
 */
function categorizeError(error) {
  // Guard against null/undefined error so callers using categorizeError in
  // safe-defaults return paths (e.g. safePageEvaluate's final fallback when
  // lastError was never assigned) don't blow up reading .message.
  if (!error) return ERROR_TYPES.UNKNOWN;
  // Accept thrown strings and coerce odd message values so .includes() is safe.
  const errorMessage = typeof error === 'string' ? error : String(error.message ?? '');
  if (errorMessage.includes('detached Frame') || errorMessage.includes('Attempted to use detached')) {
    return ERROR_TYPES.DETACHED_FRAME;
  }
  if (error.name === 'TimeoutError' || errorMessage.includes('timeout') || errorMessage.includes('Timeout')) {
    return ERROR_TYPES.TIMEOUT;
  }
  if (errorMessage.includes('Protocol error') || errorMessage.includes('Target closed')) {
    return ERROR_TYPES.NETWORK;
  }
  if (errorMessage.includes('evaluation') || errorMessage.includes('Evaluation')) {
    return ERROR_TYPES.EVALUATION_FAILED;
  }
  if (errorMessage.includes('navigation') || errorMessage.includes('Navigation')) {
    return ERROR_TYPES.NAVIGATION_FAILED;
  }
  return ERROR_TYPES.UNKNOWN;
}
/**
 * Computes the exponential-backoff delay before the next retry.
 * delay = baseDelay * backoffMultiplier^(attempt - 1), capped at maxDelay.
 * @param {number} attempt - 1-based number of the attempt that just failed
 * @returns {number} Delay in milliseconds
 */
function getRetryDelay(attempt) {
  const { baseDelay, backoffMultiplier, maxDelay } = RETRY_CONFIG;
  const uncapped = baseDelay * backoffMultiplier ** (attempt - 1);
  return Math.min(uncapped, maxDelay);
}
/**
 * Runs page.evaluate(func) with a hard timeout, page-state validation and
 * bounded retries. Never throws: when every attempt fails it resolves to a
 * "safe defaults" object instead.
 *
 * Retry rules:
 *  - Only error types listed in RETRY_CONFIG.retryableErrors are retried,
 *    with exponential backoff from getRetryDelay().
 *  - Detached-frame errors are retried at most once, after a fixed 1s pause.
 *  - No delay is spent when no further attempt will follow.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {Function} func - Function evaluated in the page context
 * @param {number} [timeout=TIMEOUTS.PAGE_EVALUATION_SAFE] - Per-attempt timeout in ms
 * @param {Object} [options]
 * @param {number} [options.maxRetries=RETRY_CONFIG.maxAttempts] - Maximum attempts
 * @param {boolean} [options.forceDebug=false] - Debug logging flag
 * @returns {Promise<*>} The evaluation result, or on failure
 *   `{ isChallengePresent, isPhishingWarning, isTurnstile, isJSChallenge,
 *      isChallengeCompleted }` (all false) plus `error`, `errorType` and
 *   `attempts` (the number of attempts actually made).
 */
async function safePageEvaluate(page, func, timeout = TIMEOUTS.PAGE_EVALUATION_SAFE, options = {}) {
  const { maxRetries = RETRY_CONFIG.maxAttempts, forceDebug = false } = options;
  let lastError = null;
  let attemptsMade = 0;
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    attemptsMade = attempt;
    try {
      // Multi-layered page state validation
      if (page.isClosed()) {
        throw new Error('Page is closed or invalid');
      }
      // Check if page is still navigating or has valid context
      let currentUrl;
      try {
        currentUrl = await page.url();
      } catch (urlError) {
        throw new Error('Page URL access failed - likely detached');
      }
      // Validated outside the try above so this specific message is not
      // overwritten by the "access failed" wrapper.
      if (!currentUrl || currentUrl === 'about:blank') {
        throw new Error('Page URL is invalid or blank');
      }
      let timeoutId = null;
      let result;
      try {
        result = await Promise.race([
          page.evaluate(func),
          new Promise((_, reject) => {
            timeoutId = setTimeout(() => reject(new Error('Page evaluation timeout')), timeout);
          })
        ]);
      } finally {
        // Disarm the watchdog whichever side of the race settled first
        if (timeoutId) clearTimeout(timeoutId);
      }
      if (forceDebug && attempt > 1) {
        console.log(formatLogMessage('cloudflare', `Page evaluation succeeded on attempt ${attempt}`));
      }
      return result;
    } catch (error) {
      lastError = error;
      const errorType = categorizeError(error);
      if (forceDebug) {
        console.warn(formatLogMessage('cloudflare', `Page evaluation failed (attempt ${attempt}/${maxRetries}): ${error.message} [${errorType}]`));
      }
      const isLastAttempt = attempt >= maxRetries;
      // Handle detached frame errors specifically
      if (errorType === ERROR_TYPES.DETACHED_FRAME) {
        // For detached frames, only retry once more — and don't sleep when
        // there is nothing left to retry.
        if (isLastAttempt || attempt >= 2) {
          break;
        }
        if (forceDebug) {
          console.warn(formatLogMessage('cloudflare', `Detached frame detected on attempt ${attempt}/${maxRetries} - using longer delay`));
        }
        // Brief pause so the frame can re-attach before the retry
        await new Promise(resolve => setTimeout(resolve, 1000));
        continue;
      }
      // Don't retry if error type is not retryable or if it's the last attempt
      if (isLastAttempt || !RETRY_CONFIG.retryableErrors.includes(errorType)) {
        break;
      }
      // Wait before retrying with exponential backoff
      await new Promise(resolve => setTimeout(resolve, getRetryDelay(attempt)));
    }
  }
  // Return safe defaults if all retries failed
  return {
    isChallengePresent: false,
    isPhishingWarning: false,
    isTurnstile: false,
    isJSChallenge: false,
    isChallengeCompleted: false,
    error: lastError?.message || 'Unknown error',
    errorType: categorizeError(lastError),
    attempts: attemptsMade
  };
}
/**
 * Clicks `selector` on the page with an outer watchdog so a hung click can
 * never block the caller indefinitely.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {string} selector - CSS selector of the element to click
 * @param {number} [timeout=TIMEOUTS.CLICK_TIMEOUT] - Click timeout in ms; the
 *   watchdog fires TIMEOUTS.CLICK_TIMEOUT_BUFFER ms after it
 * @returns {Promise<*>} Whatever page.click() resolves to
 * @throws {Error} "Click failed: <reason>", with the original error on `cause`
 */
async function safeClick(page, selector, timeout = TIMEOUTS.CLICK_TIMEOUT) {
  let timeoutId;
  try {
    return await Promise.race([
      // NOTE(review): page.click() may ignore `timeout` — the watchdog below
      // is what actually bounds the wait; confirm against the Puppeteer version in use.
      page.click(selector, { timeout: timeout }),
      new Promise((_, reject) => {
        timeoutId = setTimeout(() => reject(new Error('Click timeout')), timeout + TIMEOUTS.CLICK_TIMEOUT_BUFFER);
      })
    ]);
  } catch (error) {
    // Keep the original error (type, stack) reachable for callers and logs
    throw new Error(`Click failed: ${error.message}`, { cause: error });
  } finally {
    if (timeoutId) clearTimeout(timeoutId);
  }
}
/**
 * Waits for the next navigation (until 'domcontentloaded') with an outer
 * watchdog on top of Puppeteer's own timeout.
 * Best-effort by design: failures are logged with console.warn and swallowed,
 * in which case the function resolves to undefined.
 * @param {Object} page - Puppeteer page instance
 * @param {number} [timeout=TIMEOUTS.NAVIGATION_TIMEOUT] - Navigation timeout in ms;
 *   the watchdog fires TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER ms after it
 * @returns {Promise<*>} Result of page.waitForNavigation(), or undefined on failure
 */
async function safeWaitForNavigation(page, timeout = TIMEOUTS.NAVIGATION_TIMEOUT) {
  let watchdog = null;
  try {
    const navigation = page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: timeout });
    const hardLimit = new Promise((_, reject) => {
      const limitMs = timeout + TIMEOUTS.NAVIGATION_TIMEOUT_BUFFER;
      watchdog = setTimeout(() => reject(new Error('Navigation timeout')), limitMs);
    });
    return await Promise.race([navigation, hardLimit]);
  } catch (navError) {
    console.warn(formatLogMessage('cloudflare', `Navigation wait failed: ${navError.message}`));
  } finally {
    if (watchdog) {
      clearTimeout(watchdog);
    }
  }
}
/**
* Quick Cloudflare detection with per-host caching.
*
* Flow: validate the page URL -> consult detectionCache -> otherwise run a
* staged in-page check (title/URL strings, then cheap selectors, then a
* small text sample plus slower selectors) and cache whatever comes back.
*
* @param {Object} page - Puppeteer page instance
* @param {boolean} forceDebug - Debug logging flag
* @returns {Promise<Object>} One of:
*   - { hasIndicators: false, skippedInvalidUrl: true } for non-HTTP(S) pages
*   - a shallow copy of the cached result tagged with _fromCache: true
*   - { hasIndicators, title, url, bodySnippet } from a fresh check, with
*     hasErrorPage/errorCode added for Cloudflare 5xx origin-error pages
*   - safePageEvaluate's failure defaults (no hasIndicators key) when the
*     page evaluation failed
*   - { hasIndicators: false, error } if anything here throws
* NOTE(review): failed evaluations are cached like successes, so one
* transient failure is reused for the host until the cache TTL expires —
* confirm this is the intended "timeout outcome caching".
*/
async function quickCloudflareDetection(page, forceDebug = false) {
try {
// Get current page URL and validate it
const currentPageUrl = await page.url();
if (!shouldProcessUrl(currentPageUrl, forceDebug)) {
if (forceDebug) {
console.log(formatLogMessage('cloudflare', `Quick detection skipping non-HTTP(S) page: ${currentPageUrl}`));
}
return { hasIndicators: false, skippedInvalidUrl: true };
}
// Check cache first
const cachedResult = detectionCache.get(currentPageUrl);
if (cachedResult !== null) {
if (forceDebug) {
const stats = detectionCache.getStats();
console.log(formatLogMessage('cloudflare', `Using cached detection result (cache hit rate: ${stats.hitRate})`));
}
// Return a fresh shallow copy tagged _fromCache so the handler's
// logging can say "[cached]" instead of presenting cached title/body
// details as if they were fresh.
return { ...cachedResult, _fromCache: true };
}
// Perform actual detection with enhanced error handling.
// The callback below runs inside the page, so it can only use browser
// globals — nothing from this module's scope.
const quickCheck = await safePageEvaluate(page, () => {
const title = document.title || '';
const url = window.location.href;
// Cloudflare-served 5xx origin-error pages (522/523/524/525/526/527/530).
// Title format is reliable: "<domain> | 5xx: <reason>". These are NOT
// bypass-able challenges — the origin is unreachable. Mark as
// recognized (hasErrorPage) but NOT as a bypass target (hasIndicators
// stays false) so the early-skip path still fires and the log can say
// "Cloudflare error page" instead of the misleading "No Cloudflare
// indicators found". errorCode is the captured 5xx digit so outcome
// logs can grep by specific error type.
const titleErrorMatch = title.match(/\|\s*(5\d\d):/);
if (titleErrorMatch) {
return { hasIndicators: false, hasErrorPage: true, errorCode: parseInt(titleErrorMatch[1], 10), title, url, bodySnippet: '' };
}
// FAST PATH: Check title + URL first (string ops, no DOM traversal)
const titleMatch =
title.includes('Just a moment') ||
title.includes('Checking your browser') ||
title.includes('Attention Required') ||
title.includes('Security check');
const urlMatch =
url.includes('/cdn-cgi/challenge-platform/') ||
url.includes('cloudflare.com');
if (titleMatch || urlMatch) {
return { hasIndicators: true, title, url, bodySnippet: '' };
}
// MEDIUM PATH: Combine fast-path selectors into one query — one DOM
// walk for all 7 alternatives instead of up to 7 separate walks.
const selectorMatch = document.querySelector(
'[data-ray], [data-cf-challenge], .cf-challenge-running, .cf-turnstile, .cf-managed-challenge, [data-cf-managed], script[src*="/cdn-cgi/challenge-platform/"]'
);
if (selectorMatch) {
return { hasIndicators: true, title, url, bodySnippet: '' };
}
// SLOW PATH: Extract limited body text only if fast checks failed.
// Reads textContent of a known challenge wrapper if present, otherwise of
// the body's first element child, capped at 300 characters — never the
// whole body's text.
let bodyText = '';
if (document.body) {
const el = document.body.querySelector('.main-wrapper, .main-content, #challenge-body-text, .cf-challenge-container');
bodyText = el ? el.textContent.substring(0, 300) : (document.body.firstElementChild ? document.body.firstElementChild.textContent.substring(0, 300) : '');
}
const textMatch =
bodyText.includes('Cloudflare') ||
bodyText.includes('cf-ray') ||
bodyText.includes('Verify you are human') ||
bodyText.includes('This website has been reported for potential phishing') ||
bodyText.includes('Please wait while we verify') ||
bodyText.includes('Checking if the site connection is secure');
// Remaining slower selectors — combined into one query for the same reason.
const slowSelectorMatch = document.querySelector(
'.cf-challenge-container, .ctp-checkbox-container, iframe[src*="challenges.cloudflare.com"], iframe[title*="Cloudflare security challenge"]'
);
// Body-text fallback for error pages with non-standard titles.
// Same rationale as the early title check: recognize but don't bypass.
// Only applies when no challenge text/selector matched.
const bodyErrorMatch = bodyText.match(/Error code (5\d\d)/);
if (bodyErrorMatch && !textMatch && !slowSelectorMatch) {
return { hasIndicators: false, hasErrorPage: true, errorCode: parseInt(bodyErrorMatch[1], 10), title, url, bodySnippet: bodyText.substring(0, 200) };
}
return {
hasIndicators: !!(textMatch || slowSelectorMatch),
title,
url,
bodySnippet: bodyText.substring(0, 200)
};
}, FAST_TIMEOUTS.QUICK_DETECTION, { maxRetries: 1, forceDebug });
// Cache the result (keyed by hostname inside detectionCache)
detectionCache.set(currentPageUrl, quickCheck);
if (forceDebug) {
if (quickCheck.hasIndicators) {
console.log(formatLogMessage('cloudflare', `Quick detection found Cloudflare indicators on ${quickCheck.url}`));
}
// hasErrorPage and no-indicators cases are deliberately silent here —
// handleCloudflareProtection prints a clearer per-action line right
// after ("Cloudflare error page detected..." or "No Cloudflare
// indicators found, skipping protection handling..."), so logging
// here would just duplicate it.
if (quickCheck.attempts && quickCheck.attempts > 1) {
console.log(formatLogMessage('cloudflare', `Detection required ${quickCheck.attempts} attempts`));
}
}
return quickCheck;
} catch (error) {
if (forceDebug) console.log(formatLogMessage('cloudflare', `Quick detection failed: ${error.message}`));
return { hasIndicators: false, error: error.message };
}
}
/**
* Analyzes the current page to classify any Cloudflare challenge on it.
*
* Combines two sources: a frame-URL scan via page.frames() (which also sees
* challenge iframes hidden behind closed shadow roots) and an in-page DOM /
* text inspection run through safePageEvaluate with a timeout. This function
* itself performs no logging.
*
* @param {Object} page - Puppeteer page instance
* @returns {Promise<Object>} Flags such as isChallengePresent,
*   isPhishingWarning, isTurnstile, isJSChallenge, isChallengeCompleted plus
*   the individual has* signals, title, url and bodySnippet. On evaluation
*   failure the all-false defaults from safePageEvaluate are returned; if this
*   function throws, all-false flags plus `error`.
*/
async function analyzeCloudflareChallenge(page) {
try {
// CDP-level frame check -- bypasses closed shadow roots
const frames = page.frames();
const hasChallengeFrame = frames.some(f => {
const url = f.url();
return url.includes('challenges.cloudflare.com') || url.includes('/cdn-cgi/challenge-platform/');
});
// The callback below runs inside the page: browser globals only.
const result = await safePageEvaluate(page, () => {
const title = document.title || '';
// Cap text extraction -- on content-heavy pages body.textContent can be megabytes
const bodyText = document.body ? document.body.textContent.substring(0, 2000) : '';
// Updated selectors for 2025 Cloudflare challenges. Each category groups
// its alternatives into a single comma-separated selector so the browser
// walks the DOM once per category instead of once per alternative.
const hasTurnstileIframe = !!document.querySelector(
'iframe[title*="Cloudflare security challenge"], iframe[src*="challenges.cloudflare.com"], iframe[title*="Widget containing a Cloudflare"]'
);
const hasTurnstileContainer = !!document.querySelector(
'.cf-turnstile, .ctp-checkbox-container, .ctp-checkbox-label'
);
const hasTurnstileCheckbox = !!document.querySelector(
'input[type="checkbox"].ctp-checkbox, .ctp-checkbox'
);
const hasLegacyCheckbox = !!document.querySelector(
'input[type="checkbox"]#challenge-form, input[type="checkbox"][name="cf_captcha_kind"]'
);
const hasChallengeRunning = !!document.querySelector(
'.cf-challenge-running, .cf-challenge-container, .challenge-stage, .challenge-form'
);
const hasDataRay = !!document.querySelector('[data-ray], [data-cf-challenge]');
// Text heuristics below only see the first 2000 characters of body text
const hasCaptcha = bodyText.includes('CAPTCHA') || bodyText.includes('captcha') ||
bodyText.includes('hCaptcha') || bodyText.includes('reCAPTCHA');
const hasJSChallenge = document.querySelector('script[src*="/cdn-cgi/challenge-platform/"]') !== null ||
bodyText.includes('Checking your browser') ||
bodyText.includes('Please wait while we verify');
const hasPhishingWarning = bodyText.includes('This website has been reported for potential phishing') ||
title.includes('Attention Required');
const hasTurnstileResponse = document.querySelector('input[name="cf-turnstile-response"]') !== null;
// NOTE(review): not a strict boolean — this is `false` when the response
// input is absent, otherwise the input's value (token string, or '' while
// unsolved). Callers should rely on truthiness only.
const isChallengeCompleted = hasTurnstileResponse &&
document.querySelector('input[name="cf-turnstile-response"]')?.value;
// hasTurnstileCheckbox and hasCaptcha are reported but do not by
// themselves mark a challenge as present.
const isChallengePresent = title.includes('Just a moment') ||
title.includes('Checking your browser') ||
bodyText.includes('Verify you are human') ||
hasLegacyCheckbox ||
hasChallengeRunning ||
hasDataRay ||
hasTurnstileIframe ||
hasTurnstileContainer ||
hasJSChallenge;
return {
isChallengePresent,
isPhishingWarning: hasPhishingWarning,
isTurnstile: hasTurnstileIframe || hasTurnstileContainer || hasTurnstileCheckbox,
isJSChallenge: hasJSChallenge,
isChallengeCompleted,
title,
hasLegacyCheckbox,
hasTurnstileIframe,
hasTurnstileContainer,
hasTurnstileCheckbox,
hasChallengeRunning,
hasDataRay,
hasCaptcha,
hasTurnstileResponse,
url: window.location.href,
bodySnippet: bodyText.substring(0, 200)
};
}, TIMEOUTS.PAGE_EVALUATION);
// Merge CDP frame detection -- catches iframes behind closed shadow roots.
// Also applies when the evaluation failed and `result` holds the defaults.
if (hasChallengeFrame && !result.hasTurnstileIframe) {
result.hasTurnstileIframe = true;
result.isTurnstile = true;
result.isChallengePresent = true;
}
return result;
} catch (error) {
// e.g. page.frames() threw, or the evaluation returned a non-object
return {
isChallengePresent: false,
isPhishingWarning: false,
isTurnstile: false,
isJSChallenge: false,
isChallengeCompleted: false,
error: error.message
};
}
}
/**
 * Detects Cloudflare's "reported for potential phishing" interstitial and,
 * when present, clicks through it.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed (used for logging)
 * @param {boolean} forceDebug - Debug logging flag
 * @returns {Promise<Object>} Phishing warning result:
 *   {
 *     success: boolean,     // True if no warning found OR the click-through completed
 *     attempted: boolean,   // True if a warning was detected and bypass attempted
 *     error: string|null,   // Error message if the check or the bypass failed
 *     details: object|null  // analyzeCloudflareChallenge() output when a warning was found
 *   }
 */
async function handlePhishingWarning(page, currentUrl, forceDebug = false) {
  const log = (message) => console.log(formatLogMessage('cloudflare', message));
  const outcome = {
    success: false,
    attempted: false,
    error: null,
    details: null
  };

  try {
    if (forceDebug) log(`Checking for phishing warning on ${currentUrl}`);
    // Short settle delay before inspecting the page
    await waitForTimeout(page, FAST_TIMEOUTS.PHISHING_WAIT);
    const analysis = await analyzeCloudflareChallenge(page);

    if (!analysis.isPhishingWarning) {
      if (forceDebug) log(`No phishing warning detected on ${currentUrl}`);
      outcome.success = true; // No warning to handle
      return outcome;
    }

    outcome.attempted = true;
    outcome.details = analysis;
    if (forceDebug) {
      log(`Phishing warning detected on ${currentUrl}:`);
      log(` Page Title: "${analysis.title}"`);
      log(` Current URL: ${analysis.url}`);
      log(` Body snippet: ${analysis.bodySnippet}`);
    }

    try {
      // Click the "continue" link, then give the resulting navigation a bounded wait
      await safeClick(page, 'a[href*="continue"]', TIMEOUTS.PHISHING_CLICK);
      await safeWaitForNavigation(page, TIMEOUTS.PHISHING_NAVIGATION);
      outcome.success = true;
      if (forceDebug) log(`Successfully bypassed phishing warning for ${currentUrl}`);
    } catch (clickError) {
      outcome.error = `Failed to click continue button: ${clickError.message}`;
      if (forceDebug) log(`Failed to bypass phishing warning: ${clickError.message}`);
    }
  } catch (error) {
    outcome.error = error.message;
    if (forceDebug) log(`Phishing warning check failed for ${currentUrl}: ${error.message}`);
  }
  return outcome;
}
/**
 * Looks for a Cloudflare verification challenge on the page and, when one is present,
 * passes it to the timeout-protected solver. Exceptions raised during detection or
 * solving are caught and reported through the `error` field of the returned object.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed (used in log lines and passed to the solver)
 * @param {boolean} [forceDebug=false] - Debug logging flag
 * @returns {Promise<Object>} Challenge verification result:
 *   {
 *     success: boolean,       // True if no challenge was found OR the solver reported success
 *     attempted: boolean,     // True once a challenge has been detected
 *     error: string|null,     // Solver error, CAPTCHA notice, or message of a caught exception
 *     details: object|null,   // Value returned by analyzeCloudflareChallenge()
 *     requiresHuman: boolean, // True if a CAPTCHA was detected (no automatic solve is attempted)
 *     method: string|null     // Method reported by the solver, e.g. 'js_challenge_wait', 'turnstile', 'legacy_checkbox'
 *   }
 */
async function handleVerificationChallenge(page, currentUrl, forceDebug = false) {
  // Single place that knows how a cloudflare debug line is formatted and printed.
  const emit = (message) => console.log(formatLogMessage('cloudflare', message));

  const outcome = {
    success: false,
    attempted: false,
    error: null,
    details: null,
    requiresHuman: false,
    method: null
  };

  try {
    if (forceDebug) emit(`Checking for verification challenge on ${currentUrl}`);

    // Short pause before inspecting the page.
    await waitForTimeout(page, FAST_TIMEOUTS.CHALLENGE_WAIT);

    const analysis = await analyzeCloudflareChallenge(page);
    outcome.details = analysis;

    // Nothing to solve: report success straight away.
    if (!analysis.isChallengePresent) {
      if (forceDebug) emit(`No verification challenge detected on ${currentUrl}`);
      outcome.success = true;
      return outcome;
    }

    outcome.attempted = true;

    if (forceDebug) {
      const report = [
        `Challenge detected on ${currentUrl}:`,
        ` Page Title: "${analysis.title}"`,
        ` Current URL: ${analysis.url}`,
        ` Is Turnstile: ${analysis.isTurnstile}`,
        ` Is JS Challenge: ${analysis.isJSChallenge}`,
        ` Has Legacy Checkbox: ${analysis.hasLegacyCheckbox}`,
        ` Has Turnstile Iframe: ${analysis.hasTurnstileIframe}`,
        ` Has Turnstile Container: ${analysis.hasTurnstileContainer}`,
        ` Has Turnstile Checkbox: ${analysis.hasTurnstileCheckbox}`,
        ` Has CAPTCHA: ${analysis.hasCaptcha}`,
        ` Has Challenge Running: ${analysis.hasChallengeRunning}`,
        ` Has Data Ray: ${analysis.hasDataRay}`,
        ` Has Turnstile Response: ${analysis.hasTurnstileResponse}`,
        ` Body snippet: ${analysis.bodySnippet}`
      ];
      for (const line of report) {
        emit(line);
      }
    }

    // A CAPTCHA cannot be bypassed automatically; flag it and stop here.
    if (analysis.hasCaptcha) {
      outcome.requiresHuman = true;
      outcome.error = 'CAPTCHA detected - requires human intervention';
      if (forceDebug) emit(`Skipping automatic bypass due to CAPTCHA requirement`);
      return outcome;
    }

    // Delegate to the solver, which enforces its own overall deadline.
    const solved = await attemptChallengeSolveWithTimeout(page, currentUrl, analysis, forceDebug);
    outcome.success = solved.success;
    outcome.error = solved.error;
    outcome.method = solved.method;
  } catch (error) {
    outcome.error = error.message;
    if (forceDebug) emit(`Challenge check failed for ${currentUrl}: ${error.message}`);
  }

  return outcome;
}
/**
 * Runs handleVerificationChallenge() repeatedly, following the retry policy returned by
 * getRetryConfig(siteConfig), and stops early when a redirect loop is reported by
 * detectChallengeLoop().
 *
 * Between failed attempts the function waits getRetryDelay(attempt) ms; after a failed
 * (non-throwing) attempt it also reloads the page so a fresh challenge is served.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed
 * @param {Object} siteConfig - Site configuration passed to getRetryConfig()
 * @param {boolean} [forceDebug=false] - Debug logging flag
 * @returns {Promise<Object>} The result of the deciding attempt plus `attempts`, or a failure
 *   object carrying one of `loopDetected`, `errorType` or `maxRetriesExceeded`.
 */
async function handleVerificationChallengeWithRetries(page, currentUrl, siteConfig, forceDebug = false) {
  const policy = getRetryConfig(siteConfig);
  const urlTrail = []; // Every page URL seen so far, fed to the loop detector
  let mostRecentError = null;

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const emit = (message) => console.log(formatLogMessage('cloudflare', message));
  // All failure results share the same base shape; `extra` adds the reason-specific flag.
  const failure = (error, attempts, extra) => ({
    success: false,
    attempted: true,
    error,
    details: null,
    requiresHuman: false,
    method: null,
    attempts,
    ...extra
  });

  if (forceDebug) {
    emit(`Starting verification challenge with max ${policy.maxAttempts} attempts`);
  }

  for (let attempt = 1; attempt <= policy.maxAttempts; attempt += 1) {
    const isRetry = attempt > 1;

    try {
      const pageUrl = await page.url();
      urlTrail.push(pageUrl);

      // Bail out when the loop detector reports a redirect loop
      if (detectChallengeLoop(pageUrl, urlTrail)) {
        const loopMessage = `Challenge redirect loop detected after ${attempt} attempts. URLs: ${urlTrail.slice(-3).join(' -> ')}`;
        if (forceDebug) {
          emit(loopMessage);
        }
        return failure(loopMessage, attempt, { loopDetected: true });
      }

      if (forceDebug && isRetry) {
        emit(`Challenge attempt ${attempt}/${policy.maxAttempts} for ${currentUrl}`);
      }

      const outcome = await handleVerificationChallenge(page, currentUrl, forceDebug);

      // Retry only a plain failure, and only when the policy allows it
      const shouldRetry = !outcome.success && !outcome.requiresHuman && policy.retryOnError;
      if (!shouldRetry) {
        if (forceDebug && isRetry) {
          const verdict = outcome.success ? 'succeeded' : 'failed';
          console.log(`[debug][cloudflare] Challenge ${verdict} on attempt ${attempt}`);
        }
        return { ...outcome, attempts: attempt };
      }

      // More attempts left: back off, then reload to obtain a fresh challenge
      if (attempt < policy.maxAttempts) {
        const delay = getRetryDelay(attempt);
        if (forceDebug) {
          emit(`Challenge attempt ${attempt} failed, retrying in ${delay}ms: ${outcome.error}`);
        }
        await sleep(delay);

        try {
          await page.reload({ waitUntil: 'domcontentloaded', timeout: 10000 });
          await waitForTimeout(page, 2000); // Give challenge time to load
        } catch (reloadErr) {
          // A failed reload is not fatal; the next attempt runs against whatever is loaded
          if (forceDebug) {
            emit(`Page reload failed on attempt ${attempt}: ${reloadErr.message}`);
          }
        }
      }

      mostRecentError = outcome.error;
    } catch (error) {
      mostRecentError = error.message;
      const errorType = categorizeError(error);

      if (forceDebug) {
        console.warn(formatLogMessage('cloudflare', `Challenge attempt ${attempt}/${policy.maxAttempts} failed: ${error.message} [${errorType}]`));
      }

      // Give up on non-retryable error types and on the final attempt
      const retryable = policy.retryableErrors.includes(errorType);
      if (!retryable || attempt === policy.maxAttempts) {
        return failure(mostRecentError, attempt, { errorType });
      }

      // Back off before the next attempt
      if (attempt < policy.maxAttempts) {
        await sleep(getRetryDelay(attempt));
      }
    }
  }

  return failure(
    `All ${policy.maxAttempts} challenge attempts failed. Last error: ${mostRecentError}`,
    policy.maxAttempts,
    { maxRetriesExceeded: true }
  );
}
/**
 * Runs handlePhishingWarning() repeatedly, following the retry policy returned by
 * getRetryConfig(siteConfig). Waits getRetryDelay(attempt) ms between attempts.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed
 * @param {Object} siteConfig - Site configuration passed to getRetryConfig()
 * @param {boolean} [forceDebug=false] - Debug logging flag
 * @returns {Promise<Object>} The result of the deciding attempt plus `attempts`, or a failure
 *   object carrying either `errorType` or `maxRetriesExceeded`.
 */
async function handlePhishingWarningWithRetries(page, currentUrl, siteConfig, forceDebug = false) {
  const policy = getRetryConfig(siteConfig);
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const emit = (message) => console.log(formatLogMessage('cloudflare', message));
  let mostRecentError = null;

  for (let attempt = 1; attempt <= policy.maxAttempts; attempt += 1) {
    const isRetry = attempt > 1;

    try {
      if (forceDebug && isRetry) {
        emit(`Phishing warning attempt ${attempt}/${policy.maxAttempts} for ${currentUrl}`);
      }

      const outcome = await handlePhishingWarning(page, currentUrl, forceDebug);

      // Retry only a failure, and only when the policy allows it
      const shouldRetry = !outcome.success && policy.retryOnError;
      if (!shouldRetry) {
        if (forceDebug && isRetry) {
          const verdict = outcome.success ? 'succeeded' : 'failed';
          console.log(`[debug][cloudflare] Phishing warning ${verdict} on attempt ${attempt}`);
        }
        return { ...outcome, attempts: attempt };
      }

      // More attempts left: back off before trying again
      if (attempt < policy.maxAttempts) {
        const delay = getRetryDelay(attempt);
        if (forceDebug) {
          emit(`Phishing warning attempt ${attempt} failed, retrying in ${delay}ms: ${outcome.error}`);
        }
        await sleep(delay);
      }

      mostRecentError = outcome.error;
    } catch (error) {
      mostRecentError = error.message;
      const errorType = categorizeError(error);

      if (forceDebug) {
        console.warn(formatLogMessage('cloudflare', `Phishing warning attempt ${attempt}/${policy.maxAttempts} failed: ${error.message} [${errorType}]`));
      }

      // Give up on non-retryable error types and on the final attempt
      const retryable = policy.retryableErrors.includes(errorType);
      if (!retryable || attempt === policy.maxAttempts) {
        return {
          success: false,
          attempted: true,
          error: mostRecentError,
          details: null,
          attempts: attempt,
          errorType
        };
      }

      // Back off before the next attempt
      if (attempt < policy.maxAttempts) {
        await sleep(getRetryDelay(attempt));
      }
    }
  }

  return {
    success: false,
    attempted: true,
    error: `All ${policy.maxAttempts} phishing warning attempts failed. Last error: ${mostRecentError}`,
    details: null,
    attempts: policy.maxAttempts,
    maxRetriesExceeded: true
  };
}
/**
 * Runs attemptChallengeSolve() under an overall deadline of FAST_TIMEOUTS.CHALLENGE_SOLVING ms.
 *
 * The solver's result is returned unchanged when it settles before the deadline. Otherwise a
 * failure object is returned whose `error` tells the two failure modes apart:
 *   - deadline reached:  'Challenge solving timed out: Challenge solving timeout'
 *   - solver rejected:   'Challenge solving failed: <original message>'
 * The function itself does not reject; the deadline timer is always cleared before returning.
 *
 * @param {Object} page - Puppeteer page instance
 * @param {string} currentUrl - URL being processed
 * @param {Object} challengeInfo - Analysis details forwarded to attemptChallengeSolve()
 * @param {boolean} [forceDebug=false] - Debug logging flag
 * @returns {Promise<Object>} { success: boolean, error: string|null, method: string|null }
 */
async function attemptChallengeSolveWithTimeout(page, currentUrl, challengeInfo, forceDebug = false) {
  const result = {
    success: false,
    error: null,
    method: null
  };
  let timeoutId = null;
  // Set only when the deadline timer fires, so the catch block can recognise a real timeout.
  let timeoutError = null;

  try {
    const timeoutPromise = new Promise((_, reject) => {
      timeoutId = setTimeout(() => {
        timeoutError = new Error('Challenge solving timeout');
        reject(timeoutError);
      }, FAST_TIMEOUTS.CHALLENGE_SOLVING);
    });

    // Whichever settles first wins; a late solver rejection stays handled by the race.
    return await Promise.race([
      attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug),
      timeoutPromise
    ]);
  } catch (error) {
    // Tolerate non-Error throwables so building the message can never throw itself.
    const message = error && error.message ? error.message : String(error);

    if (timeoutError !== null && error === timeoutError) {
      result.error = `Challenge solving timed out: ${message}`;
      if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge solving timeout for ${currentUrl}`));
    } else {
      // The solver failed on its own; do not mislabel it as a timeout.
      result.error = `Challenge solving failed: ${message}`;
      if (forceDebug) console.log(formatLogMessage('cloudflare', `Challenge solving failed for ${currentUrl}: ${message}`));
    }
    return result;
  } finally {
    // Runs on every exit path so the timer never outlives the call.
    if (timeoutId) {
      clearTimeout(timeoutId);
    }
  }
}
/**
* Attempts to solve a Cloudflare challenge with modern techniques and enhanced debug logging
*/
async function attemptChallengeSolve(page, currentUrl, challengeInfo, forceDebug = false) {
const result = {
success: false,
error: null,
method: null
};
// Method 1: Handle JS challenges (wait for automatic completion) - Most reliable
if (challengeInfo.isJSChallenge) {
try {
if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting JS challenge wait for ${currentUrl}`));
const jsResult = await waitForJSChallengeCompletion(page, forceDebug);
if (jsResult.success) {
// Wait for redirect after challenge completion
try {
await page.waitForNavigation({ waitUntil: 'domcontentloaded', timeout: 10000 });
if (forceDebug) console.log(formatLogMessage('cloudflare', `Post-challenge redirect completed for ${currentUrl}`));
} catch (navErr) {
if (forceDebug) console.log(formatLogMessage('cloudflare', `Post-challenge redirect timeout (may already be on target page): ${navErr.message}`));
}
result.success = true;
result.method = 'js_challenge_wait';
if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge completed successfully for ${currentUrl}`));
return result;
}
} catch (jsError) {
if (forceDebug) console.log(formatLogMessage('cloudflare', `JS challenge wait failed for ${currentUrl}: ${jsError.message}`));
}
} else if (forceDebug) {
console.log(formatLogMessage('cloudflare', `Skipping JS challenge method (not detected)`));
}
// Method 2: Handle Turnstile challenges (interactive)
if (challengeInfo.isTurnstile) {
try {
if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting Turnstile method for ${currentUrl}`));
const turnstileResult = await handleTurnstileChallenge(page, forceDebug);
if (turnstileResult.success) {
result.success = true;
result.method = 'turnstile';
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile challenge solved successfully for ${currentUrl}`));
return result;
}
} catch (turnstileError) {
if (forceDebug) console.log(formatLogMessage('cloudflare', `Turnstile method failed for ${currentUrl}: ${turnstileError.message}`));
}
} else if (forceDebug) {
console.log(formatLogMessage('cloudflare', `Skipping Turnstile method (not detected)`));
}
// Method 3: Legacy checkbox interaction (fallback)
if (challengeInfo.hasLegacyCheckbox) {
try {
if (forceDebug) console.log(formatLogMessage('cloudflare', `Attempting legacy checkbox method for ${currentUrl}`));
const legacyResult = await handleLegacyCheckbox(page, forceDebug);
if (legacyResult.success) {
result.success = true;
result.method = 'legacy_checkbox';
if (forceDebug) console.log(formatLogMessage('cloudflare', `Legacy checkbox method succeeded for ${currentUrl}`));
return result;
}
} catch