@fanboynz/network-scanner
Version:
A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.
385 lines (338 loc) • 14.7 kB
JavaScript
// redirect.js - Enhanced redirect handling module for nwss.js
// Handles HTTP redirects, JavaScript redirects, meta refresh, and delayed redirects
/**
 * Browser-side probe installed through page.evaluateOnNewDocument().
 *
 * Puppeteer serializes this function and runs it inside the page, so it must
 * stay fully self-contained (no references to Node-side variables).
 * It publishes three flags on window (_jsRedirectDetected, _jsRedirectUrl,
 * _jsRedirectType) that the Node side reads back with page.evaluate().
 */
function redirectProbeScript() {
  // Flags that record whether (and how) a JS redirect was requested
  window._jsRedirectDetected = false;
  window._jsRedirectUrl = null;
  window._jsRedirectType = null;

  const recordRedirect = (type, url) => {
    window._jsRedirectDetected = true;
    window._jsRedirectUrl = url;
    window._jsRedirectType = type;
  };

  // Wrap location.replace() / location.assign(). Browsers expose Location
  // members as unforgeable (non-writable, non-configurable) properties, so
  // the assignment may be rejected; each hook is isolated so one failure
  // cannot prevent the remaining detectors from being installed.
  const wrapLocationMethod = (name) => {
    try {
      const original = window.location[name];
      window.location[name] = function(url) {
        recordRedirect(`location.${name}`, url);
        console.log(`[jsRedirect] location.${name}:`, url);
        return original.call(this, url);
      };
    } catch (hookErr) {
      // Deliberately best-effort: the member cannot be overridden here
    }
  };
  wrapLocationMethod('replace');
  wrapLocationMethod('assign');

  // Intercept the location.href setter. Object.defineProperty throws a
  // TypeError when the property is non-configurable, hence the try/catch.
  try {
    const originalHrefSetter = Object.getOwnPropertyDescriptor(window.location, 'href')?.set;
    if (originalHrefSetter) {
      Object.defineProperty(window.location, 'href', {
        set: function(url) {
          recordRedirect('location.href', url);
          console.log('[jsRedirect] location.href set:', url);
          return originalHrefSetter.call(this, url);
        },
        get: function() {
          return window.location.toString();
        }
      });
    }
  } catch (hookErr) {
    // Deliberately best-effort: href cannot be redefined in this environment
  }

  // Monitor <meta http-equiv="refresh"> tags added to <head>.
  // The http-equiv value is compared case-insensitively ("Refresh" is common).
  const isMetaRefresh = (node) =>
    node.nodeName === 'META' &&
    Boolean(node.getAttribute) &&
    String(node.getAttribute('http-equiv') || '').toLowerCase() === 'refresh';

  const observer = new MutationObserver((mutations) => {
    mutations.forEach((mutation) => {
      mutation.addedNodes.forEach((node) => {
        if (!isMetaRefresh(node)) return;
        const content = node.getAttribute('content');
        if (content) {
          recordRedirect('meta.refresh', content);
          console.log('[jsRedirect] meta refresh:', content);
        }
      });
    });
  });

  // Start observing as soon as <head> exists
  const observeHead = () => {
    if (document.head) {
      observer.observe(document.head, { childList: true, subtree: true });
    }
  };
  if (document.head) {
    observeHead();
  } else {
    document.addEventListener('DOMContentLoaded', observeHead);
  }
}

/**
 * Map a list of URLs to their hostnames, dropping entries that fail to parse.
 * @param {string[]} urls - URLs to convert
 * @returns {string[]} Hostnames in the same order as the parsable inputs
 */
function hostnamesOf(urls) {
  return urls
    .map((url) => {
      try {
        return new URL(url).hostname;
      } catch {
        return null;
      }
    })
    .filter(Boolean);
}

/**
 * Enhanced navigation with comprehensive redirect detection including JavaScript redirects.
 *
 * Registers a temporary 'framenavigated' listener, injects a browser-side probe,
 * navigates with page.goto(), then polls up to three times (spread over the
 * configured timeout) for URL changes caused by script or meta-refresh redirects.
 * The listener is always removed again, even when navigation throws.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} currentUrl - Original URL to navigate to
 * @param {object} siteConfig - Site configuration. Keys read here:
 *   js_redirect_timeout (ms, falsy values fall back to 5000),
 *   max_redirects (falsy values fall back to 10; caps the length of redirectChain,
 *   which includes the original URL), detect_js_patterns (pattern scan runs unless
 *   this is exactly false)
 * @param {object} gotoOptions - Computed goto options from existing logic
 * @param {boolean} forceDebug - Debug logging flag
 * @param {Function} formatLogMessage - Log formatting function from main script
 * @returns {Promise<{finalUrl: string, redirected: boolean, redirectChain: string[], originalUrl: string, redirectDomains: string[]}>}
 *   redirectDomains holds the hostnames of every chain entry except the last one.
 * @throws Propagates any error raised by page.goto() (e.g. navigation timeouts)
 */
async function navigateWithRedirectHandling(page, currentUrl, siteConfig, gotoOptions = {}, forceDebug = false, formatLogMessage) {
  const config = siteConfig ?? {};
  const redirectChain = [currentUrl];
  let finalUrl = currentUrl;
  let redirected = false;
  const jsRedirectTimeout = config.js_redirect_timeout || 5000; // Wait 5s for JS redirects
  const maxRedirects = config.max_redirects || 10;
  const detectJSPatterns = config.detect_js_patterns !== false; // Default to true
  const maxJsRedirectAttempts = 3;

  // Emit a debug line only when debug logging is enabled
  const debug = (message) => {
    if (forceDebug) {
      console.log(formatLogMessage('debug', message));
    }
  };

  // Monitor main-frame navigations to detect redirects
  const navigationHandler = (frame) => {
    if (frame !== page.mainFrame()) return;
    const frameUrl = frame.url();
    if (!frameUrl || frameUrl === 'about:blank' || redirectChain.includes(frameUrl)) return;
    // Check redirect limit before adding
    if (redirectChain.length >= maxRedirects) {
      debug(`Maximum redirects (${maxRedirects}) reached, stopping redirect chain`);
      return;
    }
    redirectChain.push(frameUrl);
    finalUrl = frameUrl;
    redirected = true;
    debug(`Frame navigation detected: ${frameUrl}`);
  };

  try {
    page.on('framenavigated', navigationHandler);

    // Inject JavaScript redirect detection (best-effort: navigation continues without it)
    try {
      await page.evaluateOnNewDocument(redirectProbeScript);
    } catch (jsErr) {
      debug(`Failed to inject JS redirect detector: ${jsErr.message}`);
    }

    if (forceDebug && Object.keys(gotoOptions).length > 0) {
      console.log(formatLogMessage('debug', `Using goto options: ${JSON.stringify(gotoOptions)}`));
    }

    // Initial navigation
    const response = await page.goto(currentUrl, gotoOptions);
    if (response && response.url() !== currentUrl) {
      if (redirectChain.length >= maxRedirects) {
        debug(`Maximum redirects (${maxRedirects}) reached during HTTP redirect`);
        finalUrl = currentUrl; // Keep original URL
      } else {
        finalUrl = response.url();
        redirected = true;
        if (!redirectChain.includes(finalUrl)) redirectChain.push(finalUrl);
        debug(`HTTP redirect detected: ${currentUrl} → ${finalUrl}`);
      }
    }

    // Wait for potential JavaScript redirects, polling in equal slices of the timeout
    debug(`Waiting ${jsRedirectTimeout}ms for potential JavaScript redirects...`);
    for (let attempt = 0; attempt < maxJsRedirectAttempts; attempt++) {
      await new Promise(resolve => setTimeout(resolve, jsRedirectTimeout / maxJsRedirectAttempts));
      try {
        // Read back the flags published by the injected probe
        const jsRedirectResult = await page.evaluate(() => {
          return {
            detected: window._jsRedirectDetected || false,
            url: window._jsRedirectUrl || null,
            type: window._jsRedirectType || null,
            currentUrl: window.location.href
          };
        });

        // Check if URL changed (either through JS redirect or automatic redirect)
        const currentPageUrl = page.url();
        if (currentPageUrl && currentPageUrl !== finalUrl && !redirectChain.includes(currentPageUrl)) {
          if (redirectChain.length >= maxRedirects) {
            debug(`Maximum redirects (${maxRedirects}) reached during JS redirect detection`);
            break;
          }
          redirectChain.push(currentPageUrl);
          finalUrl = currentPageUrl;
          redirected = true;
          if (jsRedirectResult.detected) {
            debug(`JavaScript redirect detected (${jsRedirectResult.type}): ${jsRedirectResult.url || currentPageUrl}`);
          } else {
            debug(`URL change detected: ${currentPageUrl}`);
          }
        }

        // Redirect was requested but the URL has not changed yet: poll again
        if (jsRedirectResult.detected && !redirected) {
          debug(`JS redirect detected (${jsRedirectResult.type}) but not yet executed, waiting...`);
          continue;
        }
        // Nothing pending: stop polling early
        if (!jsRedirectResult.detected) {
          break;
        }
      } catch (evalErr) {
        // page.evaluate can fail while the page is mid-navigation; stop polling
        debug(`Error checking JS redirects: ${evalErr.message}`);
        break;
      }
    }

    // Optional: Detect common JavaScript redirect patterns in page source
    if (detectJSPatterns) {
      await detectCommonJSRedirects(page, forceDebug, formatLogMessage);
    }

    // Final URL check
    const finalPageUrl = page.url();
    if (finalPageUrl && finalPageUrl !== finalUrl) {
      if (redirectChain.length >= maxRedirects) {
        debug(`Maximum redirects (${maxRedirects}) reached, keeping current finalUrl`);
      } else {
        finalUrl = finalPageUrl;
        redirected = true;
        if (!redirectChain.includes(finalUrl)) {
          redirectChain.push(finalUrl);
        }
      }
    }
  } finally {
    page.off('framenavigated', navigationHandler);
  }

  // Log redirect summary
  if (redirected) {
    debug(`Redirect chain: ${redirectChain.join(' → ')}`);
  }

  // Hostnames of every chain entry except the last, to exclude from matching.
  // NOTE(review): a same-host redirect leaves the final hostname in this list;
  // confirm against the caller whether that is intended.
  const redirectDomains = redirected && redirectChain.length > 1
    ? hostnamesOf(redirectChain.slice(0, -1))
    : [];

  return { finalUrl, redirected, redirectChain, originalUrl: currentUrl, redirectDomains };
}
/**
 * Scan the rendered page for source-level hints of a JavaScript redirect.
 *
 * Runs inside the page via page.evaluate(): the document's outerHTML is matched
 * against a few regular expressions and the DOM is queried for a meta-refresh tag.
 * Findings are informational only; with forceDebug enabled they are logged.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {boolean} forceDebug - Debug logging flag
 * @param {Function} formatLogMessage - Log formatting function
 * @returns {Promise<Array>} Detected patterns ({type, matches} or {type, content});
 *   an empty array when nothing matched or the evaluation failed
 */
async function detectCommonJSRedirects(page, forceDebug = false, formatLogMessage) {
  let redirectPatterns;
  try {
    redirectPatterns = await page.evaluate(() => {
      const html = document.documentElement.outerHTML;
      const found = [];

      // Append a {type, matches} entry when the regex hits at least once
      const collectMatches = (type, regex) => {
        const hits = html.match(regex);
        if (hits) {
          found.push({ type: type, matches: hits });
        }
      };

      // window.location = "url"
      collectMatches('window.location assignment', /window\.location\s*=\s*["']([^"']+)["']/g);
      // location.href = "url"
      collectMatches('location.href assignment', /location\.href\s*=\s*["']([^"']+)["']/g);
      // setTimeout(...) mentioning location
      collectMatches('setTimeout redirect', /setTimeout\s*\([^)]*location[^)]*\)/g);

      // <meta http-equiv="refresh"> present in the DOM
      const refreshTag = document.querySelector('meta[http-equiv="refresh"]');
      if (refreshTag) {
        found.push({ type: 'meta refresh', content: refreshTag.getAttribute('content') });
      }

      // document.location = "url"
      collectMatches('document.location assignment', /document\.location\s*=\s*["']([^"']+)["']/g);

      return found;
    });

    const shouldReport = forceDebug && redirectPatterns.length > 0;
    if (shouldReport) {
      console.log(formatLogMessage('debug', `Found ${redirectPatterns.length} potential JS redirect pattern(s):`));
      for (const [position, entry] of redirectPatterns.entries()) {
        console.log(formatLogMessage('debug', ` [${position + 1}] ${entry.type}: ${JSON.stringify(entry.matches || entry.content)}`));
      }
    }
  } catch (detectErr) {
    // Best-effort diagnostic: report (when debugging) and fall back to "nothing found"
    if (forceDebug) {
      console.log(formatLogMessage('debug', `Error detecting JS redirect patterns: ${detectErr.message}`));
    }
    return [];
  }
  return redirectPatterns;
}
/**
 * Enhanced timeout error handling for partial redirects.
 *
 * When a navigation timed out but the page had already moved to a different
 * domain, the navigation is treated as a successful redirect to the page's
 * current URL. Any other situation (non-timeout error, unchanged URL,
 * same domain, failure while inspecting the page) reports no recovery.
 *
 * @param {Page} page - Puppeteer page instance
 * @param {string} originalUrl - Original URL that was requested
 * @param {Error} error - Navigation error; only messages containing
 *   'Navigation timeout' are eligible for recovery. Missing errors or errors
 *   without a string message are treated as non-timeouts instead of throwing.
 * @param {Function} safeGetDomain - Domain extraction function
 * @param {boolean} forceDebug - Debug logging flag
 * @param {Function} formatLogMessage - Log formatting function
 * @returns {Promise<{success: boolean, finalUrl: string, redirected: boolean}>}
 */
async function handleRedirectTimeout(page, originalUrl, error, safeGetDomain, forceDebug = false, formatLogMessage) {
  const notRecovered = () => ({ success: false, finalUrl: originalUrl, redirected: false });

  // Tolerate null/undefined errors and thrown non-Error values
  const errorText = typeof error?.message === 'string' ? error.message : String(error ?? '');
  if (!errorText.includes('Navigation timeout')) {
    return notRecovered();
  }

  try {
    const currentPageUrl = page.url();
    const landedElsewhere = currentPageUrl && currentPageUrl !== 'about:blank' && currentPageUrl !== originalUrl;
    if (landedElsewhere) {
      const originalDomain = safeGetDomain(originalUrl);
      const currentDomain = safeGetDomain(currentPageUrl);
      // Only a change of domain counts as a recovered redirect
      if (originalDomain !== currentDomain) {
        if (forceDebug) {
          console.log(formatLogMessage('debug', `Partial redirect timeout recovered: ${originalDomain} → ${currentDomain}`));
        }
        return { success: true, finalUrl: currentPageUrl, redirected: true };
      }
    }
    return notRecovered();
  } catch (urlError) {
    if (forceDebug) {
      console.log(formatLogMessage('debug', `Error during timeout recovery: ${urlError.message}`));
    }
    return notRecovered();
  }
}
// CommonJS exports: the three functions defined in this file
module.exports = {
navigateWithRedirectHandling,
detectCommonJSRedirects,
handleRedirectTimeout
};