@fanboynz/network-scanner
Version:
A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.
349 lines (308 loc) • 11.4 kB
JavaScript
// === Clear Site Data Module ===
// Handles comprehensive site data clearing via CDP and page-level fallbacks
// Resolves SecurityError issues with localStorage/sessionStorage access
const { formatLogMessage } = require('./colorize');
/**
* Clears site data using CDP (bypasses same-origin restrictions)
* @param {Page} page - Puppeteer page instance
* @param {string} currentUrl - URL being processed
* @param {boolean} forceDebug - Debug logging flag
* @param {boolean} quickMode - If true, only clear cache/cookies (for reloads)
* @returns {Promise<{success: boolean, operations: string[]}>}
*/
async function clearSiteDataViaCDP(page, currentUrl, forceDebug, quickMode = false) {
let clearDataSession = null;
const completedOperations = [];
try {
clearDataSession = await Promise.race([
page.target().createCDPSession(),
new Promise((_, reject) =>
setTimeout(() => reject(new Error('CDP session timeout')), 10000)
)
]);
const origin = new URL(currentUrl).origin;
// Always clear cache and cookies (even in quick mode)
const basicOperations = [
{ cmd: 'Network.clearBrowserCookies', name: 'cookies' },
{ cmd: 'Network.clearBrowserCache', name: 'cache' }
];
for (const op of basicOperations) {
try {
await clearDataSession.send(op.cmd);
completedOperations.push(op.name);
} catch (opErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata] ${op.name} clear failed: ${opErr.message}`));
}
}
}
// Full storage clearing (skip in quick mode for reloads)
if (!quickMode) {
// Try comprehensive storage clearing first
try {
await clearDataSession.send('Storage.clearDataForOrigin', {
origin: origin,
storageTypes: 'all'
});
completedOperations.push('all_storage');
} catch (allStorageErr) {
// Fallback: try individual storage types
const storageTypes = [
{ type: 'local_storage', name: 'localStorage' },
{ type: 'session_storage', name: 'sessionStorage' },
{ type: 'indexeddb', name: 'indexedDB' },
{ type: 'websql', name: 'webSQL' },
{ type: 'service_workers', name: 'serviceWorkers' }
];
for (const storage of storageTypes) {
try {
await clearDataSession.send('Storage.clearDataForOrigin', {
origin: origin,
storageTypes: storage.type
});
completedOperations.push(storage.name);
} catch (individualErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata] ${storage.name} clear failed: ${individualErr.message}`));
}
}
}
}
}
if (forceDebug && completedOperations.length > 0) {
console.log(formatLogMessage('debug', `[clear_sitedata] CDP cleared: ${completedOperations.join(', ')}`));
}
return { success: completedOperations.length > 0, operations: completedOperations };
} catch (cdpErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata] CDP session failed: ${cdpErr.message}`));
}
return { success: false, operations: completedOperations };
} finally {
if (clearDataSession) {
try {
await clearDataSession.detach();
} catch (detachErr) {
// Ignore detach errors
}
}
}
}
/**
* Fallback page-level clearing with security error handling
* @param {Page} page - Puppeteer page instance
* @param {boolean} forceDebug - Debug logging flag
* @returns {Promise<{success: boolean, operations: string[]}>}
*/
async function clearSiteDataViaPage(page, forceDebug) {
try {
const result = await page.evaluate(() => {
const cleared = [];
// Test and clear localStorage
try {
if (window.localStorage && typeof window.localStorage.setItem === 'function') {
const testKey = '__nwss_access_test__';
localStorage.setItem(testKey, 'test');
localStorage.removeItem(testKey);
localStorage.clear();
cleared.push('localStorage');
}
} catch (e) {
// Security error expected on some sites
}
// Test and clear sessionStorage
try {
if (window.sessionStorage && typeof window.sessionStorage.setItem === 'function') {
const testKey = '__nwss_access_test__';
sessionStorage.setItem(testKey, 'test');
sessionStorage.removeItem(testKey);
sessionStorage.clear();
cleared.push('sessionStorage');
}
} catch (e) {
// Security error expected on some sites
}
// Clear IndexedDB
try {
if (window.indexedDB && typeof window.indexedDB.databases === 'function') {
window.indexedDB.databases().then(dbs => {
dbs.forEach(db => {
try {
window.indexedDB.deleteDatabase(db.name);
} catch (dbErr) {
// Individual DB deletion may fail
}
});
}).catch(() => {
// Database listing may fail
});
cleared.push('indexedDB');
}
} catch (e) {
// IndexedDB may not be available
}
return cleared;
});
if (forceDebug && result.length > 0) {
console.log(formatLogMessage('debug', `[clear_sitedata] Page-level cleared: ${result.join(', ')}`));
}
return { success: result.length > 0, operations: result };
} catch (pageErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata] Page evaluation failed: ${pageErr.message}`));
}
return { success: false, operations: [] };
}
}
/**
* Main entry point for site data clearing
* Attempts CDP clearing first, falls back to page-level if needed
* @param {Page} page - Puppeteer page instance
* @param {string} currentUrl - URL being processed
* @param {boolean} forceDebug - Debug logging flag
* @param {boolean} quickMode - If true, only clear cache/cookies (for reloads)
* @returns {Promise<{success: boolean, operations: string[], method: string}>}
*/
async function clearSiteData(page, currentUrl, forceDebug, quickMode = false) {
// Try CDP clearing first (preferred method)
const cdpResult = await clearSiteDataViaCDP(page, currentUrl, forceDebug, quickMode);
if (cdpResult.success) {
return {
success: true,
operations: cdpResult.operations,
method: 'CDP'
};
}
// Fallback to page-level clearing if CDP failed and not in quick mode
if (!quickMode) {
if (forceDebug) {
console.log(formatLogMessage('debug', `CDP clearing failed, attempting page-level fallback for ${currentUrl}`));
}
const pageResult = await clearSiteDataViaPage(page, forceDebug);
return {
success: pageResult.success,
operations: pageResult.operations,
method: pageResult.success ? 'page-level' : 'failed'
};
}
return {
success: false,
operations: [],
method: 'failed'
};
}
/**
* Enhanced site data clearing with additional browser-level operations
* Includes cache warming prevention and comprehensive storage cleanup
* @param {Page} page - Puppeteer page instance
* @param {string} currentUrl - URL being processed
* @param {boolean} forceDebug - Debug logging flag
* @returns {Promise<{success: boolean, operations: string[], method: string}>}
*/
async function clearSiteDataEnhanced(page, currentUrl, forceDebug) {
let clearDataSession = null;
const completedOperations = [];
try {
clearDataSession = await Promise.race([
page.target().createCDPSession(),
new Promise((_, reject) =>
setTimeout(() => reject(new Error('Enhanced CDP session timeout')), 15000)
)
]);
const origin = new URL(currentUrl).origin;
// Enhanced clearing operations
const enhancedOperations = [
// Network layer
{ cmd: 'Network.clearBrowserCookies', name: 'cookies' },
{ cmd: 'Network.clearBrowserCache', name: 'cache' },
// Storage layer - comprehensive
{
cmd: 'Storage.clearDataForOrigin',
params: { origin, storageTypes: 'all' },
name: 'all_storage'
},
// Runtime layer
{ cmd: 'Runtime.discardConsoleEntries', name: 'console' },
// Security layer
{ cmd: 'Security.disable', name: 'security_reset' }
];
for (const op of enhancedOperations) {
try {
if (op.params) {
await clearDataSession.send(op.cmd, op.params);
} else {
await clearDataSession.send(op.cmd);
}
completedOperations.push(op.name);
} catch (opErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] ${op.name} failed: ${opErr.message}`));
}
// For storage operations, try individual fallbacks
if (op.name === 'all_storage') {
const individualTypes = ['local_storage', 'session_storage', 'indexeddb', 'websql', 'service_workers'];
for (const type of individualTypes) {
try {
await clearDataSession.send('Storage.clearDataForOrigin', {
origin,
storageTypes: type
});
completedOperations.push(type);
} catch (individualErr) {
// Continue trying other types
}
}
}
}
}
// Additional DOM cleanup via page evaluation
try {
await page.evaluate(() => {
// Clear any cached DOM queries
if (window.document && document.querySelectorAll) {
// Force garbage collection of cached selectors
const div = document.createElement('div');
document.body.appendChild(div);
document.body.removeChild(div);
}
// Clear performance entries
if (window.performance && performance.clearMarks) {
performance.clearMarks();
performance.clearMeasures();
}
});
completedOperations.push('dom_cleanup');
} catch (domErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] DOM cleanup failed: ${domErr.message}`));
}
}
if (forceDebug && completedOperations.length > 0) {
console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] Cleared: ${completedOperations.join(', ')}`));
}
return {
success: completedOperations.length > 0,
operations: completedOperations,
method: 'enhanced_CDP'
};
} catch (enhancedErr) {
if (forceDebug) {
console.log(formatLogMessage('debug', `[clear_sitedata_enhanced] Failed: ${enhancedErr.message}`));
}
// Fallback to regular clearing
return await clearSiteData(page, currentUrl, forceDebug, false);
} finally {
if (clearDataSession) {
try {
await clearDataSession.detach();
} catch (detachErr) {
// Ignore detach errors
}
}
}
}
module.exports = {
clearSiteData,
clearSiteDataViaCDP,
clearSiteDataViaPage
};