@fanboynz/network-scanner
Version:
A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.
377 lines (336 loc) • 12.9 kB
JavaScript
/**
* Proxy Module for NWSS Network Scanner
* ======================================
* Routes specific site URLs through SOCKS5, SOCKS4, HTTP, or HTTPS proxies.
*
* Chromium's --proxy-server flag is browser-wide, so sites requiring a proxy
* need a separate browser instance. This module handles:
* - Parsing proxy URLs (all supported protocols)
* - Generating Chromium launch args
* - Per-page authentication via Puppeteer
* - Proxy bypass lists
* - Proxy health checks
*
* CONFIG EXAMPLES:
*
* SOCKS5 (no auth):
* "proxy": "socks5://127.0.0.1:1080"
*
* SOCKS5 with auth:
* "proxy": "socks5://user:pass@127.0.0.1:1080"
* Chromium itself cannot authenticate SOCKS5 (crbug.com/256785), so
* this module auto-starts an in-process no-auth SOCKS5 relay
* (lib/socks-relay.js) that does the upstream RFC 1929 auth. Chromium
* connects to the local relay (no auth — which it CAN do) and the
* relay tunnels to the authenticated upstream. Transparent: keep the
* socks5://user:pass@host form in config. Requires prepareSocksRelays()
* to be awaited once before the scan loop (nwss.js does this).
* NOTE: socks4 with auth is still unsupported (userId-only,
* near-extinct) — use socks5 or an authenticated HTTP proxy.
*
* HTTP proxy (corporate):
* "proxy": "http://proxy.corp.com:3128"
*
* HTTP proxy with auth:
* "proxy": "http://user:pass@proxy.corp.com:8080"
*
* HTTPS proxy:
* "proxy": "https://secure-proxy.example.com:8443"
*
* With bypass list and remote DNS:
* "proxy": "socks5://127.0.0.1:1080",
* "proxy_bypass": ["localhost", "127.0.0.1", "*.local"],
* "proxy_remote_dns": true
*
* Debug mode:
* "proxy": "socks5://127.0.0.1:1080",
* "proxy_debug": true
*
* Legacy key (backwards compatible):
* "socks5_proxy": "socks5://127.0.0.1:1080"
*
* INTEGRATION (in nwss.js):
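* const { needsProxy, prepareSocksRelays, getProxyArgs, applyProxyAuth, getProxyInfo } = require('./lib/proxy');
*
* // Once, before the scan loop (starts local relays for authenticated SOCKS5 upstreams)
* await prepareSocksRelays(siteConfigs, forceDebug);
*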
* // Before browser launch
* if (needsProxy(siteConfig)) {
* const proxyArgs = getProxyArgs(siteConfig, forceDebug);
* browserArgs.push(...proxyArgs);
* }
*
* // After page creation, before page.goto()
* await applyProxyAuth(page, siteConfig, forceDebug);
*
* @version 1.2.0
*/
const { formatLogMessage } = require('./colorize');
const { ensureRelay, getRelayPort } = require('./socks-relay');
// Version string of this module; reported by getModuleInfo() and exported.
const PROXY_MODULE_VERSION = '1.2.0';

// Proxy URL schemes accepted by parseProxyUrl(); any other scheme is rejected.
const SUPPORTED_PROTOCOLS = ['socks5', 'socks4', 'http', 'https'];

// Port assumed for each scheme when the proxy URL does not carry one.
const DEFAULT_PORTS = {
  socks5: 1080,
  socks4: 1080,
  http: 8080,
  https: 8443,
};
/**
 * Returns the configured proxy URL string from siteConfig.
 * Supports both "proxy" (preferred) and "socks5_proxy" (legacy) keys.
 * A missing (null/undefined) siteConfig is treated as "no proxy configured"
 * instead of throwing, so callers can pass through optional configs safely.
 *
 * @param {object|null|undefined} siteConfig
 * @returns {string|null} The first truthy value of `proxy` / `socks5_proxy`, else null
 */
function getConfiguredProxy(siteConfig) {
  return siteConfig?.proxy || siteConfig?.socks5_proxy || null;
}
/**
 * Extracts a port that is written explicitly in the authority part of a
 * proxy URL string (the text between "://" and the first "/", "?", "#").
 * Userinfo ("user:pass@") is skipped so digits in a password are not mistaken
 * for a port.
 *
 * @param {string} rawUrl - URL string that contains "://"
 * @returns {number|null} The explicit port, or null if none is written
 */
function readExplicitPort(rawUrl) {
  const afterScheme = rawUrl.slice(rawUrl.indexOf('://') + 3);
  const authority = afterScheme.split(/[/?#\\]/, 1)[0];
  const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);
  const match = /:(\d+)$/.exec(hostAndPort);
  return match ? parseInt(match[1], 10) : null;
}

/**
 * Parses a proxy URL into components.
 * Accepts: protocol://host:port, protocol://user:pass@host:port, bare host:port
 * (a bare host:port is treated as socks5://host:port).
 *
 * @param {string} proxyUrl - Proxy URL string
 * @returns {{protocol: string, host: string, port: number, username: (string|null), password: (string|null)}|null}
 *   Parsed proxy, or null if the input is not a string, cannot be parsed,
 *   has no host, or uses a protocol outside SUPPORTED_PROTOCOLS
 */
function parseProxyUrl(proxyUrl) {
  if (!proxyUrl || typeof proxyUrl !== 'string') return null;

  let cleaned = proxyUrl.trim();
  // Normalise bare host:port to socks5:// URL
  if (!cleaned.includes('://')) {
    cleaned = `socks5://${cleaned}`;
  }

  try {
    const url = new URL(cleaned);
    const protocol = url.protocol.replace(':', '');
    if (!SUPPORTED_PROTOCOLS.includes(protocol)) return null;

    const host = url.hostname;
    if (!host) return null;

    // The URL parser drops a port that equals the scheme's own default
    // (80 for http, 443 for https), leaving url.port === ''. Without the
    // readExplicitPort() fallback an explicit "http://proxy:80" would be
    // silently rewritten to this module's default (8080) and the scan would
    // talk to the wrong port.
    const port = parseInt(url.port, 10)
      || readExplicitPort(cleaned)
      || DEFAULT_PORTS[protocol]
      || 1080;

    // decodeURIComponent throws URIError on a literal '%' that isn't a valid
    // escape (e.g. a password containing '%'). Fall back to the raw value so
    // an otherwise-valid proxy isn't rejected as "Invalid proxy URL".
    const safeDecode = (value) => {
      try {
        return decodeURIComponent(value);
      } catch (_) {
        return value;
      }
    };
    const username = url.username ? safeDecode(url.username) : null;
    const password = url.password ? safeDecode(url.password) : null;

    return { protocol, host, port, username, password };
  } catch (_) {
    // new URL() rejected the string: report it as an invalid proxy.
    return null;
  }
}
/**
 * Tells whether the given site config has a proxy configured, i.e. whether
 * getConfiguredProxy() yields a value for it.
 *
 * @param {object} siteConfig
 * @returns {boolean} true when a proxy URL is configured
 */
function needsProxy(siteConfig) {
  const configuredProxy = getConfiguredProxy(siteConfig);
  return Boolean(configuredProxy);
}
/**
 * Starts a local no-auth SOCKS5 relay for each distinct authenticated SOCKS5
 * upstream found in the given site configs. Intended to be awaited ONCE
 * before the scan loop, so that getProxyArgs() can later look the relay port
 * up synchronously.
 *
 * Upstreams are de-duplicated by host, port and username. A relay that fails
 * to start is reported with console.warn and does not abort the remaining
 * ones.
 *
 * @param {object[]} siteConfigs - Site configs to inspect (null/undefined is treated as empty)
 * @param {boolean} forceDebug - Passed through to ensureRelay()
 * @returns {Promise<number>} Number of ensureRelay() calls that succeeded
 */
async function prepareSocksRelays(siteConfigs, forceDebug = false) {
  const handledUpstreams = new Set();
  let relayCount = 0;

  for (const siteConfig of siteConfigs || []) {
    const proxyUrl = getConfiguredProxy(siteConfig);
    if (!proxyUrl) continue;

    const upstream = parseProxyUrl(proxyUrl);
    // Only socks5 with credentials needs a relay. socks4-auth stays
    // unsupported; http/https auth is handled via page.authenticate().
    const wantsRelay = Boolean(upstream)
      && upstream.protocol === 'socks5'
      && Boolean(upstream.username);
    if (!wantsRelay) continue;

    const upstreamKey = `${upstream.host}:${upstream.port}:${upstream.username}`;
    if (handledUpstreams.has(upstreamKey)) continue;
    handledUpstreams.add(upstreamKey);

    try {
      await ensureRelay(upstream, forceDebug);
      relayCount += 1;
    } catch (relayError) {
      console.warn(formatLogMessage('proxy', `Failed to start SOCKS5 auth relay for ${upstream.host}:${upstream.port}: ${relayError.message}`));
    }
  }

  return relayCount;
}
/**
 * Returns Chromium launch arguments for the configured proxy.
 *
 * Produces up to three flags:
 *   --proxy-server=...          always (when a valid proxy is configured)
 *   --host-resolver-rules=...   SOCKS5 only, unless remote DNS is set to false
 *   --proxy-bypass-list=...     when a bypass list is configured
 *
 * @param {object} siteConfig - Reads proxy / socks5_proxy (via getConfiguredProxy),
 *   proxy_remote_dns / socks5_remote_dns, proxy_bypass / socks5_bypass
 *   (array of patterns, or a single pattern string) and proxy_debug / socks5_debug
 * @param {boolean} forceDebug - Enables debug logging regardless of siteConfig
 * @returns {string[]} Array of Chromium args (empty if no proxy configured or the URL is invalid)
 */
function getProxyArgs(siteConfig, forceDebug = false) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) return [];

  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    console.warn(formatLogMessage('proxy', `Invalid proxy URL: ${proxyUrl}`));
    return [];
  }

  const debug = forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug;

  // Authenticated SOCKS5: Chromium can't auth SOCKS, so point it at the
  // local no-auth relay (started upfront by prepareSocksRelays) which does
  // the upstream auth. Credentials never reach Chromium.
  let effectiveHost = parsed.host;
  let effectivePort = parsed.port;
  if (parsed.protocol === 'socks5' && parsed.username) {
    const relayPort = getRelayPort(parsed);
    if (relayPort) {
      effectiveHost = '127.0.0.1';
      effectivePort = relayPort;
      if (debug) {
        console.log(formatLogMessage('proxy', `SOCKS5 auth via local relay 127.0.0.1:${relayPort} -> ${parsed.host}:${parsed.port}`));
      }
    } else {
      // prepareSocksRelays should have started this; defensive only.
      console.warn(formatLogMessage('proxy', `No SOCKS5 auth relay for ${parsed.host}:${parsed.port} — call prepareSocksRelays() before the scan. Connection will fail (Chromium can't auth SOCKS).`));
    }
  }

  const args = [
    `--proxy-server=${parsed.protocol}://${effectiveHost}:${effectivePort}`
  ];

  // Remote DNS: force proxy-side hostname resolution (prevents DNS leaks).
  // SOCKS5 only — it can carry a hostname to the proxy for remote
  // resolution. SOCKS4 cannot (the protocol only accepts an IPv4 address),
  // so the rule is never applied there. HTTP/HTTPS proxies resolve remotely
  // by default and need no rule.
  const remoteDns = siteConfig.proxy_remote_dns ?? siteConfig.socks5_remote_dns;
  if (parsed.protocol === 'socks5' && remoteDns !== false) {
    const resolverRules = ['MAP * ~NOTFOUND', 'EXCLUDE 127.0.0.1'];
    // "MAP * ~NOTFOUND" also blocks resolution of the proxy's own hostname,
    // so a proxy given by name (socks5://proxy.example.com:1080) could never
    // be reached. Exclude the host Chromium actually connects to as well.
    if (effectiveHost !== '127.0.0.1') {
      resolverRules.push(`EXCLUDE ${effectiveHost}`);
    }
    args.push(`--host-resolver-rules=${resolverRules.join(' , ')}`);
  } else if (parsed.protocol === 'socks4' && remoteDns === true) {
    console.warn(formatLogMessage('proxy', `proxy_remote_dns ignored: SOCKS4 cannot do proxy-side DNS resolution (use SOCKS5)`));
  }

  // Bypass list: domains that skip the proxy. A single pattern string is
  // accepted as a one-element list (it previously crashed on .join).
  const bypassConfig = siteConfig.proxy_bypass || siteConfig.socks5_bypass || [];
  let bypass = [];
  if (Array.isArray(bypassConfig)) {
    bypass = bypassConfig;
  } else if (typeof bypassConfig === 'string') {
    bypass = [bypassConfig];
  }
  if (bypass.length > 0) {
    args.push(`--proxy-bypass-list=${bypass.join(';')}`);
  }

  if (debug) {
    console.log(formatLogMessage('proxy', `[${parsed.protocol}] Args: ${args.join(' ')}`));
  }
  return args;
}
/**
 * Registers proxy credentials on a page through Puppeteer's
 * page.authenticate(). Must be called BEFORE page.goto().
 *
 * Only HTTP/HTTPS proxies with a username are handled here:
 *   - socks5 + credentials: nothing to do, the local relay performs the
 *     upstream auth (see prepareSocksRelays / getProxyArgs)
 *   - socks4 + credentials: unsupported, a warning is logged
 *
 * @param {object} page - Puppeteer page instance
 * @param {object} siteConfig
 * @param {boolean} forceDebug
 * @returns {Promise<boolean>} True if page.authenticate() was applied successfully
 */
async function applyProxyAuth(page, siteConfig, forceDebug = false) {
  const configuredUrl = getConfiguredProxy(siteConfig);
  const proxy = configuredUrl ? parseProxyUrl(configuredUrl) : null;
  if (!proxy || !proxy.username) return false;

  // page.authenticate() only answers HTTP 407 challenges, so SOCKS
  // protocols are filtered out before it is called.
  switch (proxy.protocol) {
    case 'socks5':
      return false; // relay handles upstream auth
    case 'socks4':
      console.warn(formatLogMessage('proxy', `SOCKS4 proxy auth is unsupported (use SOCKS5, which is auto-relayed, or an authenticated HTTP proxy).`));
      return false;
    default:
      break;
  }

  const credentials = {
    username: proxy.username,
    password: proxy.password || ''
  };

  try {
    await page.authenticate(credentials);
    if (forceDebug || siteConfig.proxy_debug || siteConfig.socks5_debug) {
      console.log(formatLogMessage('proxy', `Auth set for ${proxy.username}@${proxy.host}:${proxy.port}`));
    }
    return true;
  } catch (authError) {
    console.warn(formatLogMessage('proxy', `Failed to set proxy auth: ${authError.message}`));
    return false;
  }
}
/**
 * Tests proxy connectivity by attempting a plain TCP connection to the
 * configured proxy host and port. No proxy handshake or authentication is
 * performed; this only shows that something accepts connections there.
 *
 * @param {object} siteConfig
 * @param {number} timeoutMs - Connection timeout (default 5000ms)
 * @returns {Promise<{reachable: boolean, latencyMs: number, error: (string|null)}>}
 *   Never rejects; failures are reported through `reachable: false` and `error`
 */
async function testProxy(siteConfig, timeoutMs = 5000) {
  const proxyUrl = getConfiguredProxy(siteConfig);
  if (!proxyUrl) {
    return { reachable: false, latencyMs: 0, error: 'No proxy configured' };
  }
  const parsed = parseProxyUrl(proxyUrl);
  if (!parsed) {
    return { reachable: false, latencyMs: 0, error: 'Invalid proxy URL' };
  }

  const net = require('net');

  // URL.hostname keeps the square brackets around an IPv6 literal
  // ("[::1]"). net.Socket.connect() does not recognise the bracketed form
  // as an IP address and would try to resolve it as a DNS name, so an IPv6
  // proxy would always be reported unreachable. Strip the brackets here.
  const connectHost = parsed.host.replace(/^\[(.*)\]$/, '$1');

  const start = Date.now();
  return new Promise((resolve) => {
    const socket = new net.Socket();

    // Single exit path: always release the socket before reporting.
    // resolve() is a no-op after the first call, so a late second event
    // cannot change the result.
    const finish = (reachable, error) => {
      const latencyMs = Date.now() - start;
      socket.destroy();
      resolve({ reachable, latencyMs, error });
    };

    socket.setTimeout(timeoutMs);
    socket.on('error', (err) => finish(false, err.message));
    socket.on('timeout', () => finish(false, 'Connection timeout'));
    socket.connect(parsed.port, connectHost, () => finish(true, null));
  });
}
/**
 * Builds a short, log-friendly description of the configured proxy.
 * The password is never included; only the username (if any) is shown.
 *
 * @param {object} siteConfig
 * @returns {string} 'none' when no proxy is configured, 'invalid' when the
 *   URL cannot be parsed, otherwise "protocol://[user@]host:port"
 */
function getProxyInfo(siteConfig) {
  const configuredUrl = getConfiguredProxy(siteConfig);
  if (!configuredUrl) {
    return 'none';
  }

  const proxy = parseProxyUrl(configuredUrl);
  if (!proxy) {
    return 'invalid';
  }

  const { protocol, host, port, username } = proxy;
  const userPrefix = username ? `${username}@` : '';
  return `${protocol}://${userPrefix}${host}:${port}`;
}
/**
 * Returns module version information.
 *
 * @returns {{version: string, name: string}} The module version
 *   (PROXY_MODULE_VERSION) and its display name
 */
function getModuleInfo() {
  const moduleInfo = {
    version: PROXY_MODULE_VERSION,
    name: 'Proxy Handler'
  };
  return moduleInfo;
}
// Re-export relay teardown so nwss.js cleanup paths can close listeners.
// socks-relay's closeAllRelays is exposed under the name closeAllSocksRelays.
const { closeAllRelays: closeAllSocksRelays } = require('./socks-relay');
// Public API of the proxy module: the functions defined in this file, the
// relay teardown re-export, and two constants (version, supported protocols).
module.exports = {
parseProxyUrl,
needsProxy,
prepareSocksRelays,
closeAllSocksRelays,
getProxyArgs,
applyProxyAuth,
testProxy,
getProxyInfo,
getModuleInfo,
getConfiguredProxy,
PROXY_MODULE_VERSION,
SUPPORTED_PROTOCOLS
};