UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

1,104 lines (969 loc) 36.1 kB
const net = require('node:net'); const { formatLogMessage, messageColors } = require('./colorize'); // Cross-module validators wired into site-config validation — previously // each had to be called separately (or wasn't called at all). Centralizing // here means a single validateSiteConfig surfaces ALL misconfigurations // at startup instead of mid-scan. // - validateSearchString: had ZERO callers anywhere before this hookup. // - validateVpnConfig / validateOvpnConfig: called inside connectForSite // per-site at scan time. Adding here catches errors at startup. const { validateSearchString } = require('./searchstring'); const { validateVpnConfig: validateWgConfig, normalizeVpnConfig: normalizeWgConfig } = require('./wireguard_vpn'); const { validateOvpnConfig, normalizeOvpnConfig } = require('./openvpn_vpn'); const CLEAN_TAG = messageColors.processing('[clean]'); // Pre-compiled regex constants for validation. IPv4/IPv6 validation now // uses Node's built-in net.isIP() — the old hand-rolled regexes were // incomplete (missing IPv4-mapped IPv6, zone identifiers, etc.) and // silently accepted some malformed inputs like '2001:db8:::1'. const REGEX_LABEL = /^[a-zA-Z0-9-]+$/; const REGEX_TLD = /^[a-zA-Z][a-zA-Z0-9]*$/; // Module-level Set of valid adblock filter modifiers. Was previously // re-allocated inside validateAdblockModifiers on every call — for a // 100k-line filter list that's 100k identical Set allocations. const VALID_MODIFIERS = new Set([ // Resource type modifiers 'script', 'stylesheet', 'image', 'object', 'xmlhttprequest', 'subdocument', 'ping', 'websocket', 'webrtc', 'document', 'elemhide', 'generichide', 'genericblock', 'popup', 'font', 'media', 'other', // Party modifiers 'third-party', 'first-party', '~third-party', '~first-party', // Domain modifiers (domain= is validated separately below) 'domain', // Method modifiers 'match-case', '~match-case', // Action modifiers 'important', 'badfilter', // CSP and redirect modifiers 'csp', 'redirect', 'redirect-rule', // uBlock Origin specific 'inline-script', 'inline-font', 'mp4', 'empty', 'xhr' ]); /** * Enhanced domain validation function * @param {string} domain - The domain to validate * @returns {boolean} True if domain is valid, false otherwise */ function isValidDomain(domain) { if (!domain || typeof domain !== 'string') { return false; } // Trim whitespace domain = domain.trim(); // Check minimum length (shortest valid domain is something like "a.b" = 3 chars) if (domain.length < 3) { return false; } // Check maximum length (RFC 1035 - 253 characters max) if (domain.length > 253) { return false; } // Check for IP addresses (both IPv4 and IPv6) if (isIPAddress(domain)) { return true; // IP addresses are valid targets } // Must contain at least one dot if (!domain.includes('.')) { return false; } // Cannot start or end with dot if (domain.startsWith('.') || domain.endsWith('.')) { return false; } // Cannot contain consecutive dots if (domain.includes('..')) { return false; } // Split into labels and validate each const labels = domain.split('.'); // Must have at least 2 labels (domain.tld) if (labels.length < 2) { return false; } // Validate each label for (const label of labels) { if (!isValidDomainLabel(label)) { return false; } } // TLD (last label) validation const tld = labels[labels.length - 1]; if (!isValidTLD(tld)) { return false; } return true; } /** * Validates a single domain label * @param {string} label - The label to validate * @returns {boolean} True if label is valid */ function isValidDomainLabel(label) { if (!label || label.length === 0) { return false; } // Label cannot be longer than 63 characters (RFC 1035) if (label.length > 63) { return false; } // Label cannot start or end with hyphen if (label.startsWith('-') || label.endsWith('-')) { return false; } // Label can only contain alphanumeric characters and hyphens if (!REGEX_LABEL.test(label)) { return false; } return true; } /** * Validates TLD (Top Level Domain) * @param {string} tld - The TLD to validate * @returns {boolean} True if TLD is valid */ function isValidTLD(tld) { if (!tld || tld.length === 0) { return false; } // TLD must be at least 2 characters if (tld.length < 2) { return false; } // Allow numeric TLDs for modern domains like .1password // but still validate structure // TLD can contain letters and numbers, but must start with letter if (!REGEX_TLD.test(tld)) { return false; } return true; } /** * Checks if a string is an IP address (IPv4 or IPv6). * Delegates to Node's net.isIP() — standards-compliant, no regex to * maintain. Returns true for any valid IP form including IPv4-mapped * IPv6 (::ffff:192.0.2.1) which the old hand-rolled regex missed. * @param {string} str - String to check * @returns {boolean} True if it's an IP address */ function isIPAddress(str) { return net.isIP(str) !== 0; } /** * @param {string} str * @returns {boolean} True if valid IPv4 */ function isIPv4(str) { return net.isIPv4(str); } /** * @param {string} str * @returns {boolean} True if valid IPv6 */ function isIPv6(str) { return net.isIPv6(str); } /** * Validates a regex pattern string * @param {string} pattern - The regex pattern to validate * @returns {object} Validation result with isValid boolean and error message */ function validateRegexPattern(pattern) { if (!pattern || typeof pattern !== 'string') { return { isValid: false, error: 'Pattern must be a non-empty string' }; } try { // Handle /pattern/flags literal syntax. The old `^\/(.*)\/$/` strip // didn't match patterns with flags ('/foo/i'), so they passed through // unchanged to `new RegExp('/foo/i')` — which compiled a regex that // matched the LITERAL string '/foo/i' instead of the intended `foo` // pattern with the `i` flag. Silent acceptance of malformed input. const literalMatch = pattern.match(/^\/(.*)\/([gimsuy]*)$/); if (literalMatch) { new RegExp(literalMatch[1], literalMatch[2]); } else { new RegExp(pattern); } return { isValid: true }; } catch (err) { return { isValid: false, error: `Invalid regex: ${err.message}` }; } } /** * Validates adblock filter modifiers * @param {string} modifiers - The modifier string (e.g., "script,third-party") * @returns {object} Validation result */ function validateAdblockModifiers(modifiers) { if (!modifiers) { return { isValid: true, modifiers: [] }; } const modifierList = modifiers.split(',').map(m => m.trim()); const invalidModifiers = []; const parsedModifiers = []; for (const modifier of modifierList) { if (!modifier) continue; // Handle domain= modifier specially if (modifier.startsWith('domain=')) { const domains = modifier.substring(7); if (domains) { // Validate domain list format (domains separated by |) const domainList = domains.split('|'); for (const domain of domainList) { const cleanDomain = domain.startsWith('~') ? domain.substring(1) : domain; if (cleanDomain && !isValidDomain(cleanDomain)) { invalidModifiers.push(`Invalid domain in domain= modifier: ${cleanDomain}`); } } parsedModifiers.push({ type: 'domain', value: domains }); } else { invalidModifiers.push('Empty domain= modifier'); } continue; } // Handle csp= modifier if (modifier.startsWith('csp=')) { const cspValue = modifier.substring(4); if (!cspValue) { invalidModifiers.push('Empty csp= modifier'); } else { parsedModifiers.push({ type: 'csp', value: cspValue }); } continue; } // Handle redirect= modifier if (modifier.startsWith('redirect=')) { const redirectValue = modifier.substring(9); if (!redirectValue) { invalidModifiers.push('Empty redirect= modifier'); } else { parsedModifiers.push({ type: 'redirect', value: redirectValue }); } continue; } // Check for negated modifiers (starting with ~) const isNegated = modifier.startsWith('~'); const baseModifier = isNegated ? modifier.substring(1) : modifier; if (VALID_MODIFIERS.has(modifier) || VALID_MODIFIERS.has(baseModifier)) { parsedModifiers.push({ type: baseModifier, negated: isNegated, raw: modifier }); } else { invalidModifiers.push(modifier); } } if (invalidModifiers.length > 0) { return { isValid: false, error: `Invalid modifiers: ${invalidModifiers.join(', ')}`, validModifiers: parsedModifiers }; } return { isValid: true, modifiers: parsedModifiers }; } /** * Validates adblock rule format with comprehensive modifier support * @param {string} rule - The rule to validate * @returns {object} Validation result with format type and validity */ function validateAdblockRule(rule) { if (!rule || typeof rule !== 'string') { return { isValid: false, format: 'unknown', error: 'Rule must be a non-empty string' }; } const trimmedRule = rule.trim(); // Skip comments if (trimmedRule.startsWith('!') || trimmedRule.startsWith('#')) { return { isValid: true, format: 'comment' }; } // Strip @@ exception (whitelist) prefix and run the rest of validation // on the remainder. Exception rules are standard adblock syntax // (e.g. '@@||example.com^', '@@||example.com^$image') and appear // throughout real-world filter lists like EasyList — without this, // `nwss --validate-rules easylist.txt` flagged every exception as // 'Unrecognized rule format'. We attach `isException: true` to the // result so downstream consumers can see the whitelist intent. let isException = false; let working = trimmedRule; if (working.startsWith('@@')) { isException = true; working = working.substring(2); if (!working) { return { isValid: false, format: 'unknown', error: '@@ exception prefix with no rule body' }; } } // @@ only makes sense as a prefix for adblock-format rules. Bail // early if it's prefixing something else (e.g. '@@127.0.0.1 host' // is meaningless — localhost format has no exception concept). if (isException && !(working.startsWith('||') && working.includes('^')) && !(working.includes('^$'))) { return { isValid: false, format: 'unknown', isException: true, error: '@@ exception prefix only valid for adblock-format rules' }; } // Adblock format: ||domain.com^ or ||domain.com^$script,third-party. // Uses `working` (post-@@-strip body) instead of `trimmedRule`. const ruleBody = working; if (ruleBody.startsWith('||') && ruleBody.includes('^')) { const parts = ruleBody.substring(2).split('^'); const domain = parts[0]; if (!isValidDomain(domain)) { return { isValid: false, format: 'adblock', isException, error: `Invalid domain in adblock rule: ${domain}` }; } // Check for modifiers after ^$ let modifiers = ''; let modifierValidation = { isValid: true, modifiers: [] }; if (parts.length > 1 && parts[1].startsWith('$')) { modifiers = parts[1].substring(1); modifierValidation = validateAdblockModifiers(modifiers); if (!modifierValidation.isValid) { return { isValid: false, format: 'adblock', isException, error: `${modifierValidation.error} in rule: ${trimmedRule}`, domain, modifiers: modifierValidation.validModifiers || [] }; } } return { isValid: true, format: 'adblock', isException, domain, modifiers: modifierValidation.modifiers, hasModifiers: modifiers.length > 0 }; } // Basic adblock format without ||: domain.com^$modifier if (ruleBody.includes('^') && ruleBody.includes('$')) { const parts = ruleBody.split('^$'); if (parts.length === 2) { const domain = parts[0]; const modifiers = parts[1]; if (!isValidDomain(domain)) { return { isValid: false, format: 'adblock-basic', isException, error: `Invalid domain in adblock rule: ${domain}` }; } const modifierValidation = validateAdblockModifiers(modifiers); if (!modifierValidation.isValid) { return { isValid: false, format: 'adblock-basic', isException, error: modifierValidation.error, domain }; } return { isValid: true, format: 'adblock-basic', isException, domain, modifiers: modifierValidation.modifiers }; } } // Removed: "Simple adblock format" branch for `||domain.com^` without // modifiers. The main `||...^` branch above already handles this case // (parts becomes ['domain.com', ''] on split, the empty modifier check // falls through to the success return as format='adblock'). This branch // was unreachable dead code. // Localhost format: 127.0.0.1 domain.com or 0.0.0.0 domain.com if (trimmedRule.match(/^(127\.0\.0\.1|0\.0\.0\.0)\s+/)) { const parts = trimmedRule.split(/\s+/); if (parts.length >= 2) { const domain = parts[1]; if (isValidDomain(domain)) { return { isValid: true, format: 'localhost', domain }; } else { return { isValid: false, format: 'localhost', error: `Invalid domain in localhost rule: ${domain}` }; } } return { isValid: false, format: 'localhost', error: 'Malformed localhost rule' }; } // DNSmasq format: local=/domain.com/ if (trimmedRule.startsWith('local=/') && trimmedRule.endsWith('/')) { const domain = trimmedRule.substring(6, trimmedRule.length - 1); if (isValidDomain(domain)) { return { isValid: true, format: 'dnsmasq', domain }; } else { return { isValid: false, format: 'dnsmasq', error: `Invalid domain in dnsmasq rule: ${domain}` }; } } // DNSmasq old format: server=/domain.com/ if (trimmedRule.startsWith('server=/') && trimmedRule.endsWith('/')) { const domain = trimmedRule.substring(7, trimmedRule.length - 1); if (isValidDomain(domain)) { return { isValid: true, format: 'dnsmasq-old', domain }; } else { return { isValid: false, format: 'dnsmasq-old', error: `Invalid domain in dnsmasq-old rule: ${domain}` }; } } // Unbound format: local-zone: "domain.com." always_null if (trimmedRule.startsWith('local-zone: "') && trimmedRule.includes('" always_null')) { const domain = trimmedRule.substring(13).split('"')[0]; const cleanDomain = domain.endsWith('.') ? domain.slice(0, -1) : domain; if (isValidDomain(cleanDomain)) { return { isValid: true, format: 'unbound', domain: cleanDomain }; } else { return { isValid: false, format: 'unbound', error: `Invalid domain in unbound rule: ${cleanDomain}` }; } } // Privoxy format: { +block } .domain.com if (trimmedRule.startsWith('{ +block } .')) { const domain = trimmedRule.substring(12); if (isValidDomain(domain)) { return { isValid: true, format: 'privoxy', domain }; } else { return { isValid: false, format: 'privoxy', error: `Invalid domain in privoxy rule: ${domain}` }; } } // Pi-hole regex format: (^|\.)domain\.com$ if (trimmedRule.match(/^\(\^\|\\?\.\).*\$$/)) { const domain = trimmedRule.replace(/^\(\^\|\\?\.\)/, '').replace(/\\\./g, '.').replace(/\$$/, ''); if (isValidDomain(domain)) { return { isValid: true, format: 'pihole', domain }; } else { return { isValid: false, format: 'pihole', error: `Invalid domain in pihole rule: ${domain}` }; } } // Plain domain format if (isValidDomain(trimmedRule)) { return { isValid: true, format: 'plain', domain: trimmedRule }; } return { isValid: false, format: 'unknown', error: 'Unrecognized rule format' }; } /** * Validates an entire ruleset file * @param {string} filePath - Path to the file to validate * @param {object} options - Validation options * @returns {object} Validation results with statistics and errors */ function validateRulesetFile(filePath, options = {}) { const { forceDebug = false, silentMode = false, maxErrors = 10 } = options; const fs = require('fs'); let content; try { content = fs.readFileSync(filePath, 'utf8'); } catch (err) { return { isValid: false, error: `Failed to read file: ${err.message}`, stats: { total: 0, valid: 0, invalid: 0, comments: 0 } }; } const lines = content.split('\n'); const stats = { total: 0, valid: 0, invalid: 0, comments: 0, formats: {} }; const errors = []; const duplicates = new Set(); const seenRules = new Set(); for (let i = 0; i < lines.length; i++) { const line = lines[i].trim(); // Skip empty lines if (!line) continue; stats.total++; const lineNumber = i + 1; const validation = validateAdblockRule(line); if (validation.format === 'comment') { stats.comments++; continue; } if (validation.isValid) { stats.valid++; // Track format types if (!stats.formats[validation.format]) { stats.formats[validation.format] = 0; } stats.formats[validation.format]++; // Check for duplicates if (seenRules.has(line)) { duplicates.add(line); if (forceDebug) { errors.push(`Line ${lineNumber}: Duplicate rule - ${line}`); } } else { seenRules.add(line); } } else { stats.invalid++; errors.push(`Line ${lineNumber}: ${validation.error} - ${line}`); if (errors.length >= maxErrors) { errors.push(`... (stopping after ${maxErrors} errors, ${stats.total - i - 1} lines remaining)`); break; } } } // Log validation results if (!silentMode) { if (forceDebug) { console.log(formatLogMessage('debug', `Validated ${filePath}:`)); console.log(formatLogMessage('debug', ` Total lines: ${stats.total} (${stats.comments} comments)`)); console.log(formatLogMessage('debug', ` Valid rules: ${stats.valid}`)); console.log(formatLogMessage('debug', ` Invalid rules: ${stats.invalid}`)); console.log(formatLogMessage('debug', ` Duplicates found: ${duplicates.size}`)); if (Object.keys(stats.formats).length > 0) { console.log(formatLogMessage('debug', ` Format breakdown:`)); Object.entries(stats.formats).forEach(([format, count]) => { console.log(formatLogMessage('debug', ` ${format}: ${count}`)); }); } } if (errors.length > 0) { console.log(formatLogMessage('warn', `Validation errors in ${filePath}:`)); errors.slice(0, 5).forEach(error => { console.log(formatLogMessage('warn', ` ${error}`)); }); if (errors.length > 5) { console.log(formatLogMessage('warn', ` ... and ${errors.length - 5} more errors`)); } } } return { isValid: stats.invalid === 0, stats, errors, duplicates: Array.from(duplicates), filePath }; } /** * Validates configuration object for site settings * @param {object} siteConfig - Site configuration to validate * @param {number} siteIndex - Index of the site for error reporting * @returns {object} Validation result with warnings and errors */ function validateSiteConfig(siteConfig, siteIndex = 0) { const warnings = []; const errors = []; // Check required fields if (!siteConfig.url) { errors.push(`Site ${siteIndex}: Missing required 'url' field`); } else { // Validate URLs const urls = Array.isArray(siteConfig.url) ? siteConfig.url : [siteConfig.url]; urls.forEach((url, urlIndex) => { try { new URL(url); } catch (urlErr) { errors.push(`Site ${siteIndex}, URL ${urlIndex}: Invalid URL format - ${url}`); } }); } // Validate regex patterns if (siteConfig.filterRegex) { const regexes = Array.isArray(siteConfig.filterRegex) ? siteConfig.filterRegex : [siteConfig.filterRegex]; regexes.forEach((pattern, patternIndex) => { const validation = validateRegexPattern(pattern); if (!validation.isValid) { errors.push(`Site ${siteIndex}, filterRegex ${patternIndex}: ${validation.error}`); } }); } // Validate blocked patterns if (siteConfig.blocked) { if (!Array.isArray(siteConfig.blocked)) { errors.push(`Site ${siteIndex}: 'blocked' must be an array`); } else { siteConfig.blocked.forEach((pattern, patternIndex) => { const validation = validateRegexPattern(pattern); if (!validation.isValid) { errors.push(`Site ${siteIndex}, blocked ${patternIndex}: ${validation.error}`); } }); } } // Validate resource types if (siteConfig.resourceTypes) { if (!Array.isArray(siteConfig.resourceTypes)) { errors.push(`Site ${siteIndex}: 'resourceTypes' must be an array`); } else { const validTypes = ['script', 'stylesheet', 'image', 'font', 'document', 'subdocument', 'xhr', 'fetch', 'websocket', 'media', 'ping', 'other']; siteConfig.resourceTypes.forEach(type => { if (!validTypes.includes(type)) { warnings.push(`Site ${siteIndex}: Unknown resourceType '${type}'. Valid types: ${validTypes.join(', ')}`); } }); } } // Validate CSS selectors if (siteConfig.css_blocked) { if (!Array.isArray(siteConfig.css_blocked)) { errors.push(`Site ${siteIndex}: 'css_blocked' must be an array`); } // Note: CSS selector validation would be complex, skipping for now } // Validate numeric fields const numericFields = ['delay', 'reload', 'timeout']; numericFields.forEach(field => { if (siteConfig[field] !== undefined) { if (typeof siteConfig[field] !== 'number' || siteConfig[field] < 0) { errors.push(`Site ${siteIndex}: '${field}' must be a positive number`); } } }); // Validate boolean fields const booleanFields = ['interact', 'clear_sitedata', 'firstParty', 'thirdParty', 'screenshot', 'headful', 'ignore_similar', 'ignore_similar_ignored_domains']; booleanFields.forEach(field => { if (siteConfig[field] !== undefined && typeof siteConfig[field] !== 'boolean') { warnings.push(`Site ${siteIndex}: '${field}' should be a boolean (true/false)`); } }); // Cross-module validation: searchstring/searchstring_and. validateSearchString // catches things like both-defined-at-once (forbidden), empty arrays, length // caps, non-string elements. Before this call was added, misconfigured // searchstring values silently passed validation and only surfaced as // runtime TypeErrors mid-scan. if (siteConfig.searchstring !== undefined || siteConfig.searchstring_and !== undefined) { const ssValidation = validateSearchString(siteConfig.searchstring, siteConfig.searchstring_and); if (!ssValidation.isValid) { errors.push(`Site ${siteIndex}: ${ssValidation.error}`); } } // Cross-module validation: VPN configs. nwss.js dispatches on field // presence — `vpn` → WireGuard, `openvpn` → OpenVPN. Both validators // require a normalized config object, so normalize first. Previously // VPN errors only surfaced inside connectForSite at scan time; now // misconfigured configs fail loudly at startup. if (siteConfig.vpn !== undefined && siteConfig.openvpn !== undefined) { warnings.push(`Site ${siteIndex}: both 'vpn' (WireGuard) and 'openvpn' set — runtime dispatches to WireGuard, openvpn config will be ignored`); } if (siteConfig.vpn !== undefined) { const normalized = normalizeWgConfig(siteConfig.vpn); const vpnValidation = validateWgConfig(normalized); if (!vpnValidation.isValid) { vpnValidation.errors.forEach(e => errors.push(`Site ${siteIndex} (WireGuard): ${e}`)); } // Validator warnings (e.g. "requires root") propagate too. (vpnValidation.warnings || []).forEach(w => warnings.push(`Site ${siteIndex} (WireGuard): ${w}`)); } if (siteConfig.openvpn !== undefined && siteConfig.vpn === undefined) { const normalized = normalizeOvpnConfig(siteConfig.openvpn); const ovpnValidation = validateOvpnConfig(normalized); if (!ovpnValidation.isValid) { ovpnValidation.errors.forEach(e => errors.push(`Site ${siteIndex} (OpenVPN): ${e}`)); } (ovpnValidation.warnings || []).forEach(w => warnings.push(`Site ${siteIndex} (OpenVPN): ${w}`)); } // Validate ignore_similar_threshold if (siteConfig.ignore_similar_threshold !== undefined) { if (typeof siteConfig.ignore_similar_threshold !== 'number' || siteConfig.ignore_similar_threshold < 0 || siteConfig.ignore_similar_threshold > 100) { errors.push(`Site ${siteIndex}: 'ignore_similar_threshold' must be a number between 0 and 100`); } } // Validate user agent if (siteConfig.userAgent) { const validUserAgents = ['chrome', 'firefox', 'safari']; if (!validUserAgents.includes(siteConfig.userAgent.toLowerCase())) { warnings.push(`Site ${siteIndex}: Unknown userAgent '${siteConfig.userAgent}'. Valid options: ${validUserAgents.join(', ')}`); } } // Check for conflicting output format options const outputFormats = ['localhost', 'localhost_0_0_0_0', 'plain', 'dnsmasq', 'dnsmasq_old', 'unbound', 'privoxy', 'pihole', 'adblock_rules']; const enabledFormats = outputFormats.filter(format => siteConfig[format] === true); if (enabledFormats.length > 1) { warnings.push(`Site ${siteIndex}: Multiple output formats enabled (${enabledFormats.join(', ')}). Only one should be used.`); } return { isValid: errors.length === 0, warnings, errors }; } /** * Cleans a ruleset file by removing invalid lines and optionally duplicates * @param {string} filePath - Path to the file to clean * @param {string} outputPath - Optional output path (defaults to overwriting input file) * @param {object} options - Cleaning options * @returns {object} Cleaning results with statistics */ function cleanRulesetFile(filePath, outputPath = null, options = {}) { const { forceDebug = false, silentMode = false, removeDuplicates = false, backupOriginal = true, dryRun = false } = options; const fs = require('fs'); const path = require('path'); let content; try { content = fs.readFileSync(filePath, 'utf8'); } catch (err) { return { success: false, error: `Failed to read file: ${err.message}`, stats: { total: 0, valid: 0, invalid: 0, removed: 0, duplicates: 0 } }; } const lines = content.split('\n'); const validLines = []; const invalidLines = []; const seenRules = new Set(); const duplicateLines = []; const stats = { total: 0, valid: 0, invalid: 0, removed: 0, duplicates: 0, comments: 0, empty: 0 }; for (let i = 0; i < lines.length; i++) { const line = lines[i]; const trimmed = line.trim(); // Keep empty lines for formatting if (!trimmed) { validLines.push(line); stats.empty++; continue; } stats.total++; const lineNumber = i + 1; const validation = validateAdblockRule(trimmed); // Comments are always valid if (validation.format === 'comment') { validLines.push(line); stats.valid++; stats.comments++; continue; } if (validation.isValid) { // Check for duplicates if requested if (removeDuplicates) { if (seenRules.has(trimmed)) { duplicateLines.push({ line: trimmed, lineNumber }); stats.duplicates++; if (forceDebug) { console.log(formatLogMessage('debug', `${CLEAN_TAG} Removing duplicate line ${lineNumber}: ${trimmed}`)); } continue; // Skip duplicate } else { seenRules.add(trimmed); } } validLines.push(line); stats.valid++; } else { invalidLines.push({ line: trimmed, lineNumber, error: validation.error }); stats.invalid++; if (forceDebug) { console.log(formatLogMessage('debug', `${CLEAN_TAG} Removing invalid line ${lineNumber}: ${trimmed} (${validation.error})`)); } } } stats.removed = stats.invalid + stats.duplicates; // Log cleaning results if (!silentMode) { if (forceDebug) { console.log(formatLogMessage('debug', `Cleaning results for ${filePath}:`)); console.log(formatLogMessage('debug', ` Total lines processed: ${stats.total}`)); console.log(formatLogMessage('debug', ` Valid rules: ${stats.valid} (${stats.comments} comments)`)); console.log(formatLogMessage('debug', ` Invalid rules: ${stats.invalid}`)); console.log(formatLogMessage('debug', ` Duplicates: ${stats.duplicates}`)); console.log(formatLogMessage('debug', ` Total removed: ${stats.removed}`)); } if (invalidLines.length > 0 && forceDebug) { console.log(formatLogMessage('warn', `Invalid lines found:`)); invalidLines.slice(0, 5).forEach(item => { console.log(formatLogMessage('warn', ` Line ${item.lineNumber}: ${item.error}`)); }); if (invalidLines.length > 5) { console.log(formatLogMessage('warn', ` ... and ${invalidLines.length - 5} more invalid lines`)); } } } // Create cleaned content const cleanedContent = validLines.join('\n'); // Determine output path const finalOutputPath = outputPath || filePath; // Create backup if requested and not in dry run mode if (backupOriginal && !dryRun && finalOutputPath === filePath) { try { const backupPath = `${filePath}.backup`; fs.copyFileSync(filePath, backupPath); if (forceDebug) { console.log(formatLogMessage('debug', `Created backup: ${backupPath}`)); } } catch (backupErr) { return { success: false, error: `Failed to create backup: ${backupErr.message}`, stats }; } } // Write cleaned file (unless dry run) if (!dryRun) { try { fs.writeFileSync(finalOutputPath, cleanedContent); if (forceDebug) { console.log(formatLogMessage('debug', `Wrote cleaned file: ${finalOutputPath}`)); } } catch (writeErr) { return { success: false, error: `Failed to write cleaned file: ${writeErr.message}`, stats }; } } return { success: true, stats, invalidLines, duplicateLines, modified: stats.removed > 0, wouldModify: dryRun && stats.removed > 0, backupCreated: backupOriginal && !dryRun && finalOutputPath === filePath }; } /** * Validates full configuration object * @param {object} config - Complete configuration object * @param {object} options - Validation options * @returns {object} Comprehensive validation result */ function validateFullConfig(config, options = {}) { const { forceDebug = false, silentMode = false } = options; const globalErrors = []; const siteValidations = []; // Validate global configuration if (!config) { return { isValid: false, globalErrors: ['Configuration object is required'], siteValidations: [], summary: { totalSites: 0, validSites: 0, sitesWithErrors: 0, sitesWithWarnings: 0 } }; } // Validate sites array if (!config.sites || !Array.isArray(config.sites)) { globalErrors.push('Configuration must contain a "sites" array'); } else if (config.sites.length === 0) { globalErrors.push('Sites array cannot be empty'); } // Validate global blocked patterns if (config.blocked && !Array.isArray(config.blocked)) { globalErrors.push('Global "blocked" must be an array'); } else if (config.blocked) { config.blocked.forEach((pattern, index) => { const validation = validateRegexPattern(pattern); if (!validation.isValid) { globalErrors.push(`Global blocked pattern ${index}: ${validation.error}`); } }); } // Validate global ignore_similar settings if (config.ignore_similar !== undefined && typeof config.ignore_similar !== 'boolean') { globalErrors.push('Global "ignore_similar" must be a boolean (true/false)'); } if (config.ignore_similar_threshold !== undefined) { if (typeof config.ignore_similar_threshold !== 'number' || config.ignore_similar_threshold < 0 || config.ignore_similar_threshold > 100) { globalErrors.push('Global "ignore_similar_threshold" must be a number between 0 and 100'); } } if (config.ignore_similar_ignored_domains !== undefined && typeof config.ignore_similar_ignored_domains !== 'boolean') { globalErrors.push('Global "ignore_similar_ignored_domains" must be a boolean (true/false)'); } // Validate individual sites if (config.sites && Array.isArray(config.sites)) { config.sites.forEach((site, index) => { const siteValidation = validateSiteConfig(site, index); siteValidations.push(siteValidation); }); } // Calculate summary const summary = { totalSites: siteValidations.length, validSites: siteValidations.filter(v => v.isValid).length, sitesWithErrors: siteValidations.filter(v => v.errors.length > 0).length, sitesWithWarnings: siteValidations.filter(v => v.warnings.length > 0).length }; const isValid = globalErrors.length === 0 && summary.sitesWithErrors === 0; return { isValid, globalErrors, siteValidations, summary }; } /** * Test domain validation with known test cases * @returns {boolean} True if all tests pass */ function testDomainValidation() { const testCases = [ // Valid domains { domain: 'example.com', expected: true }, { domain: 'sub.example.com', expected: true }, { domain: 'test-site.co.uk', expected: true }, { domain: '192.168.1.1', expected: true }, // IPv4 { domain: '2001:db8::1', expected: true }, // IPv6 // Invalid domains { domain: '', expected: false }, { domain: 'example', expected: false }, { domain: '.example.com', expected: false }, { domain: 'example.com.', expected: false }, { domain: 'ex..ample.com', expected: false }, { domain: '-example.com', expected: false } ]; let allPassed = true; testCases.forEach(({ domain, expected }) => { const result = isValidDomain(domain); if (result !== expected) { console.error(`Test failed for domain "${domain}": expected ${expected}, got ${result}`); allPassed = false; } }); return allPassed; } // Public surface used by nwss.js (validateRulesetFile, validateFullConfig, // testDomainValidation, cleanRulesetFile). The rest (isValidDomain, // isValidDomainLabel, isValidTLD, isIPAddress, isIPv4, isIPv6, // validateRegexPattern, validateAdblockModifiers, validateAdblockRule, // validateSiteConfig) stay internal-helper-but-exported for now since // downstream callers MAY import them via the dotted path even if grep // shows no current consumers — domain validators are the kind of thing // that gets added to in future. testAdblockValidation and // formatDomainWithValidation were removed entirely (zero callers // anywhere; formatDomainWithValidation looked like an old implementation // superseded by lib/output.js). module.exports = { isValidDomain, isValidDomainLabel, isValidTLD, isIPAddress, isIPv4, isIPv6, validateRegexPattern, validateAdblockModifiers, validateAdblockRule, validateRulesetFile, cleanRulesetFile, validateSiteConfig, validateFullConfig, testDomainValidation };