UNPKG

@fanboynz/network-scanner

Version:

A Puppeteer-based network scanner for analyzing web traffic, generating adblock filter rules, and identifying third-party requests. Features include fingerprint spoofing, Cloudflare bypass, content analysis with curl/grep, and multiple output formats.

873 lines (775 loc) 31.1 kB
// === Adblock Rules Parser (adblock_rules.js) v2.2 - Complete Optimization === // Supports EasyList/AdBlock Plus filter syntax // Optimizations: Map domains + URL cache + skip third-party calc + cached hostname split const fs = require('fs'); const psl = require('psl'); // Hoisted constants — avoid recreating per rule (~80K times for EasyList) const COSMETIC_OPTIONS = new Set(['generichide', 'elemhide', 'specifichide', 'genericblock']); const PARSE_TYPE_MAP = { 'script': 'script', 'stylesheet': 'stylesheet', 'css': 'stylesheet', 'image': 'image', 'xmlhttprequest': 'xhr', 'xhr': 'xhr', 'font': 'font', 'media': 'media', 'websocket': 'websocket', 'subdocument': 'subdocument', 'document': 'document', 'ping': 'ping', 'other': 'other' }; /** * Simple LRU cache for URL parsing results * Prevents memory leaks with fixed size limit */ class URLCache { constructor(maxSize = 1000) { this.cache = new Map(); this.maxSize = maxSize; } get(url) { return this.cache.get(url); } set(url, value) { // LRU eviction: if at max size, delete oldest entry if (this.cache.size >= this.maxSize) { const firstKey = this.cache.keys().next().value; this.cache.delete(firstKey); } this.cache.set(url, value); } clear() { this.cache.clear(); } getStats() { return { size: this.cache.size, maxSize: this.maxSize }; } } /** * Parses adblock filter list and creates matcher * @param {string} filePath - Path to filter list file * @param {Object} options - Parser options * @returns {Object} Rule matcher with matching functions */ function parseAdblockRules(filePath, options = {}) { const { enableLogging = false, caseSensitive = false } = options; let fileContent; try { fileContent = fs.readFileSync(filePath, 'utf-8'); } catch (err) { throw new Error(`Adblock rules file not found: ${filePath}`); } const lines = fileContent.split('\n'); const rules = { domainMap: new Map(), // ||domain.com^ - Exact domains for O(1) lookup domainRules: [], // ||*.domain.com^ - Wildcard domains (fallback) thirdPartyRules: [], // ||domain.com^$third-party firstPartyRules: [], pathRules: [], // /ads/* scriptRules: [], // .js$script regexRules: [], // /regex/ whitelist: [], // @@||domain.com^ - Wildcard whitelist whitelistMap: new Map(), // Exact whitelist domains for O(1) lookup elementHiding: [], // ##.ad-class (not used for network blocking) stats: { total: 0, domain: 0, domainMapEntries: 0, // Exact domain matches in Map thirdParty: 0, firstParty: 0, path: 0, script: 0, regex: 0, whitelist: 0, elementHiding: 0, comments: 0, invalid: 0 } }; for (let line of lines) { line = line.trim(); // Skip empty lines if (!line) continue; // Skip comments if (line.startsWith('!') || line.startsWith('#')) { rules.stats.comments++; continue; } // Skip element hiding rules (cosmetic filters) if (line.includes('##') || line.includes('#@#')) { rules.stats.elementHiding++; continue; } // Skip rules with cosmetic-only options (not for network blocking) // These options only affect element hiding, not network requests let hasCosmeticOption = false; for (const opt of COSMETIC_OPTIONS) { if (line.includes(`$${opt}`) || line.includes(`,${opt}`)) { hasCosmeticOption = true; break; } } if (hasCosmeticOption) { rules.stats.elementHiding++; continue; } rules.stats.total++; try { // Whitelist rules (exception rules) if (line.startsWith('@@')) { const cleanLine = line.substring(2); const parsedRule = parseRule(cleanLine, true, enableLogging); // Store exact domains in Map for O(1) lookup, wildcards in array if (parsedRule.isDomain && parsedRule.domain && !parsedRule.domain.includes('*')) { rules.whitelistMap.set(parsedRule.domain.toLowerCase(), parsedRule); } else { rules.whitelist.push(parsedRule); } rules.stats.whitelist++; continue; } // Regular blocking rules const parsedRule = parseRule(line, false, enableLogging); // Categorize based on rule type if (parsedRule.isThirdParty) { rules.thirdPartyRules.push(parsedRule); rules.stats.thirdParty++; } else if (parsedRule.isFirstParty) { rules.firstPartyRules.push(parsedRule); rules.stats.firstParty++; } else if (parsedRule.isDomain) { // Store exact domains in Map for O(1) lookup, wildcards in array if (parsedRule.domain && !parsedRule.domain.includes('*')) { rules.domainMap.set(parsedRule.domain.toLowerCase(), parsedRule); rules.stats.domainMapEntries++; } else { rules.domainRules.push(parsedRule); } rules.stats.domain++; } else if (parsedRule.isScript) { rules.scriptRules.push(parsedRule); rules.stats.script++; } else if (parsedRule.isRegex) { rules.regexRules.push(parsedRule); rules.stats.regex++; } else { rules.pathRules.push(parsedRule); rules.stats.path++; } } catch (err) { rules.stats.invalid++; if (enableLogging) { console.log(`[Adblock] Failed to parse rule: ${line} - ${err.message}`); } } } if (enableLogging) { console.log(`[Adblock] Loaded ${rules.stats.total} rules:`); console.log(` - Domain rules: ${rules.stats.domain}`); console.log(` • Exact matches (Map): ${rules.stats.domainMapEntries}`); console.log(` • Wildcard patterns (Array): ${rules.domainRules.length}`); console.log(` - Third-party rules: ${rules.stats.thirdParty}`); console.log(` - First-party rules: ${rules.stats.firstParty}`); console.log(` - Path rules: ${rules.stats.path}`); console.log(` - Script rules: ${rules.stats.script}`); console.log(` - Regex rules: ${rules.stats.regex}`); console.log(` - Whitelist rules: ${rules.stats.whitelist}`); console.log(` - Comments/Element hiding: ${rules.stats.comments + rules.stats.elementHiding}`); console.log(` - Invalid rules: ${rules.stats.invalid}`); } return createMatcher(rules, { enableLogging, caseSensitive }); } /** * Parses individual adblock rule * @param {string} rule - Raw rule string * @param {boolean} isWhitelist - Whether this is a whitelist rule * @returns {Object} Parsed rule object */ function parseRule(rule, isWhitelist, enableLogging = false) { const parsed = { raw: enableLogging ? rule : null, // Only store for logging — saves memory on large lists isWhitelist, isDomain: false, isThirdParty: false, isFirstParty: false, isScript: false, resourceTypes: null, // Set of allowed resource types, null = all types excludedResourceTypes: null, // Set of excluded resource types ($~script, $~image) isRegex: false, domainRestrictions: null, // { include: ['site.com'], exclude: ['~site.com'] } pattern: '', matcher: null }; // Split rule and options ($option1,option2) let [pattern, optionsStr] = rule.split('$'); parsed.pattern = pattern; // Parse options into local object (not stored on parsed — freed after this block) if (optionsStr) { const options = optionsStr.split(','); const parsedOptions = {}; for (const opt of options) { const [key, value] = opt.split('='); const trimmedKey = key.trim(); if (!COSMETIC_OPTIONS.has(trimmedKey)) { parsedOptions[trimmedKey] = value ? value.trim() : true; } } // Check for third-party option if (parsedOptions['third-party'] || parsedOptions['3p']) { parsed.isThirdParty = true; } // Check for first-party option ($first-party, $1p, $~third-party) if (parsedOptions['first-party'] || parsedOptions['1p'] || parsedOptions['~third-party']) { parsed.isFirstParty = true; } // Parse resource type options using module-level PARSE_TYPE_MAP const matchedTypes = Object.keys(parsedOptions) .filter(key => PARSE_TYPE_MAP[key]) .map(key => PARSE_TYPE_MAP[key]); const excludedTypes = Object.keys(parsedOptions) .filter(key => key.startsWith('~') && PARSE_TYPE_MAP[key.substring(1)]) .map(key => PARSE_TYPE_MAP[key.substring(1)]); if (matchedTypes.length > 0) { // $document rules act as full domain blocks — no resource type restriction if (matchedTypes.length === 1 && matchedTypes[0] === 'document') { // Don't set resourceTypes — treat as standard block matching all types } else { parsed.resourceTypes = new Set(matchedTypes); } if (parsedOptions['script']) { parsed.isScript = true; } } if (excludedTypes.length > 0) { parsed.excludedResourceTypes = new Set(excludedTypes); } // Parse domain option: $domain=site1.com|site2.com|~excluded.com if (parsedOptions['domain']) { const domainList = parsedOptions['domain']; const domains = domainList.split('|').map(d => d.trim()).filter(d => d); const include = []; const exclude = []; for (const domain of domains) { if (domain.startsWith('~')) { exclude.push(domain.substring(1).toLowerCase()); } else { include.push(domain.toLowerCase()); } } parsed.domainRestrictions = { include: include.length > 0 ? include : null, exclude: exclude.length > 0 ? exclude : null }; } // parsedOptions goes out of scope here — GC can reclaim } // Domain rules: ||domain.com^ or ||domain.com if (pattern.startsWith('||')) { const domain = pattern.substring(2).replace(/[\^\/\*].*$/, ''); const afterDomain = pattern.substring(2 + domain.length); if (!afterDomain || afterDomain === '^') { // Pure domain rule: ||domain.com^ or ||domain.com parsed.isDomain = true; parsed.domain = domain; parsed.matcher = createDomainMatcher(domain); } else { // Domain + path rule: ||domain.com/path or ||domain.com^*path // Split into fast domain check + path pattern to avoid full-URL regex parsed.isDomain = true; parsed.domain = domain; const domainMatcher = createDomainMatcher(domain); const pathMatcher = createPatternMatcher(afterDomain); parsed.matcher = (url, hostname) => { if (!domainMatcher(url, hostname)) return false; // Extract path portion after hostname for path matching const hostIdx = url.indexOf(hostname); if (hostIdx === -1) return false; const pathPart = url.substring(hostIdx + hostname.length); return pathMatcher(pathPart); }; } } // Regex rules: /pattern/ else if (pattern.startsWith('/') && pattern.endsWith('/')) { parsed.isRegex = true; const cached = _regexCache.get(pattern); if (cached) { parsed.matcher = cached; } else { const regexPattern = pattern.substring(1, pattern.length - 1); const regex = new RegExp(regexPattern, 'i'); parsed.matcher = (url) => regex.test(url); _regexCache.set(pattern, parsed.matcher); } } // Path/wildcard rules: /ads/* or ad.js else { parsed.matcher = createPatternMatcher(pattern); } return parsed; } /** * Creates a domain matcher function * @param {string} domain - Domain to match * @returns {Function} Matcher function */ function createDomainMatcher(domain) { const lowerDomain = domain.toLowerCase(); const dotDomain = '.' + lowerDomain; // hostname is already lowercased by shouldBlock() before being passed here return (url, hostname) => { return hostname === lowerDomain || hostname.endsWith(dotDomain); }; } /** * Shared regex cache — deduplicates identical compiled patterns across rules * Large lists (EasyList ~80K rules) often have thousands of duplicate patterns */ const _regexCache = new Map(); /** * Creates a pattern matcher for path/wildcard rules * @param {string} pattern - Pattern with wildcards * @returns {Function} Matcher function */ function createPatternMatcher(pattern) { // Check cache for already-compiled identical pattern const cached = _regexCache.get(pattern); if (cached) return cached; // Convert adblock pattern to regex // * matches anything // ^ matches separator (/, ?, &, =, :) // | matches start/end of URL // Handle | anchors before escaping — only at very start/end of pattern let anchorStart = false; let anchorEnd = false; if (pattern.startsWith('|') && !pattern.startsWith('||')) { anchorStart = true; pattern = pattern.substring(1); } if (pattern.endsWith('|')) { anchorEnd = true; pattern = pattern.slice(0, -1); } let regexPattern = pattern .replace(/[.+?{}()[\]\\|]/g, '\\$&') // Escape regex special chars including literal | .replace(/\*/g, '.*') // * -> .* .replace(/\^/g, '[/?&=:]'); // ^ -> separator chars if (anchorStart) regexPattern = '^' + regexPattern; if (anchorEnd) regexPattern = regexPattern + '$'; const regex = new RegExp(regexPattern, 'i'); const matcher = (url) => regex.test(url); _regexCache.set(pattern, matcher); return matcher; } /** * Creates rule matcher with shouldBlock function * @param {Object} rules - Parsed rules object * @param {Object} options - Matcher options * @returns {Object} Matcher with shouldBlock function */ function createMatcher(rules, options = {}) { const { enableLogging = false, caseSensitive = false } = options; const urlCache = new URLCache(16000); let cacheHits = 0; let cacheMisses = 0; const hasPartyRules = rules.thirdPartyRules.length > 0 || rules.firstPartyRules.length > 0; // Result cache with LRU eviction — evicts oldest entries one at a time // instead of clearing everything when full const resultCache = new URLCache(32000); function resultCacheGet(url, sourceUrl, resourceType) { return resultCache.get(url + '\0' + sourceUrl + '\0' + resourceType); } function resultCacheSet(url, sourceUrl, resourceType, result) { resultCache.set(url + '\0' + sourceUrl + '\0' + resourceType, result); } return { rules, /** * Check if URL should be blocked * @param {string} url - URL to check * @param {string} sourceUrl - Source page URL (for third-party detection) * @param {string} resourceType - Type of resource (script, image, etc) * @returns {Object} { blocked: boolean, rule: string|null, reason: string } */ shouldBlock(url, sourceUrl = '', resourceType = '') { try { // Check result cache — same URL+source+type always produces same result const cachedResult = resultCacheGet(url, sourceUrl, resourceType); if (cachedResult) { cacheHits++; return cachedResult; } cacheMisses++; // OPTIMIZATION: Check cache first for URL parsing (60% faster) let cachedData = urlCache.get(url); let hostname, lowerHostname; if (cachedData) { hostname = cachedData.hostname; lowerHostname = cachedData.lowerHostname; cacheHits++; } else { // Parse URL and cache result const urlObj = new URL(url); hostname = urlObj.hostname; lowerHostname = hostname.toLowerCase(); urlCache.set(url, { hostname, lowerHostname }); cacheMisses++; } // Lazy parent domain computation — only built when exact Map lookup misses let parentDomains = null; function getParentDomains() { if (parentDomains) return parentDomains; parentDomains = []; const hostnameParts = lowerHostname.split('.'); for (let i = 1; i < hostnameParts.length; i++) { parentDomains.push(hostnameParts.slice(i).join('.')); } return parentDomains; } // Extract and cache source page domain for $domain and third-party checks let sourceDomain = null; if (sourceUrl) { const cachedSourceData = urlCache.get(sourceUrl); if (cachedSourceData) { sourceDomain = cachedSourceData.lowerHostname; cacheHits++; } else { // Parse and cache sourceUrl try { const sourceUrlObj = new URL(sourceUrl); sourceDomain = sourceUrlObj.hostname.toLowerCase(); // Cache sourceUrl parsing result (same as request URLs) urlCache.set(sourceUrl, { hostname: sourceUrlObj.hostname, lowerHostname: sourceDomain }); cacheMisses++; } catch (err) { // Invalid sourceUrl, leave as null } } } // Calculate third-party status using already-parsed hostnames const isThirdParty = (sourceDomain && hasPartyRules) ? getBaseDomain(lowerHostname) !== getBaseDomain(sourceDomain) : false; // === WHITELIST CHECK (exception rules take precedence) === // Fast path: Check exact domain in Map (O(1)) let rule = rules.whitelistMap.get(lowerHostname); // V8: Single Map lookup if (rule) { if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } // Check parent domains for subdomain matches (e.g., sub.example.com -> example.com) const parents = getParentDomains(); for (let i = 0; i < parents.length; i++) { rule = rules.whitelistMap.get(parents[i]); if (rule) { if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } } // Slow path: Check wildcard whitelist patterns in array const whitelistLen = rules.whitelist.length; // V8: Cache length + indexed access for (let i = 0; i < whitelistLen; i++) { const rule = rules.whitelist[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Whitelisted: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: false, rule: rule.raw || rule.pattern, reason: 'whitelisted' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } // === DOMAIN BLOCKING CHECK === // Fast path: Check exact domain in Map (O(1)) rule = rules.domainMap.get(lowerHostname); // V8: Single Map lookup if (rule) { if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } // Check parent domains for subdomain matches (e.g., ads.example.com -> example.com) for (let i = 0; i < parents.length; i++) { rule = rules.domainMap.get(parents[i]); if (rule) { if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } } // Slow path: Check wildcard domain patterns in array const domainRulesLen = rules.domainRules.length; // V8: Cache length + indexed access for (let i = 0; i < domainRulesLen; i++) { const rule = rules.domainRules[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked domain: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'domain_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } // Check third-party rules if (isThirdParty) { const thirdPartyLen = rules.thirdPartyRules.length; // V8: Cache length for (let i = 0; i < thirdPartyLen; i++) { const rule = rules.thirdPartyRules[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked third-party: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'third_party_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } } // Check first-party rules if (!isThirdParty) { const firstPartyLen = rules.firstPartyRules.length; for (let i = 0; i < firstPartyLen; i++) { const rule = rules.firstPartyRules[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked first-party: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'first_party_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } } // Check script rules if (resourceType === 'script' || url.endsWith('.js')) { const scriptRulesLen = rules.scriptRules.length; // V8: Cache length for (let i = 0; i < scriptRulesLen; i++) { const rule = rules.scriptRules[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked script: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'script_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } } // Check path rules const pathRulesLen = rules.pathRules.length; // V8: Cache length for (let i = 0; i < pathRulesLen; i++) { const rule = rules.pathRules[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked path: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'path_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } // Check regex rules (most expensive, check last) const regexRulesLen = rules.regexRules.length; // V8: Cache length for (let i = 0; i < regexRulesLen; i++) { const rule = rules.regexRules[i]; if (matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain)) { if (enableLogging) { console.log(`[Adblock] Blocked regex: ${url} (${rule.raw || rule.pattern})`); } const r = { blocked: true, rule: rule.raw || rule.pattern, reason: 'regex_rule' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } } // No match - allow request const r = { blocked: false, rule: null, reason: 'no_match' }; resultCacheSet(url, sourceUrl, resourceType, r); return r; } catch (err) { if (enableLogging) { console.log(`[Adblock] Error checking ${url}: ${err.message}`); } // On error, allow request return { blocked: false, rule: null, reason: 'error' }; } }, /** * Get statistics about loaded rules * @returns {Object} Statistics object */ getStats() { const hitRate = cacheHits + cacheMisses > 0 ? ((cacheHits / (cacheHits + cacheMisses)) * 100).toFixed(1) + '%' : '0%'; return { ...rules.stats, cache: { hits: cacheHits, misses: cacheMisses, hitRate: hitRate, urlCacheSize: urlCache.cache.size, resultCacheSize: resultCache.cache.size, maxSize: urlCache.maxSize } }; } }; } /** * Check if rule's domain restrictions match the source domain * @param {Object} rule - Rule with potential domainRestrictions * @param {string|null} sourceDomain - Domain of the page making the request (lowercase) * @returns {boolean} True if rule should apply on this source domain */ function matchesDomainRestrictions(rule, sourceDomain) { // No domain restrictions = applies everywhere if (!rule.domainRestrictions) { return true; } // No source domain provided = can't check restrictions, allow for safety if (!sourceDomain) { return true; } const { include, exclude } = rule.domainRestrictions; // V8 OPT ADVANCED: For single-domain restrictions, skip loop overhead // This is the most common case (~80% of domain restrictions) // Fast path: Single exclusion if (exclude && exclude.length === 1 && (!include || include.length === 0)) { const excludedDomain = exclude[0]; if (sourceDomain === excludedDomain || sourceDomain.endsWith('.' + excludedDomain)) { return false; } return true; } // Fast path: Single inclusion if (include && include.length === 1 && (!exclude || exclude.length === 0)) { const includedDomain = include[0]; return sourceDomain === includedDomain || sourceDomain.endsWith('.' + includedDomain); } // Slow path: Multiple domains (use indexed loops) // V8 OPT: Check exclusions first (higher priority) - use indexed loop // If domain is explicitly excluded, rule does NOT apply if (exclude && exclude.length > 0) { const excludeLen = exclude.length; for (let i = 0; i < excludeLen; i++) { const excludedDomain = exclude[i]; // Exact match or subdomain match if (sourceDomain === excludedDomain || sourceDomain.endsWith('.' + excludedDomain)) { return false; // Domain is excluded, rule should NOT apply } } } // V8 OPT: Check inclusions - use indexed loop // If there's an include list, domain MUST be in it if (include && include.length > 0) { const includeLen = include.length; for (let i = 0; i < includeLen; i++) { const includedDomain = include[i]; // Exact match or subdomain match if (sourceDomain === includedDomain || sourceDomain.endsWith('.' + includedDomain)) { return true; // Domain is included, rule SHOULD apply } } return false; // Domain not in include list, rule should NOT apply } // Has exclusions but no inclusions, and not excluded = applies return true; } // Module-level constant for resource type normalization (hot path) const RESOURCE_TYPE_ALIASES = { 'script': 'script', 'stylesheet': 'stylesheet', 'image': 'image', 'xhr': 'xhr', 'fetch': 'xhr', 'font': 'font', 'media': 'media', 'websocket': 'websocket', 'subdocument': 'subdocument', 'document': 'document', 'ping': 'ping', 'other': 'other' }; /** * Check if rule matches the given URL * @param {Object} rule - Parsed rule object * @param {string} url - URL to check * @param {string} hostname - Hostname of URL * @param {boolean} isThirdParty - Whether request is third-party * @param {string} resourceType - Resource type * @param {string|null} sourceDomain - Source page domain (for $domain option) * @returns {boolean} True if rule matches */ function matchesRule(rule, url, hostname, isThirdParty, resourceType, sourceDomain) { // Check domain restrictions first if (!matchesDomainRestrictions(rule, sourceDomain)) { return false; } // Check third-party option if (rule.isThirdParty && !isThirdParty) { return false; } // Check first-party option if (rule.isFirstParty && isThirdParty) { return false; } // Normalize resource type once for both checks if (resourceType && (rule.resourceTypes || rule.excludedResourceTypes)) { const normalizedType = RESOURCE_TYPE_ALIASES[resourceType] || resourceType; // Check resource type restrictions if (rule.resourceTypes && !rule.resourceTypes.has(normalizedType)) { return false; } // Check negated resource type restrictions ($~script, $~image, etc.) if (rule.excludedResourceTypes && rule.excludedResourceTypes.has(normalizedType)) { return false; } } // Apply matcher function if (rule.isDomain) { return rule.matcher(url, hostname); } else { return rule.matcher(url); } } /** * Extract base domain from hostname using Public Suffix List * Correctly handles multi-part TLDs like .co.uk, .com.au, .com.br * @param {string} hostname - Full hostname * @returns {string} Base domain */ const _baseDomainCache = new Map(); function getBaseDomain(hostname) { const cached = _baseDomainCache.get(hostname); if (cached) return cached; const parsed = psl.parse(hostname); const result = (parsed && parsed.domain) ? parsed.domain : hostname; // Cap cache size if (_baseDomainCache.size > 10000) _baseDomainCache.clear(); _baseDomainCache.set(hostname, result); return result; } module.exports = { parseAdblockRules, getBaseDomain };