UNPKG

web-vuln-scanner

Version:

Advanced, lightweight web vulnerability scanner with smart detection and an easy-to-use interface

982 lines (839 loc) 31.2 kB
const puppeteer = require('puppeteer'); const { URL } = require('url'); const debug = require('debug')('web-vuln-scanner:puppeteer'); class PuppeteerCrawler { constructor(options = {}) { this.baseUrl = options.baseUrl; this.baseUrlObj = new URL(this.baseUrl); this.depth = options.depth || 3; this.maxPages = options.maxPages || 500; this.timeout = options.timeout || 30000; this.userAgent = options.userAgent || 'WebVulnScanner/2.0 (Security Testing)'; this.headers = options.headers || {}; this.cookies = options.cookies || []; this.includeSubdomains = options.includeSubdomains || false; this.screenshot = options.screenshot || false; this.interceptRequests = options.interceptRequests !== false; this.waitForJs = options.waitForJs || 3000; this.maxRetries = options.maxRetries || 2; // State management this.visited = new Set(); this.queue = [this.baseUrl]; this.foundUrls = new Set([this.baseUrl]); this.failedUrls = new Set(); this.pageData = new Map(); this.formData = new Map(); this.ajaxEndpoints = new Set(); this.websocketEndpoints = new Set(); this.apiEndpoints = new Set(); this.jsErrors = new Map(); this.networkRequests = new Map(); // Browser management this.browser = null; this.activeTabs = new Set(); } async initBrowser() { if (!this.browser) { debug('Launching browser with enhanced configuration'); this.browser = await puppeteer.launch({ headless: 'new', args: [ '--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-accelerated-2d-canvas', '--no-first-run', '--no-zygote', '--disable-gpu', '--disable-web-security', '--disable-features=VizDisplayCompositor', '--disable-background-timer-throttling', '--disable-backgrounding-occluded-windows', '--disable-renderer-backgrounding', '--disable-field-trial-config', '--disable-ipc-flooding-protection' ], timeout: 60000, ignoreDefaultArgs: ['--disable-extensions'] }); // Handle browser disconnect this.browser.on('disconnected', () => { debug('Browser disconnected'); this.browser = null; }); } 
return this.browser; } async createPage() { const browser = await this.initBrowser(); const page = await browser.newPage(); this.activeTabs.add(page); // Enhanced page configuration await page.setViewport({ width: 1366, height: 768 }); await page.setUserAgent(this.userAgent); // Set extra headers if (Object.keys(this.headers).length > 0) { await page.setExtraHTTPHeaders(this.headers); } // Set cookies if (this.cookies.length > 0) { await page.setCookie(...this.cookies); } // Request/response interception for comprehensive monitoring if (this.interceptRequests) { await page.setRequestInterception(true); page.on('request', (request) => { // Log all requests for endpoint discovery const url = request.url(); const method = request.method(); if (this.isRelevantEndpoint(url)) { this.networkRequests.set(url, { method, headers: request.headers(), postData: request.postData(), timestamp: Date.now() }); // Detect API endpoints if (this.isApiEndpoint(url)) { this.apiEndpoints.add(url); } } request.continue(); }); page.on('response', (response) => { const url = response.url(); const status = response.status(); if (this.networkRequests.has(url)) { const requestData = this.networkRequests.get(url); requestData.status = status; requestData.responseHeaders = response.headers(); this.networkRequests.set(url, requestData); } }); } // Console message monitoring for errors and endpoints page.on('console', (msg) => { if (msg.type() === 'error') { const url = page.url(); if (!this.jsErrors.has(url)) { this.jsErrors.set(url, []); } this.jsErrors.get(url).push(msg.text()); } }); // Page error monitoring page.on('pageerror', (error) => { const url = page.url(); if (!this.jsErrors.has(url)) { this.jsErrors.set(url, []); } this.jsErrors.get(url).push(error.message); }); return page; } async closePage(page) { try { this.activeTabs.delete(page); await page.close(); } catch (error) { debug(`Error closing page: ${error.message}`); } } async closeBrowser() { if (this.browser) { try { // Close all 
active tabs for (const page of this.activeTabs) { await this.closePage(page); } await this.browser.close(); this.browser = null; } catch (error) { debug(`Error closing browser: ${error.message}`); } } } async crawl() { debug(`Starting comprehensive Puppeteer crawl: ${this.baseUrl}`); debug(`Configuration: depth=${this.depth}, maxPages=${this.maxPages}, timeout=${this.timeout}`); try { let currentDepth = 0; while (this.queue.length && this.visited.size < this.maxPages && currentDepth < this.depth) { const currentLevelUrls = [...this.queue]; this.queue = []; debug(`Processing depth ${currentDepth + 1}, ${currentLevelUrls.length} URLs`); // Process URLs in batches to manage resources const batchSize = 3; for (let i = 0; i < currentLevelUrls.length; i += batchSize) { const batch = currentLevelUrls.slice(i, i + batchSize); const promises = batch.map(url => this.crawlPage(url)); try { await Promise.allSettled(promises); } catch (error) { debug(`Batch processing error: ${error.message}`); } // Small delay between batches to avoid overwhelming the target await this.sleep(500); } currentDepth++; } // Additional discovery phases await this.discoverHiddenEndpoints(); await this.performInteractiveDiscovery(); debug(`Puppeteer crawl complete. 
Found ${this.foundUrls.size} URLs, ${this.failedUrls.size} failed`); return { urls: [...this.foundUrls], failed: [...this.failedUrls], pageData: Object.fromEntries(this.pageData), forms: Object.fromEntries(this.formData), ajaxEndpoints: [...this.ajaxEndpoints], apiEndpoints: [...this.apiEndpoints], websocketEndpoints: [...this.websocketEndpoints], networkRequests: Object.fromEntries(this.networkRequests), jsErrors: Object.fromEntries(this.jsErrors) }; } finally { await this.closeBrowser(); } } async crawlPage(url, retryCount = 0) { if (this.visited.has(url) || this.foundUrls.size >= this.maxPages) { return; } this.visited.add(url); debug(`Crawling: ${url} (attempt ${retryCount + 1})`); let page; try { page = await this.createPage(); // Navigate to the page with comprehensive waiting const response = await page.goto(url, { waitUntil: ['networkidle0', 'domcontentloaded'], timeout: this.timeout }); if (!response) { throw new Error('No response received'); } const status = response.status(); if (status >= 400) { debug(`HTTP ${status} at ${url}`); this.failedUrls.add(url); return; } // Wait for JavaScript execution and dynamic content await page.waitForTimeout(this.waitForJs); // Extract comprehensive page data await this.extractPageData(page, url); // Take screenshot if enabled if (this.screenshot && this.foundUrls.size < 20) { try { await page.screenshot({ path: `screenshots/${this.sanitizeFilename(url)}.png`, fullPage: true }); } catch (screenshotError) { debug(`Screenshot failed for ${url}: ${screenshotError.message}`); } } } catch (error) { if (retryCount < this.maxRetries) { debug(`Retrying ${url} (attempt ${retryCount + 2})`); await this.sleep(2000); return this.crawlPage(url, retryCount + 1); } debug(`Failed to crawl ${url}: ${error.message}`); this.failedUrls.add(url); } finally { if (page) { await this.closePage(page); } } } async extractPageData(page, url) { try { // Execute comprehensive data extraction in browser context const pageInfo = await 
page.evaluate(() => { const data = { title: document.title, url: window.location.href, links: [], forms: [], inputs: [], buttons: [], iframes: [], scripts: [], websockets: [], eventListeners: [], localStorage: {}, sessionStorage: {}, cookies: document.cookie, meta: [] }; // Extract all links document.querySelectorAll('a[href], area[href]').forEach((link, index) => { data.links.push({ href: link.href, text: link.textContent?.trim() || '', title: link.title || '', target: link.target || '', rel: link.rel || '' }); }); // Extract forms with detailed information document.querySelectorAll('form').forEach((form, index) => { const formData = { id: form.id || `form_${index}`, action: form.action || window.location.href, method: form.method?.toUpperCase() || 'GET', enctype: form.enctype || 'application/x-www-form-urlencoded', target: form.target || '', inputs: [] }; // Extract all form controls form.querySelectorAll('input, textarea, select, button').forEach(input => { const inputData = { name: input.name || '', type: input.type || 'text', value: input.value || '', placeholder: input.placeholder || '', required: input.required || false, disabled: input.disabled || false, readonly: input.readOnly || false, id: input.id || '', className: input.className || '' }; if (input.tagName === 'SELECT') { inputData.options = []; input.querySelectorAll('option').forEach(option => { inputData.options.push({ value: option.value, text: option.textContent, selected: option.selected }); }); } formData.inputs.push(inputData); }); data.forms.push(formData); }); // Extract all clickable elements document.querySelectorAll('button, input[type="button"], input[type="submit"], [onclick]').forEach(button => { data.buttons.push({ text: button.textContent?.trim() || button.value || '', type: button.type || '', onclick: button.getAttribute('onclick') || '', id: button.id || '', className: button.className || '' }); }); // Extract iframes document.querySelectorAll('iframe, frame').forEach(iframe => { 
data.iframes.push({ src: iframe.src || '', name: iframe.name || '', id: iframe.id || '' }); }); // Extract script sources document.querySelectorAll('script[src]').forEach(script => { data.scripts.push(script.src); }); // Extract meta information document.querySelectorAll('meta').forEach(meta => { data.meta.push({ name: meta.name || meta.getAttribute('property') || '', content: meta.content || '', httpEquiv: meta.httpEquiv || '' }); }); // Extract storage data (if accessible) try { for (let i = 0; i < localStorage.length; i++) { const key = localStorage.key(i); data.localStorage[key] = localStorage.getItem(key); } } catch (e) {} try { for (let i = 0; i < sessionStorage.length; i++) { const key = sessionStorage.key(i); data.sessionStorage[key] = sessionStorage.getItem(key); } } catch (e) {} return data; }); // Store page data this.pageData.set(url, pageInfo); this.formData.set(url, pageInfo.forms); // Process discovered links pageInfo.links.forEach(link => { const processedUrl = this.processFoundUrl(link.href, url); if (processedUrl && !this.visited.has(processedUrl)) { this.foundUrls.add(processedUrl); this.queue.push(processedUrl); } }); // Process form actions pageInfo.forms.forEach(form => { const processedUrl = this.processFoundUrl(form.action, url); if (processedUrl && !this.visited.has(processedUrl)) { this.foundUrls.add(processedUrl); this.queue.push(processedUrl); } }); // Extract AJAX endpoints from page await this.extractAjaxEndpoints(page, url); // Look for WebSocket connections await this.extractWebSocketEndpoints(page, url); } catch (error) { debug(`Error extracting page data from ${url}: ${error.message}`); } } async extractAjaxEndpoints(page, url) { try { // Override XMLHttpRequest and fetch to capture AJAX calls await page.evaluateOnNewDocument(() => { window.capturedRequests = []; // Intercept XMLHttpRequest const originalXHROpen = XMLHttpRequest.prototype.open; XMLHttpRequest.prototype.open = function(method, url, async, user, password) { 
window.capturedRequests.push({ type: 'xhr', method, url }); return originalXHROpen.apply(this, arguments); }; // Intercept fetch const originalFetch = window.fetch; window.fetch = function(input, init) { const url = typeof input === 'string' ? input : input.url; const method = init?.method || 'GET'; window.capturedRequests.push({ type: 'fetch', method, url }); return originalFetch.apply(this, arguments); }; }); // Trigger common AJAX patterns by interacting with the page await this.triggerAjaxCalls(page); // Extract captured requests const capturedRequests = await page.evaluate(() => window.capturedRequests || []); capturedRequests.forEach(request => { const processedUrl = this.processFoundUrl(request.url, url); if (processedUrl) { this.ajaxEndpoints.add(processedUrl); if (!this.visited.has(processedUrl)) { this.foundUrls.add(processedUrl); this.queue.push(processedUrl); } } }); } catch (error) { debug(`Error extracting AJAX endpoints from ${url}: ${error.message}`); } } async triggerAjaxCalls(page) { try { // Click on buttons and links that might trigger AJAX const clickableElements = await page.$('button, [onclick], .ajax, [data-ajax], [data-url]'); for (let element of clickableElements.slice(0, 5)) { try { await Promise.race([ element.click(), page.waitForTimeout(1000) ]); await page.waitForTimeout(500); } catch (e) {} } // Trigger form submissions const forms = await page.$('form'); for (let form of forms.slice(0, 3)) { try { await page.evaluate(form => { // Fill form with test data const inputs = form.querySelectorAll('input[type="text"], input[type="email"], textarea'); inputs.forEach(input => { if (input.type === 'email') { input.value = 'test@example.com'; } else { input.value = 'test'; } }); }, form); const submitButton = await form.$('input[type="submit"], button[type="submit"], button'); if (submitButton) { await Promise.race([ submitButton.click(), page.waitForTimeout(1000) ]); await page.waitForTimeout(1000); } } catch (e) {} } } catch (error) { 
debug(`Error triggering AJAX calls: ${error.message}`); } } async extractWebSocketEndpoints(page, url) { try { // Override WebSocket constructor to capture connections await page.evaluateOnNewDocument(() => { window.capturedWebSockets = []; const originalWebSocket = window.WebSocket; window.WebSocket = function(url, protocols) { window.capturedWebSockets.push({ url, protocols }); return new originalWebSocket(url, protocols); }; }); // Wait for any WebSocket connections to be established await page.waitForTimeout(2000); const webSockets = await page.evaluate(() => window.capturedWebSockets || []); webSockets.forEach(ws => { const processedUrl = this.processFoundUrl(ws.url, url); if (processedUrl) { this.websocketEndpoints.add(processedUrl); } }); } catch (error) { debug(`Error extracting WebSocket endpoints from ${url}: ${error.message}`); } } async discoverHiddenEndpoints(page) { debug('Discovering hidden endpoints through JavaScript analysis'); const commonEndpoints = [ '/api/v1/', '/api/v2/', '/rest/', '/graphql', '/swagger', '/admin/api/', '/backend/', '/internal/', '/private/', '/debug/', '/test/', '/dev/', '/.well-known/' ]; for (const endpoint of commonEndpoints) { if (this.foundUrls.size >= this.maxPages) break; const testUrl = new URL(endpoint, this.baseUrl).toString(); if (!this.visited.has(testUrl)) { this.foundUrls.add(testUrl); this.queue.push(testUrl); } } } async performInteractiveDiscovery() { debug('Performing interactive discovery on key pages'); // Get pages that are likely to have dynamic content const interactivePages = [...this.foundUrls] .filter(url => { const path = new URL(url).pathname.toLowerCase(); return path.includes('admin') || path.includes('dashboard') || path.includes('panel') || path.includes('manage') || path === '/' || path.includes('login'); }) .slice(0, 5); for (const url of interactivePages) { if (this.foundUrls.size >= this.maxPages) break; let page; try { page = await this.createPage(); await page.goto(url, { waitUntil: 
'networkidle0' }); // Perform comprehensive interaction await this.performPageInteraction(page); // Enhanced discovery: Look for SPAs and dynamic routes await this.discoverSpaRoutes(page); // Extract data from JavaScript variables await this.extractJsVariables(page); } catch (error) { debug(`Error in interactive discovery for ${url}: ${error.message}`); } finally { if (page) { await this.closePage(page); } } } } async discoverSpaRoutes(page) { try { // Look for common SPA routing patterns const routes = await page.evaluate(() => { const discoveredRoutes = new Set(); // Angular routes if (window.ng && window.ng.getComponent) { try { const router = window.ng.getComponent(document.body)?.router; if (router && router.config) { router.config.forEach(route => { if (route.path) discoveredRoutes.add(route.path); }); } } catch (e) {} } // React Router routes if (window.React && window.ReactRouter) { try { // Look for route definitions in script tags document.querySelectorAll('script').forEach(script => { const content = script.textContent || script.innerHTML; const routeMatches = content.match(/path\s*:\s*["']([^"']+)["']/g); if (routeMatches) { routeMatches.forEach(match => { const route = match.match(/["']([^"']+)["']/)?.[1]; if (route) discoveredRoutes.add(route); }); } }); } catch (e) {} } // Vue Router routes if (window.Vue && window.VueRouter) { try { if (window.$router && window.$router.options.routes) { window.$router.options.routes.forEach(route => { if (route.path) discoveredRoutes.add(route.path); }); } } catch (e) {} } // Look for href patterns in JavaScript document.querySelectorAll('script').forEach(script => { const content = script.textContent || script.innerHTML; // Match common routing patterns const patterns = [ /["']\/[a-zA-Z0-9\-_\/]+["']/g, /route\s*:\s*["']([^"']+)["']/g, /path\s*:\s*["']([^"']+)["']/g ]; patterns.forEach(pattern => { const matches = content.match(pattern); if (matches) { matches.forEach(match => { const route = match.replace(/["']/g, 
''); if (route.startsWith('/') && route.length > 1 && route.length < 100) { discoveredRoutes.add(route); } }); } }); }); return Array.from(discoveredRoutes); }); // Add discovered routes to crawl queue routes.forEach(route => { try { const fullUrl = new URL(route, page.url()).toString(); if (!this.visited.has(fullUrl) && this.foundUrls.size < this.maxPages) { this.foundUrls.add(fullUrl); this.queue.push(fullUrl); } } catch (e) {} }); debug(`Discovered ${routes.length} SPA routes`); } catch (error) { debug(`Error discovering SPA routes: ${error.message}`); } } async extractJsVariables(page) { try { const jsData = await page.evaluate(() => { const data = { globalVars: {}, endpoints: [], apiKeys: [], secrets: [] }; // Extract global variables that might contain endpoints Object.keys(window).forEach(key => { try { const value = window[key]; if (typeof value === 'string' && ( value.includes('/api/') || value.includes('/rest/') || value.includes('http://') || value.includes('https://') )) { data.globalVars[key] = value; } } catch (e) {} }); // Look for endpoints in script content document.querySelectorAll('script').forEach(script => { const content = script.textContent || script.innerHTML; // Find API endpoints const endpointPatterns = [ /["']https?:\/\/[^"']+\/api\/[^"']+["']/g, /["']\/api\/[^"']+["']/g, /["']\/rest\/[^"']+["']/g, /baseURL\s*:\s*["']([^"']+)["']/g, /apiUrl\s*:\s*["']([^"']+)["']/g ]; endpointPatterns.forEach(pattern => { const matches = content.match(pattern); if (matches) { matches.forEach(match => { const endpoint = match.replace(/["']/g, '').replace(/.*:\s*/, ''); if (endpoint.includes('/') && endpoint.length > 1) { data.endpoints.push(endpoint); } }); } }); // Look for potential API keys or secrets const secretPatterns = [ /api[_-]?key\s*[:=]\s*["']([^"']{10,})["']/gi, /secret\s*[:=]\s*["']([^"']{10,})["']/gi, /token\s*[:=]\s*["']([^"']{10,})["']/gi, /password\s*[:=]\s*["']([^"']{5,})["']/gi ]; secretPatterns.forEach(pattern => { const matches = 
content.match(pattern); if (matches) { matches.forEach(match => { data.secrets.push(match); }); } }); }); return data; }); // Process discovered endpoints jsData.endpoints.forEach(endpoint => { try { const fullUrl = new URL(endpoint, page.url()).toString(); if (!this.visited.has(fullUrl) && this.foundUrls.size < this.maxPages) { this.foundUrls.add(fullUrl); this.apiEndpoints.add(fullUrl); } } catch (e) {} }); // Store extracted secrets for security analysis if (jsData.secrets.length > 0) { const url = page.url(); if (!this.pageData.has(url)) { this.pageData.set(url, {}); } this.pageData.get(url).extractedSecrets = jsData.secrets; } } catch (error) { debug(`Error extracting JS variables: ${error.message}`); } } // Enhanced technology detection async detectTechnologies(page) { try { const technologies = await page.evaluate(() => { const detected = { frameworks: [], libraries: [], cms: [], analytics: [], security: [] }; // Frontend frameworks if (window.React) detected.frameworks.push('React'); if (window.Vue) detected.frameworks.push('Vue.js'); if (window.angular) detected.frameworks.push('AngularJS'); if (window.ng) detected.frameworks.push('Angular'); if (window.Backbone) detected.frameworks.push('Backbone.js'); if (window.Ember) detected.frameworks.push('Ember.js'); if (window.Svelte) detected.frameworks.push('Svelte'); // Libraries if (window.jQuery || window.$) detected.libraries.push('jQuery'); if (window.Lodash || window._) detected.libraries.push('Lodash'); if (window.moment) detected.libraries.push('Moment.js'); if (window.axios) detected.libraries.push('Axios'); if (window.bootstrap) detected.libraries.push('Bootstrap'); // CMS detection const metaGenerator = document.querySelector('meta[name="generator"]'); if (metaGenerator) { const content = metaGenerator.content.toLowerCase(); if (content.includes('wordpress')) detected.cms.push('WordPress'); if (content.includes('drupal')) detected.cms.push('Drupal'); if (content.includes('joomla')) 
detected.cms.push('Joomla'); if (content.includes('magento')) detected.cms.push('Magento'); if (content.includes('shopify')) detected.cms.push('Shopify'); } // Analytics if (window.ga || window.gtag) detected.analytics.push('Google Analytics'); if (window.fbq) detected.analytics.push('Facebook Pixel'); if (window.mixpanel) detected.analytics.push('Mixpanel'); if (window.amplitude) detected.analytics.push('Amplitude'); // Security if (document.querySelector('meta[http-equiv="Content-Security-Policy"]')) { detected.security.push('Content Security Policy'); } if (document.querySelector('meta[name="csrf-token"]')) { detected.security.push('CSRF Token'); } return detected; }); // Store technology information const url = page.url(); if (!this.pageData.has(url)) { this.pageData.set(url, {}); } this.pageData.get(url).technologies = technologies; return technologies; } catch (error) { debug(`Error detecting technologies: ${error.message}`); return {}; } } processFoundUrl(url, baseUrl) { try { // Normalize: trim and to lower case for scheme checks let normalized = typeof url === 'string' ? 
url.trim().toLowerCase() : ''; if (!url || normalized.startsWith('#') || normalized.startsWith('mailto:') || normalized.startsWith('tel:') || normalized.startsWith('javascript:') || normalized.startsWith('data:') || normalized.startsWith('vbscript:')) { return null; } const fullUrl = new URL(url, baseUrl); // Domain filtering if (!this.includeSubdomains) { if (fullUrl.hostname !== this.baseUrlObj.hostname) { return null; } } else { if (!fullUrl.hostname.endsWith(this.baseUrlObj.hostname)) { return null; } } fullUrl.hash = ''; const normalizedUrl = fullUrl.toString(); if (normalizedUrl.length > 2000) return null; return normalizedUrl; } catch (error) { return null; } } isRelevantEndpoint(url) { try { const urlObj = new URL(url); return urlObj.hostname === this.baseUrlObj.hostname || (this.includeSubdomains && urlObj.hostname.endsWith(this.baseUrlObj.hostname)); } catch (e) { return false; } } isApiEndpoint(url) { const apiPatterns = [ '/api/', '/rest/', '/graphql', '/v1/', '/v2/', '/v3/', '.json', '.xml', '/ajax/', '/rpc/', '/soap/' ]; return apiPatterns.some(pattern => url.includes(pattern)); } sanitizeFilename(url) { return url.replace(/[^a-z0-9]/gi, '_').toLowerCase().substring(0, 50); } sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } } // Standalone crawl function for backward compatibility async function crawlPage(url, options = {}) { const crawler = new PuppeteerCrawler({ baseUrl: url, maxPages: options.maxPages || 20, timeout: options.timeout || 15000, userAgent: options.userAgent, headers: options.headers, cookies: options.cookies }); const results = await crawler.crawl(); return results.urls; } module.exports = { PuppeteerCrawler, crawlPage };