UNPKG

@knowcode/screenshotfetch

Version:

Web application spider with screenshot capture and customer journey documentation. Automate user flow documentation with authentication support.

448 lines (357 loc) • 14.6 kB
const ScreenshotCapture = require('../../capture');
const LoginHandler = require('./LoginHandler');
const FlowTracker = require('./FlowTracker');
const path = require('path');
const fs = require('fs').promises;

/**
 * Return the origin of `url`, or `null` when the URL cannot be parsed or has
 * an opaque origin (which `URL` serializes as the string 'null').
 *
 * @param {string} url - Absolute URL to inspect.
 * @returns {string|null} The origin, or `null` when none is usable.
 */
function originOf(url) {
  try {
    const { origin } = new URL(url);
    return origin === 'null' ? null : origin;
  } catch (error) {
    return null;
  }
}

/**
 * Spiders a web application: optionally logs in, explores click flows while
 * taking a screenshot of each step, and writes Markdown and JSON
 * documentation under `spiderOptions.outputDir`.
 *
 * Browser handling (`init`, `close`, `browser`, `options.viewport`) is
 * inherited from ScreenshotCapture.
 *
 * NOTE(review): several emoji prefixes in the log strings below look
 * mis-encoded in this copy of the file. They are left exactly as found
 * because the intended characters cannot be recovered with certainty.
 */
class ApplicationSpider extends ScreenshotCapture {
  /**
   * @param {object} [options] - Overrides for the spider defaults. The same
   *   object is also passed to the ScreenshotCapture constructor, to
   *   LoginHandler and (merged over flow defaults) to FlowTracker.
   */
  constructor(options = {}) {
    super(options);

    this.spiderOptions = {
      maxDepth: 3,
      maxPages: 100,
      maxFlows: 5,
      flowMode: true,
      includeQueryParams: true,
      outputDir: './docs',
      screenshotFormat: 'png',
      ...options
    };

    this.loginHandler = new LoginHandler(options);
    this.flowTracker = new FlowTracker({
      maxFlowDepth: 10,
      maxFlows: this.spiderOptions.maxFlows,
      ...options
    });

    this.visitedPages = new Set();
    this.screenshotCount = 0;

    // Origin the spider started on (after login). Set by spiderApplication()
    // and used by resetToStartingPoint() so a flow that wandered to another
    // site cannot drag the following flows along with it.
    this.spiderStartOrigin = null;
  }

  /**
   * Run a complete spider session against `startUrl`.
   *
   * Authentication is attempted only when both `username` and `password`
   * are truthy. The browser is closed in all cases.
   *
   * @param {string} startUrl - URL to open first.
   * @param {string} [username] - Login user name.
   * @param {string} [password] - Login password.
   * @returns {Promise<object>} The report built by generateSummaryReport().
   * @throws {Error} 'Authentication failed' when login does not succeed;
   *   any other error raised while spidering is logged and rethrown.
   */
  async spiderApplication(startUrl, username, password) {
    console.log('πŸ•·οΈ Starting application spider...');
    console.log(`πŸ“ Start URL: ${startUrl}`);
    console.log(`πŸ“ Output directory: ${this.spiderOptions.outputDir}`);

    try {
      // Initialize browser
      await this.init();

      // Navigate to start URL
      const page = await this.browser.newPage();
      await page.setViewport(this.options.viewport);
      await page.goto(startUrl, { waitUntil: 'networkidle2' });

      // Handle authentication if credentials provided
      if (username && password) {
        const loginSuccess = await this.handleAuthentication(page, username, password);
        if (!loginSuccess) {
          throw new Error('Authentication failed');
        }
      }

      // Remember where the session landed so resets stay on this site.
      this.spiderStartOrigin = originOf(page.url());

      // Create output directory structure
      await this.setupOutputDirectories();

      // Start spidering with flow tracking
      if (this.spiderOptions.flowMode) {
        await this.spiderWithFlowTracking(page);
      } else {
        await this.spiderBasic(page);
      }

      // Generate documentation
      await this.generateDocumentation();

      console.log('βœ… Application spidering completed successfully');
      return this.generateSummaryReport();
    } catch (error) {
      console.error('❌ Spider failed:', error.message);
      throw error;
    } finally {
      await this.close();
    }
  }

  /**
   * Log in through the LoginHandler when the page requires it.
   *
   * Errors are logged and reported as `false`; this method does not throw.
   *
   * @param {object} page - Browser page currently showing the application.
   * @param {string} username - Login user name.
   * @param {string} password - Login password.
   * @returns {Promise<boolean>} `true` when no login was needed or the login
   *   succeeded, `false` otherwise.
   */
  async handleAuthentication(page, username, password) {
    console.log('πŸ” Handling authentication...');

    try {
      // Check if login is required
      const needsLogin = await this.loginHandler.isLoginRequired(page);
      if (!needsLogin) {
        console.log('ℹ️ No authentication required');
        return true;
      }

      const success = await this.loginHandler.performLogin(page, username, password);
      if (!success) {
        console.log('❌ Authentication failed');
        return false;
      }

      console.log('βœ… Authentication successful');
      // Wait for any post-login redirects or page changes
      await new Promise(resolve => setTimeout(resolve, 3000));
      return true;
    } catch (error) {
      console.error('❌ Authentication error:', error.message);
      return false;
    }
  }

  /**
   * Create the output directory and its `flows`, `metadata` and `_images`
   * sub-directories (existing directories are accepted).
   *
   * @returns {Promise<void>}
   */
  async setupOutputDirectories() {
    const dirs = [
      this.spiderOptions.outputDir,
      path.join(this.spiderOptions.outputDir, 'flows'),
      path.join(this.spiderOptions.outputDir, 'metadata'),
      path.join(this.spiderOptions.outputDir, '_images')
    ];

    for (const dir of dirs) {
      await fs.mkdir(dir, { recursive: true });
    }

    console.log('πŸ“ Output directories created');
  }

  /**
   * Explore up to `spiderOptions.maxFlows` flows, resetting the page between
   * attempts. At most twice that many attempts are made, so unsuccessful
   * attempts cannot loop forever.
   *
   * @param {object} page - Browser page to drive.
   * @returns {Promise<void>}
   */
  async spiderWithFlowTracking(page) {
    console.log('πŸš€ Starting flow-based spidering...');

    let flowCount = 0;
    const maxAttempts = this.spiderOptions.maxFlows * 2; // Allow some failed attempts
    let attempts = 0;

    while (flowCount < this.spiderOptions.maxFlows && attempts < maxAttempts) {
      attempts++;

      try {
        // Start a new flow
        this.flowTracker.startNewFlow(page.url(), `flow-${flowCount + 1}`);

        // Take initial screenshot
        await this.captureFlowStep(page, 'Initial page load');

        // Discover and follow clickable elements
        const success = await this.exploreFlow(page);

        if (success) {
          this.flowTracker.completeCurrentFlow();
          flowCount++;
          console.log(`βœ… Completed flow ${flowCount}/${this.spiderOptions.maxFlows}`);
        } else {
          console.log(`⚠️ Flow attempt ${attempts} was not successful`);
        }

        // Reset to a clean state for next flow (go back to authenticated home)
        if (flowCount < this.spiderOptions.maxFlows) {
          await this.resetToStartingPoint(page);
        }
      } catch (error) {
        console.error(`❌ Flow ${flowCount + 1} failed:`, error.message);
        await this.resetToStartingPoint(page);
      }
    }

    console.log(`🏁 Flow spidering completed: ${flowCount} flows discovered`);
  }

  /**
   * Repeatedly click the first element the FlowTracker discovers on the
   * current page, capturing a screenshot after each click. Stops after 10
   * steps, when the tracker reports maximum depth, when nothing clickable
   * is found, when a click fails, or on the first error.
   *
   * @param {object} page - Browser page to drive.
   * @returns {Promise<boolean>} `true` when at least one step was taken.
   */
  async exploreFlow(page) {
    let stepCount = 0;
    const maxSteps = 10;

    while (stepCount < maxSteps && !this.flowTracker.hasReachedMaxDepth()) {
      try {
        // Discover clickable elements on current page
        const elements = await this.flowTracker.discoverClickableElements(page);

        if (elements.length === 0) {
          console.log('ℹ️ No more clickable elements found');
          break;
        }

        // Take the first candidate returned by the tracker
        const element = elements[0];
        if (!element) {
          break;
        }

        // Click the element
        const clickSuccess = await this.flowTracker.clickElement(page, element);
        if (!clickSuccess) {
          break;
        }

        // Capture screenshot of the new state
        const action = `Clicked: ${element.text || element.href}`;
        await this.captureFlowStep(page, action, { element });

        stepCount++;

        // Wait before next action
        await new Promise(resolve => setTimeout(resolve, 2000));
      } catch (error) {
        console.error(`❌ Error in flow exploration step ${stepCount}:`, error.message);
        break;
      }
    }

    return stepCount > 0; // Success if we managed to take at least one step
  }

  /**
   * Record a step in the current flow and save a viewport screenshot for it
   * under `<outputDir>/flows/flow-<type>/_images/`.
   *
   * @param {object} page - Browser page to capture.
   * @param {string} action - Description of the step; also used (sanitized)
   *   in the screenshot file name.
   * @param {object} [metadata] - Extra data passed to the FlowTracker.
   * @returns {Promise<object>} The step returned by the FlowTracker, with a
   *   `screenshot` property (`filename`, `path`, `relativePath`) attached.
   * @throws Rethrows any error after logging it.
   */
  async captureFlowStep(page, action, metadata = {}) {
    try {
      // Add flow step with URL tracking
      const step = await this.flowTracker.addFlowStep(page, action, metadata);

      // Generate screenshot filename
      const flow = this.flowTracker.getCurrentFlow();
      const stepPrefix = step.stepNumber.toString().padStart(2, '0');
      const filename = `${stepPrefix}-${this.sanitizeFilename(action)}.${this.spiderOptions.screenshotFormat}`;
      const flowDir = path.join(this.spiderOptions.outputDir, 'flows', `flow-${flow.type}`);
      const screenshotPath = path.join(flowDir, '_images', filename);

      // Ensure directory exists
      await fs.mkdir(path.dirname(screenshotPath), { recursive: true });

      // Take screenshot
      await page.screenshot({ path: screenshotPath, fullPage: false });

      // Update step with screenshot info
      step.screenshot = {
        filename,
        path: screenshotPath,
        relativePath: path.relative(flowDir, screenshotPath)
      };

      this.screenshotCount++;
      console.log(`πŸ“Έ Screenshot ${this.screenshotCount}: ${filename}`);

      return step;
    } catch (error) {
      console.error('❌ Error capturing flow step:', error.message);
      throw error;
    }
  }

  /**
   * Turn free text into a file-name fragment: lower-cased, everything except
   * `a-z`, `0-9`, whitespace and `-` removed, whitespace runs collapsed to a
   * single `-`, truncated to 50 characters.
   *
   * @param {string} text - Text to sanitize.
   * @returns {string} The sanitized fragment.
   */
  sanitizeFilename(text) {
    return text
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, '')
      .replace(/\s+/g, '-')
      .substring(0, 50);
  }

  /**
   * Navigate back to a home-like page so the next flow starts from a known
   * place. The common home paths are tried in order and the first one that
   * loads wins. Failures are logged; this method does not throw.
   *
   * The base origin is the one recorded by spiderApplication(); when none
   * was recorded (e.g. this method is called directly) the origin of the
   * current page is used instead.
   *
   * @param {object} page - Browser page to navigate.
   * @returns {Promise<void>}
   */
  async resetToStartingPoint(page) {
    try {
      console.log('πŸ”„ Resetting to starting point...');

      // Prefer the origin the session started on: the current page may be
      // on a different site if the last flow followed an external link.
      const baseUrl = this.spiderStartOrigin || new URL(page.url()).origin;

      // Common dashboard/home paths to try
      const homePaths = ['/', '/dashboard', '/home', '/main', '/app'];

      let reached = false;
      for (const homePath of homePaths) {
        try {
          await page.goto(new URL(homePath, baseUrl).href, {
            waitUntil: 'networkidle2',
            timeout: 10000
          });
          console.log(`βœ… Reset to: ${page.url()}`);
          reached = true;
          break;
        } catch (e) {
          // Best effort: try next path
        }
      }

      if (!reached) {
        console.error(`⚠️ Could not reset to starting point: no home path reachable under ${baseUrl}`);
      }

      // Wait a moment for page to stabilize
      await new Promise(resolve => setTimeout(resolve, 2000));
    } catch (error) {
      console.error('⚠️ Could not reset to starting point:', error.message);
    }
  }

  /**
   * Basic (non-flow) mode: currently just captures the page as it is.
   *
   * @param {object} page - Browser page to capture.
   * @returns {Promise<void>}
   */
  async spiderBasic(page) {
    console.log('πŸ•ΈοΈ Starting basic spidering mode...');

    // Basic spidering implementation (for future enhancement)
    // For now, just capture the current page
    await this.captureFlowStep(page, 'Basic spider - single page');
  }

  /**
   * Write per-flow Markdown, the index page and the JSON metadata files for
   * all completed flows.
   *
   * @returns {Promise<void>}
   * @throws Rethrows any error after logging it.
   */
  async generateDocumentation() {
    console.log('πŸ“ Generating documentation...');

    try {
      const flows = this.flowTracker.getCompletedFlows();

      // Generate flow documentation for each completed flow
      for (const flow of flows) {
        await this.generateFlowDocumentation(flow);
      }

      // Generate index documentation
      await this.generateIndexDocumentation(flows);

      // Generate metadata files
      await this.generateMetadataFiles(flows);

      console.log('βœ… Documentation generated successfully');
    } catch (error) {
      console.error('❌ Error generating documentation:', error.message);
      throw error;
    }
  }

  /**
   * Write `<outputDir>/flows/flow-<type>/<type>.md` describing one flow.
   *
   * @param {object} flow - Completed flow; this method reads `id`, `type`,
   *   `startUrl`, `startTime`, `metadata.duration` and `steps` (each step:
   *   `stepNumber`, `pageTitle`, `action`, `timestamp`, `url.raw`,
   *   `url.normalized`, optional `screenshot.filename`).
   * @returns {Promise<void>}
   */
  async generateFlowDocumentation(flow) {
    const flowDir = path.join(this.spiderOptions.outputDir, 'flows', `flow-${flow.type}`);
    const markdownPath = path.join(flowDir, `${flow.type}.md`);

    // Ensure directory exists
    await fs.mkdir(flowDir, { recursive: true });

    let markdown = `# Customer Journey: ${this.formatFlowTitle(flow.type)}\n\n`;
    markdown += `## Flow Overview\n`;
    markdown += `This journey was automatically discovered and documented.\n\n`;
    markdown += `- **Flow ID:** ${flow.id}\n`;
    markdown += `- **Total Steps:** ${flow.steps.length}\n`;
    markdown += `- **Duration:** ${this.formatDuration(flow.metadata.duration)}\n`;
    markdown += `- **Started:** ${new Date(flow.startTime).toLocaleString()}\n\n`;

    markdown += `## Journey Steps\n\n`;

    for (let i = 0; i < flow.steps.length; i++) {
      const step = flow.steps[i];
      const nextStep = flow.steps[i + 1];

      markdown += `### Step ${step.stepNumber}: ${step.pageTitle || 'Page'}\n`;

      if (step.screenshot) {
        markdown += `![${step.action}](_images/${step.screenshot.filename})\n`;
      }

      markdown += `**URL:** \`${step.url.raw}\`\n`;
      markdown += `**Action:** ${step.action}\n`;

      if (nextStep) {
        markdown += `**Next:** Go to next step\n`;
      } else {
        markdown += `**Journey Complete**\n`;
      }

      markdown += `**Timestamp:** ${new Date(step.timestamp).toLocaleString()}\n\n`;

      if (nextStep) {
        markdown += `↓ *User navigates to next step*\n\n`;
      }
    }

    // Add flow metadata section
    markdown += `## Flow Metadata\n`;
    markdown += `- **Flow Type:** ${flow.type}\n`;
    markdown += `- **Start URL:** ${flow.startUrl}\n`;
    markdown += `- **Total Screenshots:** ${flow.steps.filter(s => s.screenshot).length}\n`;
    markdown += `- **Unique URLs:** ${new Set(flow.steps.map(s => s.url.normalized)).size}\n`;

    await fs.writeFile(markdownPath, markdown);
    console.log(`πŸ“„ Generated flow documentation: ${markdownPath}`);
  }

  /**
   * Convert a dash-separated flow type into a title, e.g. `flow-1` becomes
   * `Flow 1`.
   *
   * @param {string} flowType - Dash-separated identifier.
   * @returns {string} Space-separated words, each with its first character
   *   upper-cased.
   */
  formatFlowTitle(flowType) {
    return flowType
      .split('-')
      .map(word => word.charAt(0).toUpperCase() + word.slice(1))
      .join(' ');
  }

  /**
   * Format a duration in milliseconds as `Xs` or `Xm Ys`.
   *
   * A duration of 0 is a real value and formats as `0s`; only a missing or
   * non-numeric duration yields `Unknown`.
   *
   * @param {number} ms - Duration in milliseconds.
   * @returns {string} Human-readable duration, or 'Unknown'.
   */
  formatDuration(ms) {
    if (ms == null || ms === '' || !Number.isFinite(Number(ms))) {
      return 'Unknown';
    }

    const seconds = Math.floor(ms / 1000);
    const minutes = Math.floor(seconds / 60);

    return minutes > 0 ? `${minutes}m ${seconds % 60}s` : `${seconds}s`;
  }

  /**
   * Write `<outputDir>/index.md` with overall statistics and a link to each
   * flow's document.
   *
   * @param {object[]} flows - Completed flows to list.
   * @returns {Promise<void>}
   */
  async generateIndexDocumentation(flows) {
    const indexPath = path.join(this.spiderOptions.outputDir, 'index.md');

    let markdown = `# Web Application Documentation\n\n`;
    markdown += `**Generated:** ${new Date().toLocaleString()}\n\n`;

    const summary = this.flowTracker.generateFlowSummary();
    markdown += `## Summary\n\n`;
    markdown += `- **Total Flows Discovered:** ${summary.completedFlows}\n`;
    markdown += `- **Total Steps:** ${summary.totalSteps}\n`;
    markdown += `- **Total Screenshots:** ${this.screenshotCount}\n`;
    markdown += `- **Unique URLs:** ${summary.totalUrls}\n`;
    markdown += `- **Average Steps per Flow:** ${summary.averageStepsPerFlow}\n\n`;

    markdown += `## Customer Journeys\n\n`;

    for (const flow of flows) {
      markdown += `### [${this.formatFlowTitle(flow.type)}](flows/flow-${flow.type}/${flow.type}.md)\n`;
      markdown += `- **Steps:** ${flow.steps.length}\n`;
      markdown += `- **Duration:** ${this.formatDuration(flow.metadata.duration)}\n`;
      markdown += `- **Start URL:** ${flow.startUrl}\n\n`;
    }

    await fs.writeFile(indexPath, markdown);
    console.log(`πŸ“„ Generated index documentation: ${indexPath}`);
  }

  /**
   * Write JSON files into `<outputDir>/metadata`: one `<type>.json` per
   * flow, plus `url-index.json` and `flow-summary.json`.
   *
   * @param {object[]} flows - Completed flows to serialize.
   * @returns {Promise<void>}
   */
  async generateMetadataFiles(flows) {
    const metadataDir = path.join(this.spiderOptions.outputDir, 'metadata');

    // Generate flow metadata
    for (const flow of flows) {
      const metadataPath = path.join(metadataDir, `${flow.type}.json`);
      await fs.writeFile(metadataPath, JSON.stringify(flow, null, 2));
    }

    // Generate URL index
    const urlIndex = this.flowTracker.urlCapture.generateUrlIndex();
    const urlIndexPath = path.join(metadataDir, 'url-index.json');
    await fs.writeFile(urlIndexPath, JSON.stringify(urlIndex, null, 2));

    // Generate summary
    const summary = this.flowTracker.generateFlowSummary();
    const summaryPath = path.join(metadataDir, 'flow-summary.json');
    await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));

    console.log('πŸ“„ Generated metadata files');
  }

  /**
   * Build the object returned to the caller of spiderApplication().
   *
   * @returns {{success: boolean, summary: object}} The FlowTracker summary
   *   extended with `screenshotCount` and `outputDirectory`.
   */
  generateSummaryReport() {
    const summary = this.flowTracker.generateFlowSummary();

    return {
      success: true,
      summary: {
        ...summary,
        screenshotCount: this.screenshotCount,
        outputDirectory: this.spiderOptions.outputDir
      }
    };
  }
}

module.exports = ApplicationSpider;