@knowcode/screenshotfetch
Version:
Web application spider with screenshot capture and customer journey documentation. Automate user flow documentation with authentication support.
448 lines (357 loc) • 14.6 kB
JavaScript
const ScreenshotCapture = require('../../capture');
const LoginHandler = require('./LoginHandler');
const FlowTracker = require('./FlowTracker');
const path = require('path');
const fs = require('fs').promises;
/**
 * Spiders a web application in a browser page, recording "flows" (sequences of
 * clicked elements) with a screenshot per step, then writes markdown and JSON
 * documentation for the completed flows into the configured output directory.
 *
 * Browser lifecycle (`init`, `browser`, `close`) and `options.viewport` are not
 * defined in this class; they are presumably inherited from ScreenshotCapture.
 */
class ApplicationSpider extends ScreenshotCapture {
  /**
   * @param {object} [options] - Forwarded unchanged to the ScreenshotCapture
   *   base class and to LoginHandler; also spread over the spider defaults and
   *   the FlowTracker defaults below, so any key given here wins.
   */
  constructor(options = {}) {
    super(options);

    this.spiderOptions = {
      maxDepth: 3,
      maxPages: 100,
      maxFlows: 5,
      flowMode: true,
      includeQueryParams: true,
      outputDir: './docs',
      screenshotFormat: 'png',
      ...options
    };

    this.loginHandler = new LoginHandler(options);
    this.flowTracker = new FlowTracker({
      maxFlowDepth: 10,
      maxFlows: this.spiderOptions.maxFlows,
      ...options
    });

    this.visitedPages = new Set();
    this.screenshotCount = 0;
  }

  /**
   * Runs the whole spider: open the start URL, optionally log in, explore,
   * and generate documentation.
   *
   * @param {string} startUrl - URL loaded first.
   * @param {string} [username] - Login is attempted only when both username
   *   and password are truthy.
   * @param {string} [password]
   * @returns {Promise<object>} The object built by generateSummaryReport().
   * @throws {Error} 'Authentication failed' when login does not succeed; any
   *   other error is logged and rethrown. `this.close()` runs in all cases.
   */
  async spiderApplication(startUrl, username, password) {
    console.log('π·οΈ Starting application spider...');
    console.log(`π Start URL: ${startUrl}`);
    console.log(`π Output directory: ${this.spiderOptions.outputDir}`);

    try {
      // Initialize browser
      await this.init();

      // Navigate to start URL
      const page = await this.browser.newPage();
      await page.setViewport(this.options.viewport);
      await page.goto(startUrl, { waitUntil: 'networkidle2' });

      // Handle authentication if credentials provided
      if (username && password) {
        const loginSuccess = await this.handleAuthentication(page, username, password);
        if (!loginSuccess) {
          throw new Error('Authentication failed');
        }
      }

      // Create output directory structure
      await this.setupOutputDirectories();

      // Start spidering with flow tracking
      if (this.spiderOptions.flowMode) {
        await this.spiderWithFlowTracking(page);
      } else {
        await this.spiderBasic(page);
      }

      // Generate documentation
      await this.generateDocumentation();

      console.log('✅ Application spidering completed successfully');
      return this.generateSummaryReport();
    } catch (error) {
      console.error('β Spider failed:', error.message);
      throw error;
    } finally {
      await this.close();
    }
  }

  /**
   * Logs in through the LoginHandler when it reports that the page needs it.
   *
   * @param {object} page - Browser page to authenticate on.
   * @param {string} username
   * @param {string} password
   * @returns {Promise<boolean>} true when no login was required or the login
   *   succeeded; false when the login failed or any step threw (the error is
   *   logged, never propagated).
   */
  async handleAuthentication(page, username, password) {
    console.log('π Handling authentication...');

    try {
      // Check if login is required
      const needsLogin = await this.loginHandler.isLoginRequired(page);
      if (!needsLogin) {
        console.log('βΉοΈ No authentication required');
        return true;
      }

      const success = await this.loginHandler.performLogin(page, username, password);
      if (!success) {
        console.log('β Authentication failed');
        return false;
      }

      console.log('✅ Authentication successful');
      // Wait for any post-login redirects or page changes
      await new Promise(resolve => setTimeout(resolve, 3000));
      return true;
    } catch (error) {
      console.error('β Authentication error:', error.message);
      return false;
    }
  }

  /**
   * Creates the output directory and its `flows`, `metadata` and `_images`
   * subdirectories (recursively, so existing directories are not an error).
   *
   * @returns {Promise<void>}
   */
  async setupOutputDirectories() {
    const dirs = [
      this.spiderOptions.outputDir,
      path.join(this.spiderOptions.outputDir, 'flows'),
      path.join(this.spiderOptions.outputDir, 'metadata'),
      path.join(this.spiderOptions.outputDir, '_images')
    ];

    for (const dir of dirs) {
      await fs.mkdir(dir, { recursive: true });
    }

    console.log('π Output directories created');
  }

  /**
   * Repeatedly starts a flow, explores it, and resets the page until
   * `maxFlows` flows have succeeded or `maxFlows * 2` attempts were made.
   *
   * @param {object} page - Browser page to drive.
   * @returns {Promise<void>}
   */
  async spiderWithFlowTracking(page) {
    console.log('π Starting flow-based spidering...');

    let flowCount = 0;
    const maxAttempts = this.spiderOptions.maxFlows * 2; // Allow some failed attempts
    let attempts = 0;

    while (flowCount < this.spiderOptions.maxFlows && attempts < maxAttempts) {
      attempts++;

      try {
        // Start a new flow (the tracker keeps it as its current flow)
        this.flowTracker.startNewFlow(page.url(), `flow-${flowCount + 1}`);

        // Take initial screenshot
        await this.captureFlowStep(page, 'Initial page load');

        // Discover and follow clickable elements
        const success = await this.exploreFlow(page);

        if (success) {
          this.flowTracker.completeCurrentFlow();
          flowCount++;
          console.log(`✅ Completed flow ${flowCount}/${this.spiderOptions.maxFlows}`);
        } else {
          console.log(`β οΈ Flow attempt ${attempts} was not successful`);
        }

        // Reset to a clean state for next flow (go back to authenticated home)
        if (flowCount < this.spiderOptions.maxFlows) {
          await this.resetToStartingPoint(page);
        }
      } catch (error) {
        console.error(`β Flow ${flowCount + 1} failed:`, error.message);
        await this.resetToStartingPoint(page);
      }
    }

    console.log(`π Flow spidering completed: ${flowCount} flows discovered`);
  }

  /**
   * Clicks through the current page: on each iteration takes the first element
   * returned by the flow tracker, clicks it, and captures a step. Stops after
   * 10 steps, when the tracker reports max depth, when nothing is clickable,
   * when a click fails, or on any error.
   *
   * @param {object} page - Browser page to drive.
   * @returns {Promise<boolean>} true when at least one step was taken.
   */
  async exploreFlow(page) {
    let stepCount = 0;
    const maxSteps = 10;

    while (stepCount < maxSteps && !this.flowTracker.hasReachedMaxDepth()) {
      try {
        // Discover clickable elements on current page
        const elements = await this.flowTracker.discoverClickableElements(page);
        if (elements.length === 0) {
          console.log('βΉοΈ No more clickable elements found');
          break;
        }

        // Take the first candidate; the tracker is presumably responsible for
        // ordering/filtering already-visited elements.
        const element = elements[0];
        if (!element) {
          break;
        }

        // Click the element
        const clickSuccess = await this.flowTracker.clickElement(page, element);
        if (!clickSuccess) {
          break;
        }

        // Capture screenshot of the new state. Fall back to a fixed label so
        // an element without text or href is not recorded as "undefined".
        const action = `Clicked: ${element.text || element.href || 'unknown element'}`;
        await this.captureFlowStep(page, action, { element });
        stepCount++;

        // Wait before next action
        await new Promise(resolve => setTimeout(resolve, 2000));
      } catch (error) {
        console.error(`β Error in flow exploration step ${stepCount}:`, error.message);
        break;
      }
    }

    return stepCount > 0; // Success if we managed to take at least one step
  }

  /**
   * Records a step on the current flow and saves a viewport screenshot for it
   * under `<outputDir>/flows/flow-<type>/_images/`.
   *
   * @param {object} page - Browser page to screenshot.
   * @param {string} action - Human-readable description; also used (sanitized)
   *   in the screenshot filename.
   * @param {object} [metadata] - Passed through to the flow tracker.
   * @returns {Promise<object>} The step returned by the tracker, with a
   *   `screenshot` property ({ filename, path, relativePath }) attached.
   * @throws Rethrows any error after logging it.
   */
  async captureFlowStep(page, action, metadata = {}) {
    try {
      // Add flow step with URL tracking
      const step = await this.flowTracker.addFlowStep(page, action, metadata);

      // Generate screenshot filename
      const flow = this.flowTracker.getCurrentFlow();
      const stepPrefix = step.stepNumber.toString().padStart(2, '0');
      const filename = `${stepPrefix}-${this.sanitizeFilename(action)}.${this.spiderOptions.screenshotFormat}`;
      const flowDir = path.join(this.spiderOptions.outputDir, 'flows', `flow-${flow.type}`);
      const screenshotPath = path.join(flowDir, '_images', filename);

      // Ensure directory exists
      await fs.mkdir(path.dirname(screenshotPath), { recursive: true });

      // Take screenshot
      await page.screenshot({ path: screenshotPath, fullPage: false });

      // Update step with screenshot info; relativePath is relative to the
      // flow directory, which is where the flow's markdown file is written.
      step.screenshot = {
        filename,
        path: screenshotPath,
        relativePath: path.relative(flowDir, screenshotPath)
      };

      this.screenshotCount++;
      console.log(`πΈ Screenshot ${this.screenshotCount}: ${filename}`);
      return step;
    } catch (error) {
      console.error('β Error capturing flow step:', error.message);
      throw error;
    }
  }

  /**
   * Turns free text into a filename fragment: lowercased, everything except
   * a-z, 0-9, whitespace and '-' removed, whitespace runs replaced by '-',
   * truncated to 50 characters.
   *
   * @param {string} text
   * @returns {string}
   */
  sanitizeFilename(text) {
    return text
      .toLowerCase()
      .replace(/[^a-z0-9\s-]/g, '')
      .replace(/\s+/g, '-')
      .substring(0, 50);
  }

  /**
   * Best-effort navigation back to a home-like page on the current origin.
   * Tries a fixed list of paths in order and stops at the first that loads.
   * Never throws: failures are logged and the caller continues.
   *
   * @param {object} page - Browser page to navigate.
   * @returns {Promise<void>}
   */
  async resetToStartingPoint(page) {
    try {
      console.log('π Resetting to starting point...');

      // Try to navigate back to home/dashboard
      const baseUrl = new URL(page.url()).origin;

      // Common dashboard/home paths to try
      const homePaths = ['/', '/dashboard', '/home', '/main', '/app'];
      let didReset = false;
      let lastError = null;

      // Named homePath (not `path`) so the `path` module is not shadowed.
      for (const homePath of homePaths) {
        try {
          const target = new URL(homePath, baseUrl).href;
          await page.goto(target, { waitUntil: 'networkidle2', timeout: 10000 });
          console.log(`✅ Reset to: ${page.url()}`);
          didReset = true;
          break;
        } catch (navigationError) {
          // Remember the failure and try the next path
          lastError = navigationError;
        }
      }

      // Surface the failure instead of swallowing it when nothing loaded.
      if (!didReset && lastError) {
        console.error('β οΈ Could not reset to starting point:', lastError.message);
      }

      // Wait a moment for page to stabilize
      await new Promise(resolve => setTimeout(resolve, 2000));
    } catch (error) {
      console.error('β οΈ Could not reset to starting point:', error.message);
    }
  }

  /**
   * Non-flow mode: currently only captures the page as it is.
   *
   * @param {object} page - Browser page to capture.
   * @returns {Promise<void>}
   */
  async spiderBasic(page) {
    console.log('πΈοΈ Starting basic spidering mode...');

    // Basic spidering implementation (for future enhancement)
    // For now, just capture the current page
    await this.captureFlowStep(page, 'Basic spider - single page');
  }

  /**
   * Writes per-flow markdown, the index markdown and the metadata JSON files
   * for all flows the tracker reports as completed.
   *
   * @returns {Promise<void>}
   * @throws Rethrows any error after logging it.
   */
  async generateDocumentation() {
    console.log('π Generating documentation...');

    try {
      const flows = this.flowTracker.getCompletedFlows();

      // Generate flow documentation for each completed flow
      for (const flow of flows) {
        await this.generateFlowDocumentation(flow);
      }

      // Generate index documentation
      await this.generateIndexDocumentation(flows);

      // Generate metadata files
      await this.generateMetadataFiles(flows);

      console.log('✅ Documentation generated successfully');
    } catch (error) {
      console.error('β Error generating documentation:', error.message);
      throw error;
    }
  }

  /**
   * Writes `<outputDir>/flows/flow-<type>/<type>.md` describing one flow,
   * embedding each step's screenshot when one was captured.
   *
   * @param {object} flow - Reads `type`, `id`, `startTime`, `startUrl`,
   *   `metadata.duration` and `steps[]` (each with `stepNumber`, `pageTitle`,
   *   `url.raw`, `url.normalized`, `action`, `timestamp`, optional `screenshot`).
   * @returns {Promise<void>}
   */
  async generateFlowDocumentation(flow) {
    const flowDir = path.join(this.spiderOptions.outputDir, 'flows', `flow-${flow.type}`);
    const markdownPath = path.join(flowDir, `${flow.type}.md`);

    // Ensure directory exists
    await fs.mkdir(flowDir, { recursive: true });

    let markdown = `# Customer Journey: ${this.formatFlowTitle(flow.type)}\n\n`;
    markdown += `## Flow Overview\n`;
    markdown += `This journey was automatically discovered and documented.\n\n`;
    markdown += `- **Flow ID:** ${flow.id}\n`;
    markdown += `- **Total Steps:** ${flow.steps.length}\n`;
    markdown += `- **Duration:** ${this.formatDuration(flow.metadata.duration)}\n`;
    markdown += `- **Started:** ${new Date(flow.startTime).toLocaleString()}\n\n`;
    markdown += `## Journey Steps\n\n`;

    for (let i = 0; i < flow.steps.length; i++) {
      const step = flow.steps[i];
      const nextStep = flow.steps[i + 1];

      markdown += `### Step ${step.stepNumber}: ${step.pageTitle || 'Page'}\n`;

      if (step.screenshot) {
        const relativePath = step.screenshot.relativePath;
        if (relativePath) {
          // Markdown links need forward slashes even when path.relative()
          // produced platform-specific separators.
          const imageHref = relativePath.split(path.sep).join('/');
          markdown += `![Step ${step.stepNumber}](${imageHref})\n\n`;
        } else {
          markdown += `\n`;
        }
      }

      markdown += `**URL:** \`${step.url.raw}\`\n`;
      markdown += `**Action:** ${step.action}\n`;
      if (nextStep) {
        markdown += `**Next:** Go to next step\n`;
      } else {
        markdown += `**Journey Complete**\n`;
      }
      markdown += `**Timestamp:** ${new Date(step.timestamp).toLocaleString()}\n\n`;

      if (nextStep) {
        markdown += `β *User navigates to next step*\n\n`;
      }
    }

    // Add flow metadata section
    markdown += `## Flow Metadata\n`;
    markdown += `- **Flow Type:** ${flow.type}\n`;
    markdown += `- **Start URL:** ${flow.startUrl}\n`;
    markdown += `- **Total Screenshots:** ${flow.steps.filter(s => s.screenshot).length}\n`;
    markdown += `- **Unique URLs:** ${new Set(flow.steps.map(s => s.url.normalized)).size}\n`;

    await fs.writeFile(markdownPath, markdown);
    console.log(`π Generated flow documentation: ${markdownPath}`);
  }

  /**
   * Converts a dash-separated flow type into a title, capitalizing the first
   * character of each part (e.g. 'flow-1' -> 'Flow 1').
   *
   * @param {string} flowType
   * @returns {string}
   */
  formatFlowTitle(flowType) {
    return flowType
      .split('-')
      .map(word => word.charAt(0).toUpperCase() + word.slice(1))
      .join(' ');
  }

  /**
   * Formats a millisecond duration as 'Xm Ys' or 'Ys'.
   *
   * @param {number} ms
   * @returns {string} 'Unknown' for any falsy value (including 0).
   */
  formatDuration(ms) {
    if (!ms) return 'Unknown';

    const seconds = Math.floor(ms / 1000);
    const minutes = Math.floor(seconds / 60);
    return minutes > 0 ? `${minutes}m ${seconds % 60}s` : `${seconds}s`;
  }

  /**
   * Writes `<outputDir>/index.md` with the tracker's summary figures and a
   * linked entry per flow.
   *
   * @param {object[]} flows - Completed flows to list.
   * @returns {Promise<void>}
   */
  async generateIndexDocumentation(flows) {
    const indexPath = path.join(this.spiderOptions.outputDir, 'index.md');

    let markdown = `# Web Application Documentation\n\n`;
    markdown += `**Generated:** ${new Date().toLocaleString()}\n\n`;

    const summary = this.flowTracker.generateFlowSummary();
    markdown += `## Summary\n\n`;
    markdown += `- **Total Flows Discovered:** ${summary.completedFlows}\n`;
    markdown += `- **Total Steps:** ${summary.totalSteps}\n`;
    markdown += `- **Total Screenshots:** ${this.screenshotCount}\n`;
    markdown += `- **Unique URLs:** ${summary.totalUrls}\n`;
    markdown += `- **Average Steps per Flow:** ${summary.averageStepsPerFlow}\n\n`;

    markdown += `## Customer Journeys\n\n`;
    for (const flow of flows) {
      markdown += `### [${this.formatFlowTitle(flow.type)}](flows/flow-${flow.type}/${flow.type}.md)\n`;
      markdown += `- **Steps:** ${flow.steps.length}\n`;
      markdown += `- **Duration:** ${this.formatDuration(flow.metadata.duration)}\n`;
      markdown += `- **Start URL:** ${flow.startUrl}\n\n`;
    }

    await fs.writeFile(indexPath, markdown);
    console.log(`π Generated index documentation: ${indexPath}`);
  }

  /**
   * Writes JSON files into `<outputDir>/metadata`: one `<type>.json` per flow,
   * `url-index.json` from the tracker's URL capture, and `flow-summary.json`.
   *
   * @param {object[]} flows - Completed flows to serialize.
   * @returns {Promise<void>}
   */
  async generateMetadataFiles(flows) {
    const metadataDir = path.join(this.spiderOptions.outputDir, 'metadata');

    // Generate flow metadata
    for (const flow of flows) {
      const metadataPath = path.join(metadataDir, `${flow.type}.json`);
      await fs.writeFile(metadataPath, JSON.stringify(flow, null, 2));
    }

    // Generate URL index
    const urlIndex = this.flowTracker.urlCapture.generateUrlIndex();
    const urlIndexPath = path.join(metadataDir, 'url-index.json');
    await fs.writeFile(urlIndexPath, JSON.stringify(urlIndex, null, 2));

    // Generate summary
    const summary = this.flowTracker.generateFlowSummary();
    const summaryPath = path.join(metadataDir, 'flow-summary.json');
    await fs.writeFile(summaryPath, JSON.stringify(summary, null, 2));

    console.log('π Generated metadata files');
  }

  /**
   * Builds the result object returned by spiderApplication().
   *
   * @returns {{success: boolean, summary: object}} The tracker's flow summary
   *   extended with `screenshotCount` and `outputDirectory`.
   */
  generateSummaryReport() {
    const summary = this.flowTracker.generateFlowSummary();

    return {
      success: true,
      summary: {
        ...summary,
        screenshotCount: this.screenshotCount,
        outputDirectory: this.spiderOptions.outputDir
      }
    };
  }
}
// Expose the ApplicationSpider class as this module's CommonJS export.
module.exports = ApplicationSpider;