UNPKG

web-page-analyzer-cli

Version:

一个强大的网站链接抓取工具,支持深度抓取、认证和页面分析

224 lines (188 loc) 7.17 kB
#!/usr/bin/env node

const { chromium } = require('playwright');
const fs = require('fs');
const path = require('path');
const { URL } = require('url');

/**
 * Print the command-line usage text to stdout.
 */
function showHelp() {
  console.log(`
网页截图工具 (Web Screenshot Tool)

功能:
  打开指定URL,等待一段时间后进行全页面截图,并返回图片存放的绝对路径。

使用方法:
  web-screenshot <URL> [选项]

参数:
  URL                          必须提供,要截图的目标网页URL

选项:
  --cookie="name=value"        为页面请求设置Cookie。可多次使用。
  --header="name:value"        为页面请求设置自定义HTTP头。可多次使用。
  --localstorage="key=value"   在页面加载前设置LocalStorage。可多次使用。
  --token=<your-token>         设置Bearer Token认证。
  --auth=<user:pass>           设置Basic Auth认证。
  --output-dir=<path>          指定截图文件的输出目录 (默认: ./output/screenshots)
  --wait-time=<ms>             页面加载后等待的毫秒数 (默认: 10000)
  --help, -h                   显示此帮助信息

示例:
  # 基本截图
  web-screenshot "https://example.com"

  # 携带认证信息截图 (使用多个参数)
  web-screenshot "https://admin.example.com/dashboard" \\
    --header="X-Tenant-ID: 123" \\
    --cookie="session_id=abcxyz" \\
    --localstorage="theme=dark"

输出:
  脚本执行成功后,会在控制台打印出截图文件的绝对路径。
`);
}

/**
 * Split a string at the FIRST occurrence of a separator.
 *
 * Everything after the first separator is kept intact, so values that
 * themselves contain the separator (e.g. a password with ':' or a cookie
 * value with '=') are not truncated.
 *
 * @param {string} text - The string to split.
 * @param {string} separator - The separator to look for.
 * @returns {[string, string]} The part before and the part after the first
 *   separator; the second element is '' when the separator is absent.
 */
function splitOnce(text, separator) {
  const index = text.indexOf(separator);
  if (index === -1) {
    return [text, ''];
  }
  return [text.slice(0, index), text.slice(index + separator.length)];
}

/**
 * Warn on stderr that an option was ignored because its value is malformed.
 * Only the option name is printed so that secrets in the value are not echoed.
 *
 * @param {string} arg - The raw command-line argument.
 */
function warnIgnoredOption(arg) {
  const [optionName] = splitOnce(arg, '=');
  console.error(`[警告] 已忽略格式无效的参数: ${optionName}`);
}

/**
 * Parse the command-line arguments (process.argv) into a configuration object.
 *
 * Exits the process with code 1 when the target URL is missing or cannot be
 * parsed by the URL constructor. Options with malformed values are ignored
 * with a warning on stderr. The first argument that does not start with '-'
 * is taken as the target URL; later positional arguments are ignored.
 *
 * @returns {{
 *   targetUrl: string,
 *   authOptions: {
 *     headers: Object<string, string>,
 *     cookies: Array<{name: string, value: string}>,
 *     localstorage: Array<{name: string, value: string}>
 *   },
 *   outputDir: string,
 *   waitTime: number
 * }} The parsed configuration.
 */
function parseArguments() {
  const args = process.argv.slice(2);
  const options = {
    targetUrl: null,
    authOptions: {
      headers: {},
      cookies: [],
      localstorage: [],
    },
    outputDir: './output/screenshots',
    waitTime: 10000,
  };

  for (const arg of args) {
    if (arg.startsWith('--cookie=')) {
      const [name, value] = splitOnce(arg.slice('--cookie='.length), '=');
      if (name && value) {
        options.authOptions.cookies.push({ name: name.trim(), value: value.trim() });
      } else {
        warnIgnoredOption(arg);
      }
    } else if (arg.startsWith('--header=')) {
      const [name, value] = splitOnce(arg.slice('--header='.length), ':');
      if (name && value) {
        options.authOptions.headers[name.trim()] = value.trim();
      } else {
        warnIgnoredOption(arg);
      }
    } else if (arg.startsWith('--localstorage=')) {
      const [key, value] = splitOnce(arg.slice('--localstorage='.length), '=');
      if (key && value) {
        options.authOptions.localstorage.push({ name: key.trim(), value: value.trim() });
      } else {
        warnIgnoredOption(arg);
      }
    } else if (arg.startsWith('--token=')) {
      options.authOptions.headers['Authorization'] = `Bearer ${arg.slice('--token='.length)}`;
    } else if (arg.startsWith('--auth=')) {
      // Split on the first ':' only: the password may itself contain colons.
      const [username, password] = splitOnce(arg.slice('--auth='.length), ':');
      if (username && password) {
        const encoded = Buffer.from(`${username}:${password}`).toString('base64');
        options.authOptions.headers['Authorization'] = `Basic ${encoded}`;
      } else {
        warnIgnoredOption(arg);
      }
    } else if (arg.startsWith('--output-dir=')) {
      options.outputDir = arg.slice('--output-dir='.length);
    } else if (arg.startsWith('--wait-time=')) {
      const time = Number.parseInt(arg.slice('--wait-time='.length), 10);
      if (!Number.isNaN(time)) {
        options.waitTime = time;
      } else {
        warnIgnoredOption(arg);
      }
    } else if (!arg.startsWith('-') && options.targetUrl === null) {
      options.targetUrl = arg;
    }
  }

  // Validate the URL.
  if (!options.targetUrl) {
    console.error('错误: 必须提供目标URL。');
    showHelp();
    process.exit(1);
  }

  try {
    new URL(options.targetUrl);
  } catch (error) {
    console.error(`错误: 无效的URL格式 "${options.targetUrl}"`);
    process.exit(1);
  }

  return options;
}

/**
 * Main entry point: open the target URL in headless Chromium, wait, take a
 * full-page screenshot and print the path of the saved file.
 *
 * On failure the error message is written to stderr and the process exit
 * code is set to 1; the browser is closed in all cases before the process
 * ends.
 *
 * @returns {Promise<void>}
 */
async function takeScreenshot() {
  // Handle the help flag before any argument validation.
  if (process.argv.includes('--help') || process.argv.includes('-h')) {
    showHelp();
    process.exit(0);
  }

  const { targetUrl, authOptions, outputDir, waitTime } = parseArguments();

  console.log(`[信息] 正在启动浏览器准备截图: ${targetUrl}`);

  // 1. Build the browser-context configuration.
  const contextOptions = {};
  const { headers, cookies, localstorage } = authOptions;
  const targetParsedUrl = new URL(targetUrl);
  const targetOrigin = targetParsedUrl.origin;
  const targetDomain = targetParsedUrl.hostname;

  // Custom headers.
  if (Object.keys(headers).length > 0) {
    contextOptions.extraHTTPHeaders = headers;
    console.log('[信息] 准备设置自定义 Headers。');
  }

  // storageState carries both cookies and localStorage entries.
  const storageState = { cookies: [], origins: [] };

  if (cookies.length > 0) {
    // Cookies given on the command line have no domain/path, so they are
    // scoped to the target host and the root path.
    storageState.cookies = cookies.map((cookie) => ({
      ...cookie,
      domain: cookie.domain || targetDomain,
      path: cookie.path || '/',
    }));
    console.log('[信息] 准备设置自定义 Cookies。');
  }

  if (localstorage.length > 0) {
    storageState.origins.push({
      origin: targetOrigin,
      localStorage: localstorage,
    });
    console.log('[信息] 准备设置 LocalStorage。');
  }

  if (storageState.cookies.length > 0 || storageState.origins.length > 0) {
    contextOptions.storageState = storageState;
  }

  // 2. Launch the browser and create the context. Both calls live inside the
  // try block so that any failure is reported and the browser is still closed.
  let browser;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext(contextOptions);
    const page = await context.newPage();

    // 3. Navigate to the page.
    console.log('[信息] 正在打开页面...');
    await page.goto(targetUrl, { waitUntil: 'networkidle', timeout: 60000 });
    console.log('[信息] 页面加载完成。');

    // Wait the requested time so dynamic content can finish rendering.
    console.log(`[信息] 等待 ${waitTime}毫秒以便动态内容加载...`);
    await page.waitForTimeout(waitTime);
    console.log('[信息] 等待结束,准备截图。');

    // Prepare the output directory (one sub-directory per host) and file name.
    const domainDirName = targetDomain.replace(/[^a-zA-Z0-9-]/g, '_');
    const finalOutputDir = path.resolve(outputDir, domainDirName);
    // With `recursive: true` this is a no-op when the directory already exists.
    fs.mkdirSync(finalOutputDir, { recursive: true });

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const urlPath = targetParsedUrl.pathname
      .replace(/^\/|\/$/g, '')
      .replace(/[^a-zA-Z0-9_-]/g, '_');
    const screenshotFileName = `screenshot_${urlPath || 'index'}_${timestamp}.png`;
    const screenshotPath = path.join(finalOutputDir, screenshotFileName);

    // 4. Take the screenshot.
    await page.screenshot({ path: screenshotPath, fullPage: true });

    console.log(`[成功] 截图已保存到: ${screenshotPath}`);
    // Print the bare absolute path on its own line for callers that parse stdout.
    console.log(screenshotPath);
  } catch (error) {
    console.error(`[错误] 脚本执行失败: ${error.message}`);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the finally block can close the browser.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}

// Run the main function; report anything that escapes it (e.g. a failure
// while closing the browser) instead of leaving an unhandled rejection.
takeScreenshot().catch((error) => {
  console.error(`[错误] 脚本执行失败: ${error.message}`);
  process.exitCode = 1;
});