web-page-analyzer-cli
Version:
一个强大的网站链接抓取工具,支持深度抓取、认证和页面分析
224 lines (188 loc) • 7.17 kB
JavaScript
#!/usr/bin/env node
const { chromium } = require('playwright');
const fs = require('fs');
const path = require('path');
const { URL } = require('url');
/**
 * Prints the command-line usage text to standard output.
 *
 * The text covers the purpose of the tool, the required URL argument, every
 * supported option with its default, two usage examples, and a note about
 * what the script prints on success. It is emitted with a single
 * `console.log` call and begins and ends with an empty line.
 *
 * @returns {void}
 */
function showHelp() {
  // One entry per output line; the empty first and last entries produce the
  // leading and trailing newline of the help text.
  const helpLines = [
    '',
    '网页截图工具 (Web Screenshot Tool)',
    '功能: 打开指定URL,等待一段时间后进行全页面截图,并返回图片存放的绝对路径。',
    '使用方法: web-screenshot <URL> [选项]',
    '参数:',
    'URL 必须提供,要截图的目标网页URL',
    '选项:',
    '--cookie="name=value" 为页面请求设置Cookie。可多次使用。',
    '--header="name:value" 为页面请求设置自定义HTTP头。可多次使用。',
    '--localstorage="key=value" 在页面加载前设置LocalStorage。可多次使用。',
    '--token=<your-token> 设置Bearer Token认证。',
    '--auth=<user:pass> 设置Basic Auth认证。',
    '--output-dir=<path> 指定截图文件的输出目录 (默认: ./output/screenshots)',
    '--wait-time=<ms> 页面加载后等待的毫秒数 (默认: 10000)',
    '--help, -h 显示此帮助信息',
    '示例:',
    '# 基本截图',
    'web-screenshot "https://example.com"',
    '# 携带认证信息截图 (使用多个参数)',
    'web-screenshot "https://admin.example.com/dashboard" \\',
    '--header="X-Tenant-ID: 123" \\',
    '--cookie="session_id=abcxyz" \\',
    '--localstorage="theme=dark"',
    '输出:',
    '脚本执行成功后,会在控制台打印出截图文件的绝对路径。',
    '',
  ];
  const helpText = helpLines.join('\n');
  console.log(helpText);
}
/**
 * Parses the command-line arguments into a screenshot configuration.
 *
 * Reads `process.argv` without its first two entries. The first argument that
 * does not start with `-` becomes the target URL; later positional arguments
 * and unrecognized options are ignored.
 *
 * Recognized options (all in `--name=value` form):
 *   --cookie="name=value"        appended to `authOptions.cookies`
 *   --header="name:value"        stored in `authOptions.headers`
 *   --localstorage="key=value"   appended to `authOptions.localstorage`
 *   --token=<token>              sets the `Authorization: Bearer <token>` header
 *   --auth=<user:pass>           sets the `Authorization: Basic <base64>` header
 *   --output-dir=<path>          overrides `outputDir`
 *   --wait-time=<ms>             overrides `waitTime` (non-negative integer)
 *
 * Name/value pairs are split at the FIRST separator only, so values may
 * themselves contain `=` or `:`. A malformed value for a known option is
 * skipped with a warning on stderr and the default is kept.
 *
 * Side effects: when no URL is given, prints an error plus the help text and
 * exits with code 1; when the URL cannot be parsed by `new URL()`, prints an
 * error and exits with code 1.
 *
 * @returns {{
 *   targetUrl: string,
 *   authOptions: {
 *     headers: Object<string, string>,
 *     cookies: Array<{name: string, value: string}>,
 *     localstorage: Array<{name: string, value: string}>
 *   },
 *   outputDir: string,
 *   waitTime: number
 * }} The parsed configuration.
 */
function parseArguments() {
  const args = process.argv.slice(2);
  const options = {
    targetUrl: null,
    authOptions: {
      headers: {},
      cookies: [],
      localstorage: [],
    },
    outputDir: './output/screenshots',
    waitTime: 10000,
  };

  // Splits "left<sep>right" at the first separator; a missing separator
  // yields an empty right-hand side.
  const splitAtFirst = (text, separator) => {
    const index = text.indexOf(separator);
    if (index === -1) {
      return [text, ''];
    }
    return [text.slice(0, index), text.slice(index + separator.length)];
  };

  // Reports a known option whose value could not be used.
  const warnIgnored = (arg) => {
    console.error(`[警告] 已忽略格式无效的参数: ${arg}`);
  };

  let urlSet = false;
  for (const arg of args) {
    // Only "--flag=value" arguments are treated as options; everything else
    // falls through to the positional (URL) handling in the default case.
    const equalsIndex = arg.indexOf('=');
    const isOption = arg.startsWith('--') && equalsIndex !== -1;
    const flag = isOption ? arg.slice(0, equalsIndex) : null;
    const raw = isOption ? arg.slice(equalsIndex + 1) : '';

    switch (flag) {
      case '--cookie': {
        const [name, value] = splitAtFirst(raw, '=').map((part) => part.trim());
        if (name && value) {
          options.authOptions.cookies.push({ name, value });
        } else {
          warnIgnored(arg);
        }
        break;
      }
      case '--header': {
        const [name, value] = splitAtFirst(raw, ':').map((part) => part.trim());
        if (name && value) {
          options.authOptions.headers[name] = value;
        } else {
          warnIgnored(arg);
        }
        break;
      }
      case '--localstorage': {
        const [key, value] = splitAtFirst(raw, '=').map((part) => part.trim());
        if (key && value) {
          options.authOptions.localstorage.push({ name: key, value });
        } else {
          warnIgnored(arg);
        }
        break;
      }
      case '--token':
        options.authOptions.headers['Authorization'] = `Bearer ${raw}`;
        break;
      case '--auth': {
        // Basic credentials are "user:password" where only the user part is
        // colon-free, so everything after the first colon is the password.
        const [username, password] = splitAtFirst(raw, ':');
        if (username && password) {
          const encoded = Buffer.from(`${username}:${password}`).toString('base64');
          options.authOptions.headers['Authorization'] = `Basic ${encoded}`;
        } else {
          warnIgnored(arg);
        }
        break;
      }
      case '--output-dir':
        options.outputDir = raw;
        break;
      case '--wait-time': {
        const time = Number.parseInt(raw, 10);
        // A negative delay is meaningless; keep the default instead.
        if (Number.isNaN(time) || time < 0) {
          warnIgnored(arg);
        } else {
          options.waitTime = time;
        }
        break;
      }
      default:
        if (!arg.startsWith('-') && !urlSet) {
          options.targetUrl = arg;
          urlSet = true;
        }
    }
  }

  // Validate the URL.
  if (!options.targetUrl) {
    console.error('错误: 必须提供目标URL。');
    showHelp();
    process.exit(1);
  }
  try {
    new URL(options.targetUrl);
  } catch (error) {
    console.error(`错误: 无效的URL格式 "${options.targetUrl}"`);
    process.exit(1);
  }
  return options;
}
/**
 * Main routine: opens the target URL in headless Chromium, waits, and saves a
 * full-page PNG screenshot.
 *
 * Steps:
 *   1. If `--help` or `-h` is present in `process.argv`, print the help text
 *      and exit with code 0.
 *   2. Parse the remaining options with `parseArguments()`.
 *   3. Build the browser-context options: extra HTTP headers, plus a
 *      `storageState` carrying cookies (defaulting to the target hostname and
 *      path `/`) and LocalStorage entries for the target origin.
 *   4. Launch the browser, navigate with `waitUntil: 'networkidle'` and a
 *      60-second timeout, then wait `waitTime` milliseconds.
 *   5. Save the screenshot under `<outputDir>/<sanitized hostname>/` as
 *      `screenshot_<sanitized path or "index">_<timestamp>.png`, then print
 *      the resolved path on its own line on stdout.
 *
 * On failure the error message is written to stderr and `process.exitCode`
 * is set to 1. The browser, if it was launched, is closed in every case.
 *
 * @returns {Promise<void>}
 */
async function takeScreenshot() {
  // Handle the help flag before any argument validation.
  if (process.argv.includes('--help') || process.argv.includes('-h')) {
    showHelp();
    process.exit(0);
  }

  const options = parseArguments();
  const { targetUrl, authOptions, outputDir, waitTime } = options;
  console.log(`[信息] 正在启动浏览器准备截图: ${targetUrl}`);

  // 1. Prepare the browser-context configuration.
  const contextOptions = {};
  const { headers, cookies, localstorage } = authOptions;
  const targetParsedUrl = new URL(targetUrl);
  const targetOrigin = targetParsedUrl.origin;
  const targetDomain = targetParsedUrl.hostname;

  // Custom headers.
  if (Object.keys(headers).length > 0) {
    contextOptions.extraHTTPHeaders = headers;
    console.log('[信息] 准备设置自定义 Headers。');
  }

  // storageState carries both cookies and LocalStorage into the new context.
  const storageState = { cookies: [], origins: [] };
  if (cookies.length > 0) {
    storageState.cookies = cookies.map((c) => ({
      ...c,
      domain: c.domain || targetDomain,
      path: c.path || '/',
    }));
    console.log('[信息] 准备设置自定义 Cookies。');
  }
  if (localstorage.length > 0) {
    storageState.origins.push({
      origin: targetOrigin,
      localStorage: localstorage,
    });
    console.log('[信息] 准备设置 LocalStorage。');
  }
  if (storageState.cookies.length > 0 || storageState.origins.length > 0) {
    contextOptions.storageState = storageState;
  }

  // 2. Launch the browser and create the context. Both happen inside the try
  // block so that a failure in either is reported and the browser is closed.
  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const context = await browser.newContext(contextOptions);
    const page = await context.newPage();

    // 3. Navigate to the page.
    console.log('[信息] 正在打开页面...');
    await page.goto(targetUrl, { waitUntil: 'networkidle', timeout: 60000 });
    console.log('[信息] 页面加载完成。');

    // Give dynamic content the requested time to render.
    console.log(`[信息] 等待 ${waitTime}毫秒以便动态内容加载...`);
    await page.waitForTimeout(waitTime);
    console.log('[信息] 等待结束,准备截图。');

    // Prepare the output directory and file name.
    const domainDirName = targetDomain.replace(/[^a-zA-Z0-9-]/g, '_');
    const finalOutputDir = path.resolve(outputDir, domainDirName);
    // With `recursive: true` an already existing directory is not an error,
    // so no prior existence check is needed.
    fs.mkdirSync(finalOutputDir, { recursive: true });

    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    const urlPath = targetParsedUrl.pathname
      .replace(/^\/|\/$/g, '')
      .replace(/[^a-zA-Z0-9_-]/g, '_');
    const screenshotFileName = `screenshot_${urlPath || 'index'}_${timestamp}.png`;
    const screenshotPath = path.join(finalOutputDir, screenshotFileName);

    // 4. Take the screenshot.
    await page.screenshot({ path: screenshotPath, fullPage: true });
    console.log(`[成功] 截图已保存到: ${screenshotPath}`);
    // Print the bare absolute path on its own line on stdout.
    console.log(screenshotPath);
  } catch (error) {
    console.error(`[错误] 脚本执行失败: ${error.message}`);
    // Set the exit code instead of calling process.exit(): exiting here would
    // terminate the process before the finally block can close the browser.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
}
// Run the main routine. The returned promise is given a rejection handler so
// that an error escaping takeScreenshot() is reported in the same format as
// its own failures and ends the process with a non-zero exit code, instead of
// surfacing as an unhandled rejection.
takeScreenshot().catch((error) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error(`[错误] 脚本执行失败: ${message}`);
  process.exit(1);
});