web-page-analyzer-cli
Version:
一个强大的网站链接抓取工具,支持深度抓取、认证和页面分析
826 lines (699 loc) • 27.2 kB
JavaScript
const { chromium } = require('playwright');
const fs = require('fs');
const url = require('url');
const path = require('path');
// Global configuration: default keyword lists, crawl defaults and timeouts read by the rest of this file.
const CONFIG = {
// Default keywords searched for in browser logs (used when no --log_keyword argument is passed).
defaultLogKeywords: [
'error',
'exception',
'failed',
'runtime',
'application error',
'crash',
'timeout',
'connection refused',
'not found',
'unauthorized',
'forbidden',
'internal server error',
'bad request',
'service unavailable'
],
// Default keywords searched for in page text (used when no --page_keyword argument is passed).
defaultPageKeywords: [
'not found',
'404',
'page not found',
'error',
'exception',
'runtime error',
'application error',
'client-side exception',
'failed to load',
'loading error',
'network error'
],
// Default crawl depth.
defaultDepth: 1,
// Default number of pages analyzed concurrently.
defaultConcurrency: 8,
// Page timeout in milliseconds (applied as the page default timeout and as the navigation timeout).
pageTimeout: 15000,
// Timeout in milliseconds for the post-navigation "networkidle" wait.
loadTimeout: 7000
};
// Configuration initialization hook.
// NOTE(review): as written this only prints a confirmation message; it does not
// read or modify CONFIG or any other state.
function initializeConfig() {
console.log('配置初始化完成');
}
// Browser manager - reuses one browser instance and limits how many pages are open at once.
/**
 * Owns a single shared Chromium instance and hands out pages that are
 * pre-configured with the authentication data (headers, cookies, localStorage)
 * found in `options.authOptions`.
 *
 * Concurrency accounting: `activePages` counts reserved page slots. A slot is
 * reserved synchronously inside `createPage()` (no `await` between the capacity
 * check and the increment) and released by `closePage()` or when page creation
 * fails, so the counter can neither exceed `maxConcurrency` nor leak.
 */
class BrowserManager {
  /**
   * @param {number} [maxConcurrency] Maximum number of simultaneously open pages;
   *   values that are not >= 1 are treated as 1.
   * @param {object} [options] Parsed CLI options; reads `headless`, `proxy`,
   *   `authOptions` and `targetUrl`.
   */
  constructor(maxConcurrency = CONFIG.defaultConcurrency, options = {}) {
    const requested = Math.floor(Number(maxConcurrency));
    // A limit of 0, a negative number or NaN would block forever or disable the cap.
    this.maxConcurrency = requested >= 1 ? requested : 1;
    this.browser = null;
    this.launchPromise = null; // in-flight chromium.launch(), shared by concurrent callers
    this.activePages = 0;
    this.openPages = new Set(); // pages handed out by createPage() and not yet released
    this.queue = [];
    this.options = options; // contains headless, proxy, authOptions, targetUrl, ...
    this.isShutdown = false;
  }

  /**
   * Returns the shared browser, launching it on first use.
   * Concurrent callers share one launch instead of each starting a browser.
   * @returns {Promise<object|null>} The browser, or null after shutdown.
   */
  async getBrowser() {
    if (this.browser || this.isShutdown) {
      return this.browser;
    }
    if (!this.launchPromise) {
      const browserOptions = {
        headless: this.options.headless !== false
      };
      if (this.options.proxy) {
        browserOptions.proxy = { server: this.options.proxy };
      }
      this.launchPromise = chromium.launch(browserOptions)
        .then(async browser => {
          if (this.isShutdown) {
            // shutdown() ran while the launch was in flight: do not keep the browser.
            await browser.close();
            return null;
          }
          this.browser = browser;
          return browser;
        })
        .finally(() => {
          this.launchPromise = null;
        });
    }
    return this.launchPromise;
  }

  /**
   * Builds the options passed to `browser.newPage()` from `options.authOptions`.
   * Cookies without a domain/path default to the target hostname and '/';
   * localStorage entries are attached to the target origin.
   * @returns {object} Page options (possibly empty).
   * @throws {Error} When `options.targetUrl` is missing or not a valid URL.
   */
  buildPageOptions() {
    const authOpts = this.options.authOptions || {};
    const { headers = {}, cookies = [], localstorage = [] } = authOpts;
    if (!this.options.targetUrl) {
      throw new Error("BrowserManager 需要 targetUrl 来设置认证信息。");
    }
    const { origin: targetOrigin, hostname: targetDomain } = new URL(this.options.targetUrl);

    const pageOptions = {};
    if (Object.keys(headers).length > 0) {
      pageOptions.extraHTTPHeaders = headers;
    }
    const storageState = {
      cookies: cookies.map(c => ({
        ...c,
        domain: c.domain || targetDomain, // a cookie may carry its own domain
        path: c.path || '/'
      })),
      origins: []
    };
    if (localstorage.length > 0) {
      storageState.origins.push({
        origin: targetOrigin,
        localStorage: localstorage
      });
    }
    if (storageState.cookies.length > 0 || storageState.origins.length > 0) {
      pageOptions.storageState = storageState;
    }
    return pageOptions;
  }

  /**
   * Waits for a free slot, reserves it and opens a new authenticated page.
   * The slot is released again if anything fails before the page is returned.
   * @returns {Promise<object>} The new page with its default timeout set.
   * @throws {Error} When the browser is unavailable, `targetUrl` is missing,
   *   or the page cannot be created.
   */
  async createPage() {
    // The loop re-checks capacity after every wake-up and the increment follows
    // without an intervening await, so two callers can never take the same slot.
    while (this.activePages >= this.maxConcurrency) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
    this.activePages++;

    let page = null;
    try {
      const browser = await this.getBrowser();
      if (!browser) {
        throw new Error('浏览器未初始化');
      }
      // Auth data is supplied at creation time so it is in place before any navigation.
      page = await browser.newPage(this.buildPageOptions());
      page.setDefaultTimeout(CONFIG.pageTimeout);
      this.openPages.add(page);
      return page;
    } catch (error) {
      this.activePages = Math.max(0, this.activePages - 1);
      if (page) {
        // Best effort: the original error is the one worth reporting.
        await page.close().catch(() => {});
      }
      throw error;
    }
  }

  /**
   * Closes a page obtained from `createPage()` and releases its slot.
   * The slot is released even when closing throws, and only once per page.
   * @param {object|null} page Page to close; null/undefined is ignored.
   */
  async closePage(page) {
    if (!page) {
      return;
    }
    try {
      if (!page.isClosed()) {
        await page.close();
      }
    } finally {
      if (this.openPages.delete(page)) {
        this.activePages = Math.max(0, this.activePages - 1);
      }
    }
  }

  /**
   * Closes the browser and prevents further launches. Close errors are logged,
   * not thrown. Slot accounting is reset so callers waiting in `createPage()`
   * wake up and fail fast instead of waiting forever.
   */
  async shutdown() {
    this.isShutdown = true;
    if (this.launchPromise) {
      // A launch is in flight; its continuation closes the browser once it is up.
      await this.launchPromise.catch(() => null);
    }
    if (this.browser) {
      try {
        await this.browser.close();
      } catch (error) {
        console.error('关闭浏览器时出错:', error.message);
      }
      this.browser = null;
    }
    this.openPages.clear();
    this.activePages = 0;
  }

  /**
   * Resolves once fewer than `maxConcurrency` pages are active (polls every 100 ms).
   * Kept for existing callers; `createPage()` performs its own atomic reservation.
   */
  async waitForSlot() {
    while (this.activePages >= this.maxConcurrency) {
      await new Promise(resolve => setTimeout(resolve, 100));
    }
  }
}
// Print the usage text and exit successfully when either help flag appears anywhere on the command line.
const helpRequested = ['--help', '-h'].some(flag => process.argv.includes(flag));
if (helpRequested) {
  console.log(`
Web Page Analyzer - 网站页面分析工具 (精简版)
使用方法: web-page-analyzer <URL> [选项]
参数:
URL 要分析的目标网站URL。在所有模式下都必须提供。
选项:
--main_page_url=URL 额外指定要分析的URL路径。可多次使用。
如果使用此选项,脚本会进入“直接分析”模式。
--page_keyword=KEYWORD 在页面内容中搜索的关键字。可多次使用。
--log_keyword=KEYWORD 在浏览器日志中搜索的关键字。可多次使用。
--depth=N 分析深度 (仅在爬取模式下生效),默认为1
--ui 启用浏览器UI界面(非无头模式)
--sequential 使用顺序模式(禁用并发)
--concurrency=N 设置并发数量,默认为8
--proxy=server 设置代理服务器(如:http://127.0.0.1:10809)
--output-dir=dir 指定输出目录,默认为./output
--cookie="name=value" 设置Cookie。可多次使用。
--header="name:value" 设置HTTP头。可多次使用。
--localstorage="key=value" 在页面加载前设置LocalStorage。可多次使用。
--token=your-token 设置Bearer Token (等同于 --header="Authorization: Bearer your-token")
--auth=username:password 设置Basic Auth (等同于 --header="Authorization: Basic ...")
--skip_url_keyword=keyword 跳过路径中包含关键词的URL(可多次使用)
模式说明:
- 爬取模式 (默认): 仅提供一个URL,脚本会从该URL开始爬取。
示例: web-page-analyzer "https://example.com" --depth=1
- 直接分析模式: 提供一个URL,并使用一个或多个 --main_page_url。
脚本将分析主URL和所有指定的额外URL。
示例: web-page-analyzer https://example.com/dashboard --main_page_url=/login
(这将分析 /dashboard 和 /login 两个页面)
`);
  process.exit(0);
}
// Parse command-line arguments.
/**
 * Turns the CLI arguments into the options object used by the rest of the script.
 *
 * The first argument that does not start with '-' is the base URL; later
 * positional arguments are ignored. Unrecognized options are ignored.
 * Terminates the process with exit code 1 when the base URL is missing or is
 * not a parseable URL.
 *
 * @param {string[]} [argv] Arguments to parse; defaults to `process.argv.slice(2)`.
 * @returns {object} Options: targetUrl, maxDepth, headless, authOptions
 *   ({ headers, cookies, localstorage }), proxy, concurrent, maxConcurrency,
 *   outputDir, skipUrlKeywords, mainPageUrls, pageKeywords, logKeywords.
 */
function parseArguments(argv = process.argv.slice(2)) {
  // Splits "a<sep>b<sep>c" into ["a", "b<sep>c"]; the value part may contain the separator.
  const splitAtFirst = (text, separator) => {
    const at = text.indexOf(separator);
    return at === -1 ? [text, ''] : [text.slice(0, at), text.slice(at + separator.length)];
  };
  // Accepts only integers >= 1; anything else (NaN, 0, negatives) yields the fallback.
  const positiveIntOr = (raw, fallback) => {
    const parsed = Number.parseInt(raw, 10);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  };

  const options = {
    targetUrl: null,
    maxDepth: CONFIG.defaultDepth,
    headless: true,
    authOptions: {
      headers: {},
      cookies: [],
      localstorage: [],
    },
    proxy: null,
    concurrent: true,
    maxConcurrency: CONFIG.defaultConcurrency,
    outputDir: './output',
    skipUrlKeywords: [],
    mainPageUrls: [],
    pageKeywords: [],
    logKeywords: [],
  };

  for (const arg of argv) {
    if (!arg.startsWith('-')) {
      if (options.targetUrl === null) {
        options.targetUrl = arg;
      }
      continue;
    }

    const equalsAt = arg.indexOf('=');
    if (equalsAt === -1) {
      // Only these two options are valid without a value.
      if (arg === '--ui') {
        options.headless = false;
      } else if (arg === '--sequential') {
        options.concurrent = false;
      }
      continue;
    }

    const flag = arg.slice(0, equalsAt);
    const rawValue = arg.slice(equalsAt + 1);
    switch (flag) {
      case '--depth':
        options.maxDepth = positiveIntOr(rawValue, CONFIG.defaultDepth);
        break;
      case '--concurrency':
        options.maxConcurrency = positiveIntOr(rawValue, CONFIG.defaultConcurrency);
        break;
      case '--cookie': {
        const [name, value] = splitAtFirst(rawValue, '=').map(part => part.trim());
        if (name && value) {
          options.authOptions.cookies.push({ name, value });
        }
        break;
      }
      case '--header': {
        const [name, value] = splitAtFirst(rawValue, ':').map(part => part.trim());
        if (name && value) {
          // Headers are merged; a repeated name overwrites the earlier value.
          options.authOptions.headers[name] = value;
        }
        break;
      }
      case '--localstorage': {
        const [key, value] = splitAtFirst(rawValue, '=').map(part => part.trim());
        if (key && value) {
          options.authOptions.localstorage.push({ name: key, value });
        }
        break;
      }
      case '--token':
        if (rawValue) {
          options.authOptions.headers['Authorization'] = `Bearer ${rawValue}`;
        }
        break;
      case '--auth': {
        // Split on the FIRST colon only: the password itself may contain ':'.
        const [username, password] = splitAtFirst(rawValue, ':');
        if (username && password) {
          options.authOptions.headers['Authorization'] = `Basic ${Buffer.from(`${username}:${password}`).toString('base64')}`;
        }
        break;
      }
      case '--proxy':
        options.proxy = rawValue;
        break;
      case '--output-dir':
        options.outputDir = rawValue;
        break;
      case '--skip_url_keyword':
        // An empty keyword is a substring of every URL and would skip everything.
        if (rawValue) {
          options.skipUrlKeywords.push(rawValue);
        }
        break;
      case '--main_page_url':
        options.mainPageUrls.push(rawValue);
        break;
      case '--page_keyword':
        // An empty keyword would match every page.
        if (rawValue) {
          options.pageKeywords.push(rawValue);
        }
        break;
      case '--log_keyword':
        // An empty keyword would match every log.
        if (rawValue) {
          options.logKeywords.push(rawValue);
        }
        break;
      default:
        break;
    }
  }

  if (!options.targetUrl) {
    console.error('错误: 必须提供一个基础URL。');
    console.error('示例 (爬取模式): web-page-analyzer https://example.com');
    console.error('示例 (直接分析): web-page-analyzer https://example.com --main_page_url=/login');
    process.exit(1);
  }
  try {
    new URL(options.targetUrl);
  } catch (error) {
    console.error(`错误: 提供的基础URL格式无效: ${options.targetUrl}`);
    process.exit(1);
  }

  // Copy the defaults so later mutation of the options cannot alter CONFIG.
  if (options.pageKeywords.length === 0) {
    options.pageKeywords = [...CONFIG.defaultPageKeywords];
    console.log(`未提供页面关键字,使用默认值: ${options.pageKeywords.join(', ')}`);
  }
  if (options.logKeywords.length === 0) {
    options.logKeywords = [...CONFIG.defaultLogKeywords];
    console.log(`未提供日志关键字,使用默认值: ${options.logKeywords.join(', ')}`);
  }
  return options;
}
// Parse the command line once at module load. parseArguments() terminates the
// process itself when the base URL is missing or invalid.
const config = parseArguments();
// Unpack the parsed options into module-level constants read by the functions below.
const {
targetUrl,
maxDepth,
headless,
authOptions,
proxy,
outputDir,
concurrent,
maxConcurrency,
skipUrlKeywords,
mainPageUrls,
pageKeywords,
logKeywords
} = config;
// URLs already taken for analysis; performHealthCheck adds to it and filterLinks consults it.
const visitedUrls = new Set();
// Filtered links collected from crawled pages (appended by performHealthCheck).
const allLinks = [];
// One report object per analyzed page, including failure reports.
const healthChecks = [];
// Page analysis.
/**
 * Collects console output, uncaught page errors, failed requests and the
 * main-document HTTP status for ONE page, then condenses them into a report.
 *
 * Usage: call `setupPageListeners(page)` before navigating, navigate, then call
 * `collectHealthCheckData(...)`.
 */
class HealthChecker {
  constructor() {
    this.consoleMessages = []; // unique log entries: { type, text, toString() }
    this.networkErrors = [];   // unique failed requests: { url, failure, toString() }
    this.logCounts = new Map(); // formatted log line -> number of occurrences
    this.errorLogs = [];       // formatted lines of unique error-level entries
    this.mainStatusCode = null; // status of the latest main-frame navigation response
  }

  /**
   * Counts one occurrence of a formatted log line.
   * @param {string} key Formatted log line.
   * @returns {boolean} True when this is the first occurrence of `key`.
   */
  countOccurrence(key) {
    const seen = this.logCounts.get(key) || 0;
    this.logCounts.set(key, seen + 1);
    return seen === 0;
  }

  /**
   * Resets all collected state and attaches listeners to `page`.
   * Must run before navigation, otherwise the navigation response and early
   * logs are missed.
   * @param {object} page Playwright page.
   */
  async setupPageListeners(page) {
    this.consoleMessages = [];
    this.networkErrors = [];
    this.logCounts.clear();
    this.errorLogs = [];
    this.mainStatusCode = null;

    // Record the status of main-frame document responses. Redirect hops fire
    // first, so the last value seen belongs to the document that was rendered.
    page.on('response', response => {
      try {
        if (response.request().isNavigationRequest() && response.frame() === page.mainFrame()) {
          this.mainStatusCode = response.status();
        }
      } catch (error) {
        // Deliberate best effort: some responses (e.g. service-worker traffic)
        // have no frame; they can never be the main document, so skip them.
      }
    });

    page.on('console', msg => {
      const logText = msg.text();
      if (logText.includes('Download the React DevTools')) return; // dev-only noise
      const type = msg.type();
      const logKey = `[${type.toUpperCase()}] ${logText}`;
      if (this.countOccurrence(logKey)) {
        this.consoleMessages.push({ type, text: logText, toString: () => logKey });
      }
      if (type === 'error' && !this.errorLogs.includes(logKey)) {
        this.errorLogs.push(logKey);
      }
    });

    page.on('pageerror', error => {
      const errorLog = `[PAGE_ERROR] ${error.message}`;
      if (this.countOccurrence(errorLog)) {
        this.errorLogs.push(errorLog);
        this.consoleMessages.push({
          type: 'error',
          text: error.message,
          toString: () => errorLog
        });
      }
    });

    page.on('requestfailed', request => {
      const failure = request.failure()?.errorText || 'Unknown error';
      const summary = `${request.method()} ${request.url()} - ${failure}`;
      const errorLog = `[NETWORK_ERROR] ${summary}`;
      if (this.countOccurrence(errorLog)) {
        this.networkErrors.push({
          url: request.url(),
          failure,
          toString: () => errorLog
        });
        this.errorLogs.push(errorLog);
        this.consoleMessages.push({
          type: 'error',
          text: summary,
          toString: () => errorLog
        });
      }
    });
  }

  /**
   * Builds the report for the page that was just loaded.
   *
   * The status code comes from the navigation response captured by the
   * listener installed in `setupPageListeners`. Waiting for a response after
   * the page has loaded would only time out, so no such wait is done here.
   *
   * @param {object} page Playwright page (already navigated).
   * @param {string} pageUrl URL that was requested.
   * @param {number} currentDepth Crawl depth of this page.
   * @param {number} linksFound Number of links extracted from the page.
   * @param {number} linksFiltered Number of links kept after filtering.
   * @returns {Promise<object>} Report with url, title, depth, statusCode (null
   *   when no navigation response was observed), link counts, pageText,
   *   consoleMessages, networkErrors, finalUrl, timestamp, health and logStats.
   */
  async collectHealthCheckData(page, pageUrl, currentDepth, linksFound, linksFiltered) {
    const pageTitle = await page.title();
    const finalUrl = page.url();
    // Runs in the browser: case-insensitive keyword search over the visible text.
    const { matchedPageKeywords, pageText } = await page.evaluate((keywords) => {
      const text = document.body ? document.body.innerText : '';
      const lowerCaseText = text.toLowerCase();
      const matches = keywords.filter(keyword => lowerCaseText.includes(keyword.toLowerCase()));
      return { matchedPageKeywords: matches, pageText: text };
    }, pageKeywords);

    const consoleLogText = this.consoleMessages.map(m => m.text.toLowerCase()).join('\n');
    const matchedLogIssues = logKeywords
      .filter(keyword => consoleLogText.includes(keyword.toLowerCase()))
      .map(keyword => `日志包含关键字: ${keyword}`);

    const statusCode = this.mainStatusCode;

    const issues = [];
    if (matchedPageKeywords.length > 0) {
      issues.push(`页面内容包含关键字: ${matchedPageKeywords.join(', ')}`);
    }
    issues.push(...matchedLogIssues);
    if (this.networkErrors.length > 0) {
      issues.push(`存在网络错误(${this.networkErrors.length}条)`);
    }
    if (statusCode !== null && statusCode >= 400) {
      issues.push(`HTTP状态码异常: ${statusCode}`);
    }

    // Repeated log lines are reported once, annotated with their count.
    const processedConsoleMessages = this.consoleMessages.map(log => {
      const line = log.toString();
      const count = this.logCounts.get(line);
      return count > 1 ? `${line} (重复 ${count} 次)` : line;
    });

    return {
      url: pageUrl,
      title: pageTitle,
      depth: currentDepth,
      statusCode: statusCode,
      linksFound: linksFound,
      linksFiltered: linksFiltered,
      pageText: pageText,
      consoleMessages: processedConsoleMessages,
      networkErrors: this.networkErrors.map(e => e.toString()),
      finalUrl: finalUrl,
      timestamp: new Date().toISOString(),
      health: {
        status: issues.length > 0 ? 'WARNING' : 'HEALTHY',
        issues: issues
      },
      logStats: {
        uniqueLogs: this.consoleMessages.length,
        totalLogs: Array.from(this.logCounts.values()).reduce((sum, count) => sum + count, 0),
        uniqueErrors: this.errorLogs.length
      }
    };
  }

  /**
   * Logs a one-line summary of a report via logProgress.
   * @param {object} healthData Report produced by collectHealthCheckData.
   */
  outputHealthSummary(healthData) {
    const status = healthData.health.status;
    const issuesText = healthData.health.issues.length > 0 ? `${healthData.health.issues.join('; ')}` : '无';
    let finalUrlText = '';
    if (healthData.finalUrl !== healthData.url) {
      finalUrlText = ` (重定向至 -> ${healthData.finalUrl})`;
    }
    const textLengthInfo = `文本长度: ${healthData.pageText ? healthData.pageText.length : 0}`;
    const logInfo = `日志: ${healthData.logStats.uniqueLogs}唯一/${healthData.logStats.totalLogs}总计`;
    logProgress('info', `[${status}] 页面: ${healthData.url}${finalUrlText} | ${textLengthInfo} | ${logInfo} | 问题: ${issuesText}`);
  }
}
/**
 * Writes one progress line to stdout in the form
 * "[<ISO timestamp>] [<LEVEL>] <message>".
 * @param {string} level Severity label; printed upper-cased.
 * @param {*} message Text to print.
 */
function logProgress(level, message) {
  const prefix = `[${new Date().toISOString()}] [${level.toUpperCase()}]`;
  console.log(`${prefix} ${message}`);
}
/**
 * Loads one URL in a fresh page, records a health report for it in
 * `healthChecks`, and (in crawl mode) returns the same-site links found on it.
 *
 * The page is closed in a `finally` block, so it is released exactly once on
 * every path. A failure while closing is logged and does not turn an already
 * recorded successful analysis into an additional ERROR report.
 *
 * @param {string} pageUrl URL to analyze.
 * @param {number} [currentDepth=1] Crawl depth of this URL.
 * @param {object|null} [browserManager=null] Provider of waitForSlot/createPage/closePage.
 * @param {boolean} [isDirectAnalysis=false] When true, the depth limit and skip
 *   keywords are ignored and no links are extracted.
 * @param {number} [settleDelayMs=10000] Extra wait after the network-idle wait,
 *   before the page is inspected; 0 disables it.
 * @returns {Promise<Array<{url: string}>|null>} Discovered links; `[]` when the
 *   URL was skipped or analysis failed; `null` when the URL was already visited
 *   or is beyond the depth limit.
 */
async function performHealthCheck(pageUrl, currentDepth = 1, browserManager = null, isDirectAnalysis = false, settleDelayMs = 10000) {
  if (visitedUrls.has(pageUrl) || (!isDirectAnalysis && currentDepth > maxDepth)) {
    return null;
  }
  if (!isDirectAnalysis && skipUrlKeywords.some(keyword => pageUrl.includes(keyword))) {
    logProgress('debug', `URL包含跳过关键词,跳过: ${pageUrl}`);
    return [];
  }
  logProgress('info', `分析中... ${pageUrl} (深度: ${currentDepth})`);
  // Marked before the first await so a concurrent call for the same URL bails out above.
  visitedUrls.add(pageUrl);

  let page = null;
  try {
    await browserManager.waitForSlot();
    page = await browserManager.createPage();
    const healthChecker = new HealthChecker();
    // Listeners must be attached before navigation to capture early logs.
    await healthChecker.setupPageListeners(page);
    await page.goto(pageUrl, {
      waitUntil: 'domcontentloaded',
      timeout: CONFIG.pageTimeout
    });
    // Best effort: pages with long-polling or streaming never go idle.
    await page.waitForLoadState('networkidle', { timeout: CONFIG.loadTimeout }).catch(() => {
      logProgress('warn', `页面 ${pageUrl} 网络空闲等待超时,继续分析...`);
    });
    if (settleDelayMs > 0) {
      await new Promise(resolve => setTimeout(resolve, settleDelayMs));
    }

    let links = [];
    let filteredLinks = [];
    if (!isDirectAnalysis) {
      links = await page.evaluate((baseUrl) => {
        return Array.from(document.querySelectorAll('a'), anchor => {
          try {
            return new URL(anchor.href, baseUrl).href;
          } catch (e) {
            return null;
          }
        }).filter(Boolean);
      }, pageUrl);
      filteredLinks = filterLinks(links, pageUrl);
      allLinks.push(...filteredLinks);
    }

    const healthData = await healthChecker.collectHealthCheckData(
      page, pageUrl, currentDepth, links.length, filteredLinks.length
    );
    healthChecks.push(healthData);
    healthChecker.outputHealthSummary(healthData);
    return filteredLinks.map(link => ({ url: link }));
  } catch (error) {
    logProgress('error', `分析页面时出错 ${pageUrl}: ${error.message}`);
    healthChecks.push({
      url: pageUrl,
      title: 'Analysis Failed',
      depth: currentDepth,
      statusCode: null,
      pageText: null,
      consoleMessages: [`Error during analysis setup: ${error.message}`],
      networkErrors: [],
      finalUrl: pageUrl,
      timestamp: new Date().toISOString(),
      health: {
        status: 'ERROR',
        issues: [`分析失败: ${error.message}`]
      }
    });
    return [];
  } finally {
    if (page) {
      try {
        await browserManager.closePage(page);
      } catch (closeError) {
        logProgress('warn', `关闭页面时出错 ${pageUrl}: ${closeError.message}`);
      }
    }
  }
}
/**
 * Analyzes a batch of URLs at one crawl depth and merges the links they yield.
 *
 * URLs are analyzed in parallel unless the module-level `concurrent` option is
 * `false` (set by --sequential), in which case they are processed one after
 * another. A task that rejects does not abort the batch; it is logged and
 * contributes no links.
 *
 * @param {string[]} urls URLs to analyze.
 * @param {number} currentDepth Crawl depth passed through to performHealthCheck.
 * @param {object} browserManager Passed through to performHealthCheck.
 * @param {boolean} [isDirectAnalysis=false] Passed through to performHealthCheck.
 * @returns {Promise<Array<{url: string}>>} All links discovered by the batch.
 */
async function performConcurrentHealthCheck(urls, currentDepth, browserManager, isDirectAnalysis = false) {
  const runSequentially = concurrent === false;
  logProgress('info', `${runSequentially ? '顺序' : '并发'}分析 ${urls.length} 个URL (深度: ${currentDepth})`);

  let results;
  if (runSequentially) {
    results = [];
    for (const target of urls) {
      try {
        const value = await performHealthCheck(target, currentDepth, browserManager, isDirectAnalysis);
        results.push({ status: 'fulfilled', value });
      } catch (reason) {
        results.push({ status: 'rejected', reason });
      }
    }
  } else {
    results = await Promise.allSettled(
      urls.map(target => performHealthCheck(target, currentDepth, browserManager, isDirectAnalysis))
    );
  }

  const allDiscoveredLinks = [];
  results.forEach((result, index) => {
    if (result.status === 'rejected') {
      const reason = result.reason?.message ?? String(result.reason);
      logProgress('error', `分析任务异常终止 ${urls[index]}: ${reason}`);
      return;
    }
    // performHealthCheck returns null for URLs it declined to analyze.
    if (result.value) {
      allDiscoveredLinks.push(...result.value);
    }
  });
  return allDiscoveredLinks;
}
/**
 * Reduces the links found on a page to the ones worth crawling next.
 *
 * A link is kept when it parses as a URL, has the same hostname as
 * `sourceUrl`, has not been visited, does not point at a static asset
 * (by file extension) and contains none of the skip keywords. Fragments are
 * stripped, and the result contains each URL at most once, in first-seen order.
 *
 * @param {string[]} links Absolute URLs extracted from the page.
 * @param {string} sourceUrl URL of the page the links came from.
 * @returns {string[]} Unique, fragment-free URLs to analyze.
 * @throws {TypeError} When `sourceUrl` is not a valid URL.
 */
function filterLinks(links, sourceUrl) {
  const sourceHostname = new URL(sourceUrl).hostname;
  const assetExtensions = new Set([
    '.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.ico', '.bmp', '.pdf', '.zip'
  ]);
  // A Set removes repeats: the same anchor often appears several times on a
  // page, or differs only by its #fragment.
  const accepted = new Set();
  for (const linkUrl of links) {
    try {
      const parsedUrl = new URL(linkUrl);
      const urlWithoutHash = linkUrl.split('#')[0];
      if (visitedUrls.has(urlWithoutHash)) continue;
      if (parsedUrl.hostname !== sourceHostname) continue;
      const ext = path.extname(parsedUrl.pathname).toLowerCase();
      if (assetExtensions.has(ext)) continue;
      if (skipUrlKeywords.some(keyword => urlWithoutHash.includes(keyword))) continue;
      accepted.add(urlWithoutHash);
    } catch (e) {
      // Unparseable or non-string entries are simply not crawlable.
    }
  }
  return [...accepted];
}
/**
 * Ensures the per-domain output directory exists and prints the run summary
 * (page counts by health status and aggregated log statistics) to stdout.
 * @returns {{domainDir: string}} The output directory for the target's hostname.
 */
function saveResults() {
  const { hostname } = new URL(targetUrl);
  const domainDir = path.join(outputDir, hostname);
  if (!fs.existsSync(domainDir)) {
    fs.mkdirSync(domainDir, { recursive: true });
  }

  // Tally everything in a single pass over the reports.
  const statusTally = new Map([['HEALTHY', 0], ['WARNING', 0], ['ERROR', 0]]);
  let totalLogs = 0;
  let uniqueLogs = 0;
  let uniqueErrors = 0;
  for (const check of healthChecks) {
    const status = check.health.status;
    if (statusTally.has(status)) {
      statusTally.set(status, statusTally.get(status) + 1);
    }
    // Failure reports carry no logStats, hence the fallbacks.
    totalLogs += check.logStats?.totalLogs || 0;
    uniqueLogs += check.logStats?.uniqueLogs || 0;
    uniqueErrors += check.logStats?.uniqueErrors || 0;
  }

  const summaryLines = [
    `\n--- 分析总结 ---`,
    `总共分析页面: ${visitedUrls.size}`,
    ` - 健康 [OK]: ${statusTally.get('HEALTHY')}`,
    ` - 警告 [WARN]: ${statusTally.get('WARNING')}`,
    ` - 失败/错误 [ERROR]: ${statusTally.get('ERROR')}`,
    `日志统计:`,
    ` - 唯一日志: ${uniqueLogs}`,
    ` - 总日志数: ${totalLogs}`,
    ` - 唯一错误: ${uniqueErrors}`,
    `报告输出目录: ${domainDir}`
  ];
  for (const line of summaryLines) {
    console.log(line);
  }
  return { domainDir };
}
/**
 * Crawl mode: starts at the target URL and analyzes pages level by level up to
 * `maxDepth`, feeding the unvisited links discovered at one level into the next.
 * Always shuts the browser down, prints the summary, writes the JSON report
 * and prints the report path, even when crawling fails.
 */
async function crawlAndAnalyze() {
  logProgress('info', `启动爬取分析模式...`);
  const startedAt = Date.now();
  // The whole config object is handed over so the manager can read auth and target settings.
  const manager = new BrowserManager(maxConcurrency, config);
  try {
    let pending = [targetUrl];
    let level = 1;
    while (level <= maxDepth) {
      if (pending.length === 0) {
        logProgress('info', `深度 ${level}: 无新链接可分析,结束爬取。`);
        break;
      }
      const found = await performConcurrentHealthCheck(pending, level, manager);
      const uniqueTargets = new Set(found.map(entry => entry.url));
      pending = [...uniqueTargets].filter(candidate => !visitedUrls.has(candidate));
      level += 1;
    }
  } catch (error) {
    logProgress('error', `爬取分析过程中发生严重错误: ${error.message}`);
  } finally {
    await manager.shutdown();
    logProgress('info', `浏览器已关闭`);
    const { domainDir } = saveResults();
    const reportPath = outputJsonResults(domainDir);
    const elapsedSeconds = (Date.now() - startedAt) / 1000;
    logProgress('info', `分析完成. 总耗时: ${elapsedSeconds.toFixed(2)}s`);
    console.log(reportPath);
  }
}
/**
 * Direct-analysis mode: analyzes the target URL plus every --main_page_url
 * entry (resolved against the target URL, duplicates removed) in one batch,
 * without following links. Always shuts the browser down, prints the summary,
 * writes the JSON report and prints the report path.
 */
async function analyzeDirectPages() {
  logProgress('info', `启动直接页面分析模式...`);
  const startedAt = Date.now();
  // The whole config object is handed over so the manager can read auth and target settings.
  const manager = new BrowserManager(maxConcurrency, config);
  try {
    const targets = new Set([targetUrl]);
    for (const pagePath of mainPageUrls) {
      let resolved;
      try {
        resolved = new URL(pagePath, targetUrl).href;
      } catch (e) {
        logProgress('warn', `无法将 "${pagePath}" 和基础URL "${targetUrl}" 组合,跳过此项。`);
        continue;
      }
      targets.add(resolved);
    }
    const fullUrlsToAnalyze = [...targets];
    logProgress('info', `将要分析的完整URL列表 (${fullUrlsToAnalyze.length}个): ${fullUrlsToAnalyze.join(', ')}`);
    await performConcurrentHealthCheck(fullUrlsToAnalyze, 1, manager, true);
  } catch (error) {
    logProgress('error', `直接页面分析过程中发生严重错误: ${error.message}`);
  } finally {
    await manager.shutdown();
    logProgress('info', `浏览器已关闭`);
    const { domainDir } = saveResults();
    const reportPath = outputJsonResults(domainDir);
    const elapsedSeconds = (Date.now() - startedAt) / 1000;
    logProgress('info', `分析完成. 总耗时: ${elapsedSeconds.toFixed(2)}s`);
    console.log(reportPath);
  }
}
// Entry point: prints the run banner, dispatches to the selected mode and
// always terminates the process explicitly (exit code 1 on an uncaught error,
// 0 otherwise).
(async () => {
  try {
    initializeConfig();
    // Supplying at least one --main_page_url selects direct-analysis mode.
    const isDirectMode = mainPageUrls.length > 0;
    const mode = isDirectMode ? '直接页面分析' : '爬取分析';

    console.log('\n=== Web Page Analyzer (精简版) ===');
    console.log(`运行模式: ${mode}`);
    console.log(`基础URL: ${targetUrl}`);
    console.log(
      isDirectMode
        ? `额外指定路径数量: ${mainPageUrls.length}`
        : `分析深度: ${maxDepth}`
    );
    console.log(`页面内容关键字: ${pageKeywords.join(', ')}`);
    console.log(`日志内容关键字: ${logKeywords.join(', ')}`);
    console.log('===================================\n');

    const run = isDirectMode ? analyzeDirectPages : crawlAndAnalyze;
    await run();
  } catch (error) {
    console.error('脚本执行出错:', error);
    process.exit(1);
  } finally {
    process.exit(0);
  }
})();
/**
 * Writes the full analysis report as pretty-printed JSON into `domainDir`,
 * using a timestamped file name, and prints the absolute path (once with a
 * label, once on a line of its own).
 * @param {string} domainDir Existing directory to write the report into.
 * @returns {string} Absolute path of the written report file.
 */
function outputJsonResults(domainDir) {
  const countWithStatus = wanted =>
    healthChecks.filter(entry => entry.health.status === wanted).length;
  // Failure reports carry no logStats, hence the `|| 0` fallback.
  const sumLogStat = field =>
    healthChecks.reduce((acc, entry) => acc + (entry.logStats?.[field] || 0), 0);
  const toResultRow = entry => ({
    url: entry.url,
    final_url: entry.finalUrl,
    title: entry.title,
    status: entry.health.status,
    status_code: entry.statusCode,
    issues: entry.health.issues,
    page_text: entry.pageText,
    console_logs: entry.consoleMessages,
    network_errors: entry.networkErrors,
    log_stats: entry.logStats || {
      uniqueLogs: 0,
      totalLogs: 0,
      uniqueErrors: 0
    }
  });

  const summary = {
    total_pages_analyzed: visitedUrls.size,
    healthy_pages: countWithStatus('HEALTHY'),
    warning_pages: countWithStatus('WARNING'),
    error_pages: countWithStatus('ERROR'),
    mode: mainPageUrls.length > 0 ? 'direct_analysis' : 'crawl',
    base_target: targetUrl,
    total_logs: sumLogStat('totalLogs'),
    unique_logs: sumLogStat('uniqueLogs'),
    unique_errors: sumLogStat('uniqueErrors'),
  };
  const jsonOutput = {
    summary,
    results: healthChecks.map(toResultRow),
    timestamp: new Date().toISOString()
  };

  // ':' and '.' are replaced so the timestamp is safe inside a file name.
  const fileStamp = new Date().toISOString().replace(/[:.]/g, '-');
  const jsonFilePath = path.join(domainDir, `analysis_report_${fileStamp}.json`);
  fs.writeFileSync(jsonFilePath, JSON.stringify(jsonOutput, null, 2));

  const absoluteJsonPath = path.resolve(jsonFilePath);
  console.log(`JSON报告已保存至: ${absoluteJsonPath}`);
  console.log(absoluteJsonPath);
  return absoluteJsonPath;
}