UNPKG

sitemap-audit

Version:

Comprehensive sitemap auditor for website health checks

260 lines (259 loc) 7.86 kB
// src/index.ts
import { parseStringPromise } from "xml2js";
import { promises as fs } from "fs";
import axios from "axios";
import path from "path";

/**
 * Minimal counting semaphore used to cap the number of tasks running at once.
 *
 * `acquire()` resolves with a release function; callers must invoke it exactly
 * once (typically in a `finally`) to hand the slot to the next waiter.
 */
class Semaphore {
  /**
   * @param maxConcurrent Maximum number of slots that may be held at once.
   */
  constructor(maxConcurrent) {
    // A limit below 1 (or NaN / undefined) would make every acquire() queue
    // forever because no slot could ever be granted; fall back to one slot.
    this.maxConcurrent = maxConcurrent >= 1 ? maxConcurrent : 1;
    this.current = 0;
    this.queue = [];
  }

  /**
   * Waits for a free slot.
   * @returns A promise resolving to the function that releases the slot.
   */
  async acquire() {
    if (this.current < this.maxConcurrent) {
      this.current++;
      return this.release.bind(this);
    }
    // No free slot: park the resolver until release() hands a slot over.
    return new Promise((resolve) => this.queue.push(resolve));
  }

  /**
   * Frees one slot and, if anyone is waiting, immediately grants it to the
   * oldest waiter.
   */
  release() {
    this.current--;
    const next = this.queue.shift();
    if (next) {
      this.current++;
      next(this.release.bind(this));
    }
  }
}

/**
 * Audits the URLs of a sitemap: fetches the sitemap, checks the HTTP status
 * of each URL, and (given a browser context) records failing network requests
 * made by each page. Results are written as JSON into `config.resultsFolder`.
 */
class SiteChecker {
  /**
   * @param config Optional overrides for the defaults below
   *   (`resultsFolder`, `batchSize`, `maxConnections`).
   */
  constructor(config = {}) {
    this.config = {
      resultsFolder: path.join(process.cwd(), "test-results"),
      batchSize: 20,
      maxConnections: 50,
      ...config
    };
    this.non200Responses = [];
    this.logQueue = {};
    this.writePending = false;
  }

  /**
   * Waits 1.5 s when `writePending` is set; otherwise returns immediately.
   * NOTE(review): nothing in this module sets `writePending` to true.
   */
  async flushWrites() {
    if (this.writePending) {
      await new Promise((resolve) => setTimeout(resolve, 1500));
    }
  }

  /**
   * Creates the results folder (recursively). Best-effort: a failure is
   * logged, not thrown.
   */
  async ensureResultsFolder() {
    try {
      await fs.mkdir(this.config.resultsFolder, { recursive: true });
    } catch (error) {
      console.error("Error creating results folder:", error.message);
    }
  }

  /**
   * Appends `data` plus a newline to `fileName` inside the results folder.
   * Write errors are logged, not thrown.
   *
   * @param fileName File name relative to the results folder.
   * @param data Text to append.
   */
  async batchWriter(fileName, data) {
    await this.ensureResultsFolder();
    const filePath = path.join(this.config.resultsFolder, fileName);
    try {
      await fs.appendFile(filePath, data + "\n", "utf8");
    } catch (error) {
      console.error(`Error writing to ${fileName}:`, error.message);
    }
  }

  /**
   * Downloads a sitemap and extracts the `<loc>` value of every `<url>` entry.
   *
   * @param sitemapUrl Address of the XML sitemap (must contain a `<urlset>`).
   * @returns The list of URLs, or an empty array if the sitemap could not be
   *   fetched or parsed (the error is logged).
   */
  async fetchAndSplitUrls(sitemapUrl) {
    try {
      const response = await axios.get(sitemapUrl);
      const result = await parseStringPromise(response.data);
      if (!result.urlset || !result.urlset.url) {
        throw new Error("Invalid sitemap format.");
      }
      const urls = [];
      for (const entry of result.urlset.url) {
        const loc = entry?.loc?.[0];
        // With xml2js defaults an element carrying attributes is parsed into
        // an object whose text lives under `_`; plain elements are strings.
        const text = typeof loc === "string" ? loc : loc?._;
        // xml2js does not trim text nodes by default, so pretty-printed
        // sitemaps would otherwise yield URLs wrapped in whitespace.
        const trimmed = typeof text === "string" ? text.trim() : "";
        // Skip entries without a usable <loc> instead of discarding the
        // whole sitemap because of one malformed entry.
        if (trimmed) {
          urls.push(trimmed);
        }
      }
      console.log(`\u2705 Total URLs found in sitemap: ${urls.length}`);
      return urls;
    } catch (err) {
      console.error("\u274C Error fetching or parsing sitemap:", err.message);
      return [];
    }
  }

  /**
   * Requests every URL (at most `config.maxConnections` at a time) and writes
   * the ones answering with status >= 400, or failing outright, to
   * `non-200-responses.json` in the results folder.
   *
   * @param urls URLs to check.
   */
  async checkUrlStatus(urls) {
    await this.ensureResultsFolder();
    const http = axios.create({
      maxRedirects: 5,
      timeout: 3e4,
      maxContentLength: 50 * 1e3 * 1e3
    });
    const errorResponses = [];
    const semaphore = new Semaphore(this.config.maxConnections);

    const processUrl = async (url) => {
      const release = await semaphore.acquire();
      try {
        let response = null;
        let attempt = 0;
        // Up to two attempts, pausing 2 s before the retry.
        while (attempt < 2) {
          attempt++;
          try {
            // validateStatus always true: HTTP error statuses are inspected
            // below instead of being thrown by axios.
            response = await http.get(url, { validateStatus: () => true });
            break;
          } catch (err) {
            if (attempt >= 2) throw err;
            await new Promise((resolve) => setTimeout(resolve, 2e3));
          }
        }
        if (response && response.status >= 400) {
          errorResponses.push({ url, status: response.status });
        }
      } catch (err) {
        errorResponses.push({ url, status: "error", error: err.message });
        console.error(`\u274C Failed to check ${url}:`, err.message);
      } finally {
        release();
      }
    };

    await Promise.all(urls.map((url) => processUrl(url)));

    const filePath = path.join(
      this.config.resultsFolder,
      "non-200-responses.json"
    );
    await fs.writeFile(
      filePath,
      JSON.stringify(errorResponses, null, 2),
      "utf8"
    );
    console.log(`\u2705 Non-200 responses saved to ${filePath}`);
  }

  /**
   * Opens `url` in a new page, scrolls through it, and collects the network
   * requests that failed or were answered with status >= 400.
   *
   * @param context Object providing `newPage()`; the page API used here
   *   (`goto`, `waitForLoadState`, `requestfailed` events) looks like a
   *   Playwright BrowserContext — TODO confirm against the caller.
   * @param url Page to load.
   * @returns The list of recorded failures for this page.
   */
  async checkPageNetworkRequests(context, url) {
    const page = await context.newPage();
    const failures = [];
    const ignorableErrors = [
      "net::ERR_ABORTED",
      "net::ERR_FAILED",
      "net::ERR_TIMED_OUT"
    ];
    const ignorableResourceTypes = [
      "image",
      "media",
      "stylesheet",
      "font",
      "script",
      "fetch"
    ];

    page.on("requestfailed", (request) => {
      const failureUrl = request.url();
      const failureError = request.failure()?.errorText || "Unknown error";
      const resourceType = request.resourceType();
      // A failed request is dropped only when BOTH its error text and its
      // resource type are on the ignore lists.
      const isIgnorable =
        ignorableErrors.includes(failureError) &&
        ignorableResourceTypes.includes(resourceType);
      if (!isIgnorable) {
        failures.push({
          url: failureUrl,
          status: "blocked",
          resourceType,
          initiatingPage: url,
          error: failureError
        });
      }
    });

    page.on("response", (response) => {
      const status = response.status();
      const resourceType = response.request().resourceType();
      if (status >= 400) {
        failures.push({
          url: response.url(),
          status,
          resourceType,
          initiatingPage: url
        });
      }
    });

    // Scrolls down in 500px steps every 200 ms until the accumulated distance
    // reaches the page height.
    const autoScroll = async () => {
      await page.evaluate(async () => {
        await new Promise((resolve) => {
          const distance = 500;
          // Hard cap (300 steps = 60 s): on pages that keep growing while
          // being scrolled the height check alone might never be satisfied
          // and the evaluate call would hang forever.
          const maxSteps = 300;
          let totalHeight = 0;
          let steps = 0;
          const timer = setInterval(() => {
            const scrollHeight = document.body?.scrollHeight ?? 0;
            window.scrollBy(0, distance);
            totalHeight += distance;
            steps++;
            if (totalHeight >= scrollHeight || steps >= maxSteps) {
              clearInterval(timer);
              resolve();
            }
          }, 200);
        });
      });
    };

    // Navigates to the page; on error waits 3 s and retries while `retries`
    // remain, then logs the final failure without throwing.
    const loadPageWithRetry = async (retries = 1) => {
      try {
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 6e4 });
      } catch (error) {
        if (retries > 0) {
          console.warn(`\u{1F501} Retry for ${url} due to error: ${error.message}`);
          await page.waitForTimeout(3e3);
          return loadPageWithRetry(retries - 1);
        } else {
          console.error(`\u274C Failed to load ${url}: ${error.message}`);
        }
      }
    };

    try {
      await loadPageWithRetry(1);
      // Inject a default favicon link when the page declares none, so that
      // the /favicon.ico request is issued and observed like any other.
      await page.evaluate(() => {
        const links = document.querySelectorAll("link[rel*='icon']");
        if (links.length === 0) {
          const favicon = document.createElement("link");
          favicon.rel = "icon";
          favicon.href = "/favicon.ico";
          document.head.appendChild(favicon);
        }
      });
      // Two scroll passes, each followed by a 2 s pause.
      for (let scrollAttempts = 0; scrollAttempts < 2; scrollAttempts++) {
        await autoScroll();
        await page.waitForTimeout(2e3);
      }
      await page.waitForLoadState("networkidle");
      await page.waitForTimeout(5e3);
    } catch (error) {
      console.error(`\u274C Error processing ${url}:`, error.message);
    } finally {
      await page.close().catch(() => {
      });
    }
    return failures;
  }

  /**
   * Runs `checkPageNetworkRequests` for every URL (at most
   * `config.maxConnections` pages at a time), de-duplicates failures by
   * `url|status` across all pages, and writes them to `network-failures.json`
   * in the results folder.
   *
   * @param context Passed through to `checkPageNetworkRequests`.
   * @param urls Pages to inspect.
   */
  async checkAllNetworkRequests(context, urls) {
    // Make sure the output folder exists up front (as checkUrlStatus does);
    // otherwise the final writeFile fails with ENOENT after the whole crawl.
    await this.ensureResultsFolder();
    const allFailures = [];
    const semaphore = new Semaphore(this.config.maxConnections);
    const seenUrls = /* @__PURE__ */ new Set();

    await Promise.all(
      urls.map(async (url) => {
        const release = await semaphore.acquire();
        try {
          const failures = await this.checkPageNetworkRequests(context, url);
          const uniqueFailures = failures.filter((failure) => {
            const key = `${failure.url}|${failure.status}`;
            if (!seenUrls.has(key)) {
              seenUrls.add(key);
              return true;
            }
            return false;
          });
          allFailures.push(...uniqueFailures);
        } finally {
          release();
        }
      })
    );

    const filePath = path.join(
      this.config.resultsFolder,
      "network-failures.json"
    );
    await fs.writeFile(filePath, JSON.stringify(allFailures, null, 2), "utf8");
    console.log(`\u2705 Network failures saved to ${filePath}`);
  }
}

var index_default = SiteChecker;
export { SiteChecker, index_default as default };