sitemap-audit
Version:
Comprehensive sitemap auditor for website health checks
260 lines (259 loc) • 7.86 kB
JavaScript
// src/index.ts
import { parseStringPromise } from "xml2js";
import { promises as fs } from "fs";
import axios from "axios";
import path from "path";
/**
 * Counting semaphore used to cap how many async tasks run at once.
 *
 * `acquire()` resolves with a release function; callers invoke it when their
 * work is done so a queued waiter (if any) can proceed.
 */
var Semaphore = class {
  /**
   * @param maxConcurrent Maximum number of holders allowed at the same time.
   */
  constructor(maxConcurrent) {
    this.maxConcurrent = maxConcurrent;
    // Number of slots currently handed out.
    this.current = 0;
    // Resolver callbacks of callers waiting for a slot, oldest first.
    this.queue = [];
  }

  /**
   * Takes a slot, waiting in FIFO order when none is free.
   *
   * @returns Promise resolving to the bound `release` function.
   */
  async acquire() {
    const hasFreeSlot = this.current < this.maxConcurrent;
    if (!hasFreeSlot) {
      // Park the caller; release() resolves this promise later.
      return new Promise((grant) => {
        this.queue.push(grant);
      });
    }
    this.current += 1;
    return this.release.bind(this);
  }

  /**
   * Gives a slot back and, if someone is waiting, hands it straight to the
   * oldest waiter.
   */
  release() {
    this.current -= 1;
    // shift() on an empty queue yields undefined, so no length check is needed.
    const waiter = this.queue.shift();
    if (waiter) {
      this.current += 1;
      waiter(this.release.bind(this));
    }
  }
};
/**
 * Sitemap-driven site auditor.
 *
 * Reads page URLs from an XML sitemap, checks their HTTP status with axios,
 * and (given a browser context) loads each page to record failed or
 * error-status network requests. Reports are written as JSON files into
 * `config.resultsFolder`.
 */
var SiteChecker = class {
  /**
   * @param {object} [config] Overrides spread over the defaults.
   * @param {string} [config.resultsFolder] Directory for report files;
   *   defaults to `<cwd>/test-results`.
   * @param {number} [config.batchSize] Defaults to 20. Not read by any method
   *   of this class.
   * @param {number} [config.maxConnections] Upper bound on URLs/pages
   *   processed at once; defaults to 50.
   */
  constructor(config = {}) {
    this.config = {
      resultsFolder: path.join(process.cwd(), "test-results"),
      batchSize: 20,
      maxConnections: 50,
      ...config
    };
    // The next two fields are initialised here but not used by any method
    // of this class.
    this.non200Responses = [];
    this.logQueue = {};
    // Only read by flushWrites(); nothing in this class sets it to true.
    this.writePending = false;
  }

  /**
   * Waits 1.5 s when `writePending` is set; otherwise resolves immediately.
   * NOTE(review): presumably external code toggles the flag - confirm.
   */
  async flushWrites() {
    if (this.writePending) {
      await new Promise((resolve) => setTimeout(resolve, 1500));
    }
  }

  /**
   * Creates the results folder (and missing parents) if needed.
   * Failures are logged, not thrown.
   */
  async ensureResultsFolder() {
    try {
      await fs.mkdir(this.config.resultsFolder, { recursive: true });
    } catch (error) {
      console.error("Error creating results folder:", error.message);
    }
  }

  /**
   * Appends `data` plus a trailing newline to a file in the results folder.
   * Write failures are logged, not thrown.
   *
   * @param {string} fileName File name relative to the results folder.
   * @param {string} data Text to append.
   */
  async batchWriter(fileName, data) {
    await this.ensureResultsFolder();
    const filePath = path.join(this.config.resultsFolder, fileName);
    try {
      await fs.appendFile(filePath, data + "\n", "utf8");
    } catch (error) {
      console.error(`Error writing to ${fileName}:`, error.message);
    }
  }

  /**
   * Downloads a sitemap and extracts the first `<loc>` of every `<url>` entry.
   *
   * @param {string} sitemapUrl Address of the XML sitemap.
   * @returns {Promise<Array>} The extracted locations, or an empty array when
   *   the download or parsing fails or the document has no `urlset.url`
   *   (the error is logged).
   */
  async fetchAndSplitUrls(sitemapUrl) {
    try {
      const response = await axios.get(sitemapUrl);
      const result = await parseStringPromise(response.data);
      if (!result.urlset || !result.urlset.url) {
        throw new Error("Invalid sitemap format.");
      }
      const urls = result.urlset.url.map((url) => url.loc[0]);
      console.log(`\u2705 Total URLs found in sitemap: ${urls.length}`);
      return urls;
    } catch (err) {
      console.error("\u274C Error fetching or parsing sitemap:", err.message);
      return [];
    }
  }

  /**
   * Requests every URL and writes those answering with status >= 400, or
   * failing outright, to `non-200-responses.json` in the results folder.
   *
   * At most `config.maxConnections` requests are in flight at once. Each URL
   * gets up to two attempts with a 2 s pause between them.
   *
   * @param {Array<string>} urls URLs to check.
   */
  async checkUrlStatus(urls) {
    await this.ensureResultsFolder();
    const http = axios.create({
      maxRedirects: 5,
      timeout: 3e4,
      maxContentLength: 50 * 1e3 * 1e3
    });
    const errorResponses = [];
    const semaphore = new Semaphore(this.config.maxConnections);
    const processUrl = async (url) => {
      const release = await semaphore.acquire();
      try {
        let response = null;
        let attempt = 0;
        while (attempt < 2) {
          attempt++;
          try {
            // validateStatus always true: HTTP error codes come back as
            // responses instead of being thrown.
            response = await http.get(url, { validateStatus: () => true });
            break;
          } catch (err) {
            // Second failure is final; hand it to the outer catch.
            if (attempt >= 2) throw err;
            await new Promise((resolve) => setTimeout(resolve, 2e3));
          }
        }
        if (response && response.status >= 400) {
          errorResponses.push({ url, status: response.status });
        }
      } catch (err) {
        errorResponses.push({ url, status: "error", error: err.message });
        console.error(`\u274C Failed to check ${url}:`, err.message);
      } finally {
        release();
      }
    };
    await Promise.all(urls.map((url) => processUrl(url)));
    const filePath = path.join(
      this.config.resultsFolder,
      "non-200-responses.json"
    );
    await fs.writeFile(
      filePath,
      JSON.stringify(errorResponses, null, 2),
      "utf8"
    );
    console.log(`\u2705 Non-200 responses saved to ${filePath}`);
  }

  /**
   * Opens `url` in a new page and collects network problems seen while the
   * page loads and is scrolled.
   *
   * @param context Object exposing `newPage()`; presumably a Playwright
   *   BrowserContext - confirm against caller.
   * @param {string} url Page to load.
   * @returns {Promise<Array>} One entry per failed request (status
   *   "blocked") or response with status >= 400.
   */
  async checkPageNetworkRequests(context, url) {
    const page = await context.newPage();
    const failures = [];
    let scrollAttempts = 0;
    const ignorableErrors = [
      "net::ERR_ABORTED",
      "net::ERR_FAILED",
      "net::ERR_TIMED_OUT"
    ];
    const ignorableResourceTypes = [
      "image",
      "media",
      "stylesheet",
      "font",
      "script",
      "fetch"
    ];
    page.on("requestfailed", (request) => {
      const failureUrl = request.url();
      const failureError = request.failure()?.errorText || "Unknown error";
      const resourceType = request.resourceType();
      // Skipped only when BOTH the error text and the resource type are on
      // the ignore lists.
      const isIgnorable = ignorableErrors.includes(failureError) && ignorableResourceTypes.includes(resourceType);
      if (!isIgnorable) {
        failures.push({
          url: failureUrl,
          status: "blocked",
          resourceType,
          initiatingPage: url,
          error: failureError
        });
      }
    });
    page.on("response", (response) => {
      const status = response.status();
      const resourceType = response.request().resourceType();
      if (status >= 400) {
        failures.push({
          url: response.url(),
          status,
          resourceType,
          initiatingPage: url
        });
      }
    });
    // Scrolls down 500 px every 200 ms until the distance scrolled reaches
    // the body's scroll height, which is re-read on every tick.
    const autoScroll = async () => {
      await page.evaluate(async () => {
        await new Promise((resolve) => {
          let totalHeight = 0;
          const distance = 500;
          const timer = setInterval(() => {
            const scrollHeight = document.body.scrollHeight;
            window.scrollBy(0, distance);
            totalHeight += distance;
            if (totalHeight >= scrollHeight) {
              clearInterval(timer);
              resolve();
            }
          }, 200);
        });
      });
    };
    // Navigates to the page; on failure waits 3 s and retries while retries
    // remain. The final failure is logged, not thrown.
    const loadPageWithRetry = async (retries = 1) => {
      try {
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 6e4 });
      } catch (error) {
        if (retries > 0) {
          console.warn(`\u{1F501} Retry for ${url} due to error: ${error.message}`);
          await page.waitForTimeout(3e3);
          return loadPageWithRetry(retries - 1);
        } else {
          console.error(`\u274C Failed to load ${url}: ${error.message}`);
        }
      }
    };
    try {
      await loadPageWithRetry(1);
      // Adds <link rel="icon" href="/favicon.ico"> when the page declares no
      // icon link. NOTE(review): presumably so the default favicon request
      // is observed by the handlers above - confirm.
      await page.evaluate(() => {
        const links = document.querySelectorAll("link[rel*='icon']");
        if (links.length === 0) {
          const favicon = document.createElement("link");
          favicon.rel = "icon";
          favicon.href = "/favicon.ico";
          document.head.appendChild(favicon);
        }
      });
      // Two scroll passes, each followed by a 2 s pause.
      while (scrollAttempts < 2) {
        await autoScroll();
        scrollAttempts++;
        await page.waitForTimeout(2e3);
      }
      await page.waitForLoadState("networkidle");
      await page.waitForTimeout(5e3);
    } catch (error) {
      console.error(`\u274C Error processing ${url}:`, error.message);
    } finally {
      // Closing is best-effort; errors from close() are ignored.
      await page.close().catch(() => {
      });
    }
    return failures;
  }

  /**
   * Runs checkPageNetworkRequests for every URL (at most
   * `config.maxConnections` pages at once) and writes the combined failures
   * to `network-failures.json` in the results folder.
   *
   * Failures are de-duplicated across all pages by their `url|status` pair;
   * the first occurrence is kept.
   *
   * @param context Passed through to checkPageNetworkRequests.
   * @param {Array<string>} urls Pages to inspect.
   */
  async checkAllNetworkRequests(context, urls) {
    // Create the output folder up front, as checkUrlStatus does, so the
    // final write does not fail just because the folder is missing.
    await this.ensureResultsFolder();
    const allFailures = [];
    const semaphore = new Semaphore(this.config.maxConnections);
    const seenUrls = /* @__PURE__ */ new Set();
    await Promise.all(
      urls.map(async (url) => {
        const release = await semaphore.acquire();
        try {
          const failures = await this.checkPageNetworkRequests(context, url);
          const uniqueFailures = failures.filter((failure) => {
            const key = `${failure.url}|${failure.status}`;
            if (!seenUrls.has(key)) {
              seenUrls.add(key);
              return true;
            }
            return false;
          });
          allFailures.push(...uniqueFailures);
        } finally {
          release();
        }
      })
    );
    const filePath = path.join(
      this.config.resultsFolder,
      "network-failures.json"
    );
    await fs.writeFile(filePath, JSON.stringify(allFailures, null, 2), "utf8");
    console.log(`\u2705 Network failures saved to ${filePath}`);
  }
};
// Alias used to expose SiteChecker as the module's default export.
var index_default = SiteChecker;
// SiteChecker is available both as a named export and as the default export.
export {
SiteChecker,
index_default as default
};