@qctrl/website-validator
Version:
Q-CTRL Website Validator provides a set of utility tools that validate HTML pages and hyperlinks.
104 lines (103 loc) • 4.57 kB
JavaScript
// Down-levelled async/await helper: drives a generator function to completion,
// resuming it with the settled value of each yielded promise, and returns a
// promise for the generator's return value. An already-defined `this.__awaiter`
// is reused when present.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    // Wrap anything that is not already an instance of P so it can be chained.
    const adopt = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((resolve) => {
            resolve(value);
        });
    };
    // P falls back to the native Promise when the caller passes nothing.
    return new (P || (P = Promise))((resolve, reject) => {
        // Either finish with the generator's return value or wait for the
        // yielded value and feed the outcome back into the generator.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(fulfilled, rejected);
        };
        // Resume the generator with a fulfilled value.
        const fulfilled = (value) => {
            try {
                step(generator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        // Resume the generator by throwing the rejection reason into it.
        const rejected = (value) => {
            try {
                step(generator["throw"](value));
            }
            catch (e) {
                reject(e);
            }
        };
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import { LinkChecker, LinkState } from "linkinator";
import { program } from "commander";
import signale from "signale";
import getPagesFromSiteMap from "../helpers/getPagesFromSiteMap.js";
// Fallback for the checker's `concurrency` option when --concurrency is not given.
const DEFAULT_CONCURRENCY = 2;
// Passed to the checker as `retryErrorsJitter`.
// NOTE(review): unit is presumably milliseconds — confirm against linkinator docs.
const DEFAULT_ERROR_RETRY_JITTERS = 5;
// Passed to the checker as `retryErrorsCount`.
const DEFAULT_ERROR_RETRIES = 3;
// Looks up the signale logger to use for a link by its state name.
// The SKIPPED entry is currently never used: the link handler excludes
// skipped links before logging.
const LOGGER_MAP = {
OK: signale.success,
BROKEN: signale.error,
SKIPPED: signale.info,
};
// Declare the supported command-line flags, then parse the process arguments.
program
    .option("-u, --url <string>", "website url to check for links")
    .option("--ignoreUrls <string>", "urls to ignore (comma separated values)")
    .option("--concurrency <number>", "concurrent number of connections")
    .option("--shard <number>", "shard number to run eg. 1/3")
    .option("--verbose", "log output to console")
    .parse();
/**
 * Parses a `--shard` value of the form "<index>/<total>" (index is 1-based).
 *
 * @param {string} shard Raw option value, e.g. "1/3".
 * @returns {{ shardNumber: number, totalShards: number }} The parsed pair.
 * @throws {Error} When either part is not an integer or the index is outside
 *   the range 1..total (such values would otherwise select no pages at all).
 */
function parseShard(shard) {
    const [rawNumber, rawTotal] = shard.split("/");
    const shardNumber = Number.parseInt(rawNumber, 10);
    const totalShards = Number.parseInt(rawTotal, 10);
    const isValid = Number.isInteger(shardNumber) &&
        Number.isInteger(totalShards) &&
        shardNumber >= 1 &&
        shardNumber <= totalShards;
    if (!isValid) {
        throw new Error(`Invalid --shard value "${shard}": expected <index>/<total> with 1 <= index <= total`);
    }
    return { shardNumber, totalShards };
}
/**
 * Converts the raw `--concurrency` option (a string from the command line)
 * into a positive integer, falling back to DEFAULT_CONCURRENCY when absent.
 *
 * @param {string|number|undefined|null} concurrency Raw option value.
 * @returns {number} Number of concurrent connections to use.
 * @throws {Error} When the value is not a positive integer.
 */
function parseConcurrency(concurrency) {
    if (concurrency === undefined || concurrency === null) {
        return DEFAULT_CONCURRENCY;
    }
    const parsed = Number.parseInt(concurrency, 10);
    if (!Number.isInteger(parsed) || parsed < 1) {
        throw new Error(`Invalid --concurrency value "${concurrency}": expected a positive integer`);
    }
    return parsed;
}
/**
 * Checks the links of every page listed in the site's /sitemap.xml, one page
 * at a time, and exits the process with code 1 when any broken link carrying
 * an HTTP status was found.
 *
 * Options are read from the parsed command line: url, ignoreUrls,
 * concurrency, shard and verbose.
 *
 * @returns {Promise<void>} Settles once all pages have been checked; rejects
 *   on invalid options or when fetching the sitemap / checking a page fails.
 */
function run() {
    return __awaiter(this, void 0, void 0, function* () {
        const { url, ignoreUrls, concurrency, shard, verbose } = program.opts();
        // Validate all options up front so bad input fails before any network I/O.
        if (!url) {
            throw new Error("Missing required option --url");
        }
        const connectionLimit = parseConcurrency(concurrency);
        let shardSpec = null;
        if (shard !== undefined) {
            if (shard.includes("/")) {
                shardSpec = parseShard(shard);
            }
            else {
                // Preserve the previous behaviour (check every page) but make it visible.
                signale.warn("Ignoring --shard value without '/':", shard);
            }
        }
        // A Set keeps the per-link lookup constant-time as the list grows.
        const urlsToIgnore = new Set(ignoreUrls
            ? ignoreUrls.split(",").map((entry) => entry.trim()).filter((entry) => entry !== "")
            : []);
        const brokenLinks = [];
        // https links whose path contains a dot (i.e. look like a file) are skipped.
        const fileLinkPattern = /^https:\/\/.+\..+\/.+\..+$/;
        const checker = new LinkChecker(url);
        signale.start("Checking for links at %s", url);
        let pageUrls = yield getPagesFromSiteMap(new URL("/sitemap.xml", url).toString());
        checker.on("link", (link) => {
            const logger = LOGGER_MAP[link.state];
            if (logger && verbose && link.state !== LinkState.SKIPPED) {
                logger(link.url);
            }
            if (link.state === LinkState.BROKEN) {
                brokenLinks.push(link);
            }
            else if (link.state === LinkState.OK) {
                // A link that already passed does not need to be re-checked on later pages.
                urlsToIgnore.add(link.url);
            }
        });
        if (shardSpec !== null) {
            const { shardNumber, totalShards } = shardSpec;
            // Pages are dealt round-robin: shard k takes indexes k-1, k-1+total, ...
            pageUrls = pageUrls.filter((_, index) => index % totalShards === shardNumber - 1);
            signale.log("Shard", shardNumber, "of", totalShards);
        }
        signale.log("Checking", pageUrls.length, "pages");
        // Built once and shared by every page check; it reads the live ignore set.
        const linksToSkip = (link) => Promise.resolve(link.startsWith("#") ||
            fileLinkPattern.test(link) ||
            urlsToIgnore.has(link));
        // Pages are checked strictly one after another.
        for (const pageUrl of pageUrls) {
            yield checker.check({
                path: pageUrl,
                retry: true,
                retryErrors: true,
                retryErrorsCount: DEFAULT_ERROR_RETRIES,
                retryErrorsJitter: DEFAULT_ERROR_RETRY_JITTERS,
                linksToSkip,
                concurrency: connectionLimit,
            });
        }
        signale.complete("Check completed", url);
        // Only broken links with a non-zero status are reported; a missing or
        // zero status is not treated as a failure.
        const brokenLinksWithHttpErrors = brokenLinks.filter((item) => Boolean(item.status));
        if (brokenLinksWithHttpErrors.length > 0) {
            signale.log("Invalid links found");
            const resultsLogger = signale.scope("Results");
            brokenLinksWithHttpErrors.forEach((link) => {
                resultsLogger.error({
                    url: link.url,
                    status: link.status,
                    parent: link.parent,
                });
            });
            process.exit(1);
        }
    });
}
// Entry point. Report any rejection explicitly and exit non-zero so a failed
// run cannot be mistaken for a passing check.
run().catch((error) => {
    signale.fatal(error);
    process.exit(1);
});