UNPKG

@qctrl/website-validator

Version:

Q-CTRL Website Validator provides a set of utility tools that validate HTML pages and hyperlinks.

104 lines (103 loc) 4.57 kB
#!/usr/bin/env node
/**
 * Link checker CLI.
 *
 * Reads the page list from `<url>/sitemap.xml`, optionally keeps only one
 * shard of it, and runs linkinator over each page in turn. Broken links that
 * carry a non-zero HTTP status are reported and make the process exit with
 * code 1.
 */
import { LinkChecker, LinkState } from "linkinator";
import { program } from "commander";
import signale from "signale";
import getPagesFromSiteMap from "../helpers/getPagesFromSiteMap.js";

const DEFAULT_CONCURRENCY = 2;
const DEFAULT_ERROR_RETRY_JITTERS = 5;
const DEFAULT_ERROR_RETRIES = 3;

// Logger used for each link state when --verbose is set.
const LOGGER_MAP = {
  OK: signale.success,
  BROKEN: signale.error,
  SKIPPED: signale.info,
};

// Matches https URLs that contain a dot, later a slash, and later another dot.
// NOTE(review): looks like this is meant to skip file-like links (assets,
// downloads) — confirm the intent before tightening the pattern.
const SKIPPED_HTTPS_LINK_PATTERN = /^https:\/\/.+\..+\/.+\..+$/;

program.option("-u, --url <string>", "website url to check for links");
program.option("--ignoreUrls <string>", "urls to ignore (comma separated values)");
program.option("--concurrency <number>", "concurrent number of connections");
program.option("--shard <number>", "shard number to run eg. 1/3");
program.option("--verbose", "log output to console");
program.parse();

/**
 * Converts the raw `--concurrency` option into a positive integer.
 *
 * @param {string|undefined} value - Raw option value; the option is declared
 *   without a custom parser, so it is handled as text here.
 * @returns {number} The parsed value, or DEFAULT_CONCURRENCY when the option
 *   was not supplied.
 * @throws {Error} When the value is not a positive integer.
 */
function parseConcurrency(value) {
  if (value === undefined || value === null) {
    return DEFAULT_CONCURRENCY;
  }
  const parsed = Number.parseInt(value, 10);
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`Invalid --concurrency value "${value}": expected a positive integer`);
  }
  return parsed;
}

/**
 * Parses a `--shard` value of the form "<shardNumber>/<totalShards>".
 *
 * @param {string|undefined} shard - Raw option value.
 * @returns {{shardNumber: number, totalShards: number}|null} The shard
 *   selection, or null when no usable "n/m" value was supplied (all pages are
 *   then checked).
 * @throws {Error} When the numbers are not integers with
 *   1 <= shardNumber <= totalShards. Such values would otherwise select no
 *   pages at all and let the run pass without checking anything.
 */
function parseShard(shard) {
  if (shard === undefined) {
    return null;
  }
  if (!shard.includes("/")) {
    // Kept as a no-op for backward compatibility, but no longer silent.
    signale.warn(`Ignoring --shard value "${shard}": expected the form <number>/<total>`);
    return null;
  }
  const [shardNumberText, totalShardsText] = shard.split("/");
  const shardNumber = Number.parseInt(shardNumberText, 10);
  const totalShards = Number.parseInt(totalShardsText, 10);
  const isValid =
    Number.isInteger(shardNumber) &&
    Number.isInteger(totalShards) &&
    totalShards >= 1 &&
    shardNumber >= 1 &&
    shardNumber <= totalShards;
  if (!isValid) {
    throw new Error(
      `Invalid --shard value "${shard}": expected <number>/<total> with 1 <= number <= total`
    );
  }
  return { shardNumber, totalShards };
}

/**
 * Runs the link check described by the parsed command-line options.
 *
 * @returns {Promise<void>} Resolves when every selected page has been checked
 *   and no reportable broken link was found. When reportable broken links
 *   exist, the process exits with code 1 instead.
 * @throws {Error} When options are invalid, or when reading the sitemap or
 *   checking a page fails.
 */
async function run() {
  const { url, ignoreUrls, concurrency, shard, verbose } = program.opts();
  if (!url) {
    throw new Error("Missing required option: -u, --url <string>");
  }
  const connectionLimit = parseConcurrency(concurrency);
  const shardSelection = parseShard(shard);

  // A Set keeps the per-link membership test cheap as OK links accumulate.
  const urlsToIgnore = new Set(
    ignoreUrls
      ? ignoreUrls
          .split(",")
          .map((entry) => entry.trim())
          .filter((entry) => entry.length > 0)
      : []
  );
  const brokenLinks = [];

  const checker = new LinkChecker(url);
  signale.start("Checking for links at %s", url);
  let pageUrls = await getPagesFromSiteMap(new URL("/sitemap.xml", url).toString());

  checker.on("link", (link) => {
    const logger = LOGGER_MAP[link.state];
    if (logger && verbose && link.state !== LinkState.SKIPPED) {
      logger(link.url);
    }
    if (link.state === LinkState.BROKEN) {
      brokenLinks.push(link);
    } else if (link.state === LinkState.OK) {
      // A link that already passed is skipped on the pages checked afterwards.
      urlsToIgnore.add(link.url);
    }
  });

  if (shardSelection !== null) {
    const { shardNumber, totalShards } = shardSelection;
    // Shard numbers are 1-based; pages are dealt out round-robin by index.
    pageUrls = pageUrls.filter((_, index) => index % totalShards === shardNumber - 1);
    signale.log("Shard", shardNumber, "of", totalShards);
  }
  signale.log("Checking", pageUrls.length, "pages");

  // Skip in-page anchors, links matching SKIPPED_HTTPS_LINK_PATTERN, and
  // anything in the ignore set.
  const linksToSkip = async (link) =>
    link.startsWith("#") || SKIPPED_HTTPS_LINK_PATTERN.test(link) || urlsToIgnore.has(link);

  // Pages are checked strictly one after another so that links validated on
  // earlier pages are already in the ignore set for later ones.
  for (const pageUrl of pageUrls) {
    await checker.check({
      path: pageUrl,
      retry: true,
      retryErrors: true,
      retryErrorsCount: DEFAULT_ERROR_RETRIES,
      retryErrorsJitter: DEFAULT_ERROR_RETRY_JITTERS,
      linksToSkip,
      concurrency: connectionLimit,
    });
  }
  signale.complete("Check completed", url);

  // Only broken links with a truthy (non-zero) status are reported.
  // NOTE(review): status 0 presumably means no HTTP response was received —
  // confirm that dropping those is intended.
  const brokenLinksWithHttpErrors = brokenLinks.filter((link) => Boolean(link.status));
  if (brokenLinksWithHttpErrors.length > 0) {
    signale.log("Invalid links found");
    const resultsLogger = signale.scope("Results");
    brokenLinksWithHttpErrors.forEach((link) => {
      resultsLogger.error({
        url: link.url,
        status: link.status,
        parent: link.parent,
      });
    });
    process.exit(1);
  }
}

// Any failure (invalid options, sitemap fetch error, checker error) must yield
// a non-zero exit code rather than an unhandled promise rejection.
run().catch((error) => {
  signale.error(error);
  process.exit(1);
});