UNPKG

link-checker-cli

Version:

CLI tool to check for broken links in a website or project

363 lines (352 loc) 11.8 kB
#!/usr/bin/env node

// src/main.ts
import { command, string, run, boolean, number } from "@drizzle-team/brocli";

// src/services/siteCommandProcessor.ts
import chalk2 from "chalk";

// src/views.ts
import { TaskView } from "hanji";

/** Braille spinner frames shared by every progress view. */
var SPINNER_FRAMES = "\u28F7\u28EF\u28DF\u287F\u28BF\u28FB\u28FD\u28FE".split("");

/**
 * Cycles through a fixed list of animation frames.
 *
 * `value()` returns the current frame, `tick()` advances to the next one and
 * wraps around after the last frame.
 */
var Spinner = class {
  /** @param {string[]} frames Frames to cycle through, in display order. */
  constructor(frames) {
    this.frames = frames;
    this.offset = 0;
    this.tick = () => {
      this.iterator();
    };
    this.value = () => {
      return this.frames[this.offset];
    };
    this.iterator = () => {
      // Wrap on the full frame count so the last frame is shown too; the
      // guard avoids `% 0` (NaN) for an empty frame list.
      const count = this.frames.length;
      this.offset = count > 0 ? (this.offset + 1) % count : 0;
    };
  }
};

/**
 * Progress view shown while pages are crawled: a spinner plus the number of
 * unique links found so far (updated through `setCounter`).
 */
var GetUrlsView = class extends TaskView {
  constructor() {
    super();
    this.spinner = new Spinner(SPINNER_FRAMES);
    this.counter = 0;
    this.setCounter = (count) => {
      this.counter = count;
    };
    // Advance the spinner and request a redraw every 128 ms until detached.
    this.timeout = setInterval(() => {
      this.spinner.tick();
      this.requestLayout();
    }, 128);
    this.on("detach", () => clearInterval(this.timeout));
  }
  /** Returns the progress line while pending, an empty string otherwise. */
  render(status) {
    if (status === "pending") {
      const spin = this.spinner.value();
      return `${spin} Searching for urls. Found: ${this.counter} `;
    }
    return "";
  }
};

/**
 * Progress view shown while links are verified: a spinner plus
 * "checked / total" counters (advanced through `increment`).
 */
var CheckLinkView = class extends TaskView {
  /** @param {number} linksCount Total number of links that will be processed. */
  constructor(linksCount) {
    super();
    this.linksCount = linksCount;
    this.spinner = new Spinner(SPINNER_FRAMES);
    this.counter = 0;
    this.increment = () => {
      this.counter++;
    };
    // Advance the spinner and request a redraw every 128 ms until detached.
    this.timeout = setInterval(() => {
      this.spinner.tick();
      this.requestLayout();
    }, 128);
    this.on("detach", () => clearInterval(this.timeout));
  }
  /** Returns the progress line while pending, an empty string otherwise. */
  render(status) {
    if (status === "pending") {
      const spin = this.spinner.value();
      return `${spin} Checked ${this.counter}/${this.linksCount} `;
    }
    return "";
  }
};

// src/services/parserHTML.ts
import { parse } from "parse5";

// src/utils.ts

// Anchored at the start so that strings which merely *contain* a URL
// (e.g. "mailto:x?body=http://...") are not treated as links.
var IS_LINK_REGEX = /^(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-/]))?/i;

/**
 * Tells whether `value` starts with an ftp/http/https URL.
 * @param {string} value Candidate string.
 * @returns {boolean}
 */
function isLink(value) {
  return IS_LINK_REGEX.test(value);
}

/**
 * Makes sure the user-supplied source has an http(s) scheme.
 * Surrounding whitespace is dropped and a protocol-relative `//host` prefix is
 * folded into the `https://` that gets prepended.
 * @param {string} input Raw value of the `source` option.
 * @returns {string} URL string starting with `http://` or `https://`.
 */
function normalizeUrl(input) {
  const trimmed = input.trim();
  if (/^https?:\/\//i.test(trimmed)) {
    return trimmed;
  }
  return `https://${trimmed.replace(/^\/\//, "")}`;
}

/**
 * Canonical key for "the page behind this link": the parsed href without its
 * fragment. Used for the crawl history and for the per-page id table so that
 * `https://host`, `https://host/` and `https://host/#x` all map to one key.
 * Falls back to plain text up to the first `#` when `link` is not a valid
 * absolute URL.
 * @param {string} link
 * @returns {string}
 */
function toPageKey(link) {
  try {
    const parsed = new URL(link);
    parsed.hash = "";
    return parsed.href;
  } catch {
    const hashIndex = link.indexOf("#");
    return hashIndex === -1 ? link : link.slice(0, hashIndex);
  }
}

/**
 * Releases a fetch response whose body is not going to be read, so the
 * underlying connection is not held open. Failures are ignored on purpose:
 * this is best-effort cleanup.
 * @param {Response} response
 */
async function discardBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful to do if the stream cannot be cancelled.
  }
}

/**
 * Builds a short human-readable description of a thrown value, including the
 * message of `cause` when both are Error instances.
 * @param {unknown} err
 * @returns {string}
 */
function describeError(err) {
  if (err instanceof Error) {
    const { cause } = err;
    if (cause instanceof Error && cause.message) {
      return `${err.message}: ${cause.message}`;
    }
    return err.message;
  }
  return String(err);
}

/**
 * Extracts the values of all `Sitemap:` directives from robots.txt content.
 * Handles LF, CRLF and CR line endings and leading whitespace.
 * @param {string} robotsTxt Raw robots.txt text.
 * @returns {string[]} Sitemap values in file order.
 */
function parseSitemapDirectives(robotsTxt) {
  const sitemapRegex = /^sitemap:\s*(.+)$/i;
  const sitemaps = [];
  for (const line of robotsTxt.split(/\r\n|\r|\n/)) {
    const match = line.trim().match(sitemapRegex);
    if (match && match[1]) {
      sitemaps.push(match[1].trim());
    }
  }
  return sitemaps;
}

// src/services/parserHTML.ts
var IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"];

/**
 * Extracts links and anchor targets from an HTML document.
 *
 * Links come from `<a href>`, `<link href>` and `<img src>` and are tagged
 * with one of the types "image", "anchor", "style", "internal" or "external".
 */
var ParserHtml = class {
  constructor() {
    /**
     * Parses `page` and collects its links and anchor targets.
     * @param {string} page HTML source.
     * @param {URL} [url] URL of the page; used to resolve relative links and
     *   to tell internal links from external ones.
     * @returns {{links: {value: string, type: string}[], ids: string[]}}
     */
    this.parsePage = (page, url) => {
      const document = parse(page);
      const links = [];
      const ids = [];
      this.getLinksFromNode(document, links, ids, url);
      return { links, ids };
    };

    /**
     * Walks `node` and its descendants in document order, appending to
     * `links` and `ids` (both are mutated in place).
     */
    this.getLinksFromNode = (node, links, ids, baseUrl) => {
      const isImage = node.nodeName === "img";
      const isLinkTag = node.nodeName === "link";
      if ((node.nodeName === "a" || isImage || isLinkTag) && node.attrs) {
        // Images carry their target in `src`, everything else in `href`.
        const targetName = isImage ? "src" : "href";
        const target = node.attrs.find((attr) => attr.name === targetName);
        // `rel` is a space-separated, case-insensitive token list.
        const isCss = isLinkTag && node.attrs.some(
          (attr) => attr.name === "rel" && attr.value.toLowerCase().split(/\s+/).includes("stylesheet")
        );
        if (target) {
          const entry = this.classifyLink(target.value, isImage, isCss, baseUrl);
          if (entry) links.push(entry);
        }
      }
      if ("attrs" in node && node.attrs) {
        for (const attr of node.attrs) {
          if (!attr.value) continue;
          // Fragment targets are element ids and legacy `<a name="...">`.
          const isTarget = attr.name === "id" || attr.name === "name" && node.nodeName === "a";
          if (isTarget) ids.push(attr.value);
        }
      }
      if ("childNodes" in node) {
        for (const child of node.childNodes) {
          this.getLinksFromNode(child, links, ids, baseUrl);
        }
      }
    };

    /**
     * Turns a raw attribute value into a `{ value, type }` entry, or `null`
     * when the value should be ignored (unparsable URL or a non-link scheme
     * such as `mailto:`).
     */
    this.classifyLink = (rawValue, isImage, isCss, baseUrl) => {
      // Drop surrounding whitespace and encode *every* inner space.
      let value = rawValue.trim().replace(/ /g, "%20");
      const isAnchor = value.startsWith("#");
      if (baseUrl) {
        try {
          value = new URL(value, baseUrl).href;
        } catch {
          // A malformed href must not abort parsing of the whole page.
          return null;
        }
      }
      if (isImage) return { value, type: "image" };
      if (isAnchor) return { value, type: "anchor" };
      if (isCss) return { value, type: "style" };
      if (isLink(value) && baseUrl) {
        if (this.isImageExtension(value)) return { value, type: "image" };
        // Compare parsed origins; a string prefix test would treat
        // "https://example.com.evil.com" as internal to "https://example.com".
        const type = new URL(value).origin === baseUrl.origin ? "internal" : "external";
        return { value, type };
      }
      return null;
    };

    /**
     * Tells whether the path of `link` ends with a known image extension.
     * Returns false when `link` is not a valid absolute URL.
     */
    this.isImageExtension = (link) => {
      let pathname;
      try {
        pathname = new URL(link).pathname;
      } catch {
        return false;
      }
      const extension = pathname.split(".").pop()?.toLowerCase();
      return IMAGE_EXTENSIONS.includes(extension || "");
    };
  }
};
var parserHtml = new ParserHtml();

// src/promisePool.ts

/**
 * Runs an async handler over a list of items with a cap on how many handler
 * calls are in flight at once.
 *
 * A failing handler does not stop the pool: its error is recorded in `errors`
 * and the remaining items are still processed.
 */
var PromisePool = class {
  /**
   * @param {{items: Iterable<any>, concurrency?: number, handler: (item: any) => any}} options
   *   `concurrency` left undefined means "no limit"; values that are not a
   *   number of at least 1 are treated as 1.
   */
  constructor(options) {
    this.results = [];
    this.errors = [];
    /**
     * Processes all items.
     * @returns {Promise<{results: any[], errors: any[]}>} Handler results and
     *   errors, each in completion order.
     */
    this.process = async () => {
      const inFlight = /* @__PURE__ */ new Set();
      for (const item of this.items) {
        // Starting from a resolved promise also captures synchronous throws
        // and handlers that return a plain value.
        const task = Promise.resolve().then(() => this.handler(item)).then(
          (result) => {
            this.results.push(result);
          },
          (error) => {
            // Record instead of rethrowing so race()/all() below never reject.
            this.errors.push(error);
          }
        ).finally(() => inFlight.delete(task));
        inFlight.add(task);
        if (inFlight.size >= this.concurrency) {
          await Promise.race(inFlight);
        }
      }
      await Promise.all(inFlight);
      return { results: this.results, errors: this.errors };
    };
    this.items = options.items ?? [];
    if (options.concurrency == null) {
      this.concurrency = Infinity;
    } else {
      const requested = Math.floor(Number(options.concurrency));
      // `NaN >= 1` is false, so garbage input degrades to sequential work.
      this.concurrency = requested >= 1 ? requested : 1;
    }
    this.handler = options.handler;
  }
};

/** Convenience wrapper: builds a PromisePool from `options` and runs it. */
function promisePool(options) {
  const pool = new PromisePool(options);
  return pool.process();
}

// src/services/linksProcessor.ts
import { render } from "hanji";
import chalk from "chalk";

var DEFAULT_USER_AGENT = "Broken links checker/1.0 (Node.js; is link active)";

/**
 * Crawls pages to collect links and then verifies them.
 *
 * State kept across calls:
 * - `history`: page keys (see `toPageKey`) already scheduled for crawling
 * - `uniqueLinks`: every link value already reported by `getAllLinks`
 * - `pageIds`: page key -> anchor targets found on that page
 */
var LinksProcessor = class {
  /**
   * @param {number} [size] Pool concurrency; defaults to 10.
   * @param {string} [userAgent] User-Agent header for requests; a built-in
   *   default is used when omitted.
   */
  constructor(size, userAgent) {
    this.pageIds = /* @__PURE__ */ new Map();
    this.uniqueLinks = /* @__PURE__ */ new Set();
    this.history = /* @__PURE__ */ new Set();
    this.poolSize = size ?? 10;
    this.userAgent = userAgent;

    /** Request headers; an explicit `override` wins over the instance value. */
    this.buildHeaders = (override) => {
      return { "User-Agent": override ?? this.userAgent ?? DEFAULT_USER_AGENT };
    };

    /**
     * Fetches `link`, extracts its links and, when `recursive` is set, does
     * the same for every internal link not crawled yet.
     * @param {string} link Absolute URL of the page to crawl.
     * @param {boolean} recursive Follow internal links.
     * @param {{setCounter: (n: number) => void}} [view] Progress view.
     * @returns {Promise<{value: string, type: string}[]>} Links that had not
     *   been reported before this call.
     */
    this.getAllLinks = async (link, recursive, view) => {
      const visitKey = toPageKey(link);
      if (this.history.has(visitKey)) return [];
      this.history.add(visitKey);
      let result = [];
      let responseText = null;
      let pageUrl = link;
      try {
        const response = await fetch(link, { headers: this.buildHeaders() });
        const contentType = response.headers.get("content-type");
        // Only HTML can contain links; skip images, archives and the like.
        const looksLikeHtml = contentType === null || /html/i.test(contentType);
        if (response.ok && looksLikeHtml) {
          if (response.redirected) {
            const finalKey = toPageKey(response.url);
            if (finalKey !== visitKey) {
              if (this.history.has(finalKey)) {
                // The redirect target was (or is being) crawled already.
                await discardBody(response);
                return result;
              }
              // Keep the original key too so this redirect is not re-fetched.
              this.history.add(finalKey);
            }
            pageUrl = response.url;
          }
          responseText = await response.text();
        } else {
          await discardBody(response);
        }
      } catch (err) {
        render(chalk.red(`ERROR fetching link ${link}`));
        console.log(err);
      }
      if (responseText) {
        // Safe to parse here: the fetch above succeeded for this URL.
        const url = new URL(pageUrl);
        const { links, ids } = parserHtml.parsePage(responseText, url);
        // Same key shape that anchor checks use for their lookup.
        this.pageIds.set(toPageKey(pageUrl), ids);
        const internalLinks = [];
        for (const found of links) {
          const { value } = found;
          if (found.type === "internal" && !this.history.has(toPageKey(value))) {
            internalLinks.push(found);
          }
          if (!this.uniqueLinks.has(value)) {
            this.uniqueLinks.add(value);
            result.push(found);
          }
        }
        if (view) view.setCounter(this.uniqueLinks.size);
        if (recursive) {
          // NOTE(review): each recursion level starts its own pool, so the
          // total number of concurrent requests can exceed `poolSize`.
          const { results, errors } = await promisePool({
            items: internalLinks,
            concurrency: this.poolSize,
            handler: (internalLink) => this.getAllLinks(internalLink.value, recursive, view)
          });
          result = result.concat(results.flat());
          for (const error of errors) {
            render(chalk.red(`ERROR processing links from ${pageUrl}`));
            console.log(error);
          }
        }
      }
      return result;
    };

    /**
     * Verifies every link whose type is listed in `included`.
     * @param {{value: string, type: string}[]} links
     * @param {string[]} included Link types to verify; others are counted as
     *   excluded.
     * @param {{increment: () => void}} [view] Progress view; advanced once
     *   per processed link, excluded ones included.
     * @returns {Promise<{validCount: number, brokenLinks: object[], excluded: number}>}
     */
    this.checkAllLinks = async (links, included, view) => {
      const brokenLinks = [];
      let validCount = 0;
      let excluded = 0;
      await promisePool({
        items: links,
        concurrency: this.poolSize,
        handler: async (link) => {
          if (!included.includes(link.type)) {
            excluded++;
            if (view) view.increment();
            return;
          }
          const res = await this.checkLink(link, void 0, view);
          if (res.isValid) validCount++;
          else brokenLinks.push(res);
        }
      });
      return { validCount, brokenLinks, excluded };
    };

    /**
     * Verifies one link. Anchors are checked against the targets recorded
     * while crawling; everything else is fetched and is valid on a 2xx status.
     * Never rejects: failures are reported as `statusCode: 500`.
     * @param {{value: string, type: string}} link
     * @param {string} [userAgent] Overrides the instance User-Agent.
     * @param {{increment: () => void}} [view] Progress view.
     * @returns {Promise<{link: string, statusCode: number, isValid: boolean, message?: string}>}
     */
    this.checkLink = async (link, userAgent, view) => {
      const { value, type } = link;
      try {
        if (type === "anchor") {
          return this.checkAnchor(value);
        }
        const response = await fetch(value, { headers: this.buildHeaders(userAgent) });
        // Only the status matters; release the body right away.
        await discardBody(response);
        return {
          link: value,
          statusCode: response.status,
          isValid: response.ok
        };
      } catch (err) {
        return {
          link: value,
          statusCode: 500,
          isValid: false,
          message: describeError(err)
        };
      } finally {
        // Count every processed link, whichever branch produced the result.
        if (view) view.increment();
      }
    };

    /**
     * Checks a same-page anchor (`<page>#<fragment>`) against the targets
     * recorded for that page. An empty fragment and "top" (any case) always
     * count as valid because they address the top of the document.
     */
    this.checkAnchor = (value) => {
      const hashIndex = value.indexOf("#");
      const site = hashIndex === -1 ? value : value.slice(0, hashIndex);
      const rawAnchor = hashIndex === -1 ? "" : value.slice(hashIndex + 1);
      let anchor = rawAnchor;
      try {
        // Resolved hrefs carry percent-encoded fragments; ids are stored raw.
        anchor = decodeURIComponent(rawAnchor);
      } catch {
        // Malformed escape sequence: compare the raw text instead.
      }
      const ids = this.pageIds.get(toPageKey(site)) ?? this.pageIds.get(site) ?? [];
      const pointsToTop = anchor === "" || anchor.toLowerCase() === "top";
      const hasAnchor = pointsToTop || ids.includes(anchor) || ids.includes(rawAnchor);
      return hasAnchor ? { link: value, statusCode: 200, isValid: true } : {
        link: value,
        statusCode: 404,
        isValid: false,
        message: "Anchor section not found!"
      };
    };

    /**
     * Downloads a robots.txt file and returns the values of its `Sitemap:`
     * directives. Returns an empty list for a non-2xx response; a failed
     * request rejects.
     * @param {string} url URL of the robots.txt file.
     * @returns {Promise<string[]>}
     */
    this.getLinksFromRobotsTxt = async (url) => {
      const robotsTxtResponse = await fetch(url, { headers: this.buildHeaders() });
      if (!robotsTxtResponse.ok) {
        await discardBody(robotsTxtResponse);
        return [];
      }
      return parseSitemapDirectives(await robotsTxtResponse.text());
    };
  }
};

// src/services/siteCommandProcessor.ts
import { render as render2, renderWithTask } from "hanji";

/**
 * Implements the `inspect` command: crawl the source, check the links that
 * were found and print a summary.
 */
var CliCommandProcessor = class {
  /** @param {object} options Parsed CLI options (see `defaultOptions`). */
  constructor(options) {
    this.options = options;
    /** Runs the crawl + check pipeline and renders the report. */
    this.createResult = async () => {
      const { image, external, style, recursive } = this.options;
      const source = normalizeUrl(this.options.source);
      // Internal links and anchors are always checked; the rest is opt-in.
      const included = ["internal", "anchor"];
      if (external) included.push("external");
      if (style) included.push("style");
      if (image) included.push("image");
      const getLinksView = new GetUrlsView();
      const links = await renderWithTask(
        getLinksView,
        this.linksProcessor.getAllLinks(source, recursive, getLinksView)
      );
      render2(`Unique links found: ${links.length}`);
      const checkLinksView = new CheckLinkView(links.length);
      const { brokenLinks, validCount, excluded } = await renderWithTask(
        checkLinksView,
        this.linksProcessor.checkAllLinks(links, included, checkLinksView)
      );
      render2(
        chalk2.green("Valid links:", validCount) + " " + chalk2.gray(`${included.toString()}`)
      );
      render2(chalk2.red("Unavailable links:", brokenLinks.length));
      render2(chalk2.gray("Excluded links:", excluded));
      for (const { link, statusCode, message } of brokenLinks) {
        render2(
          chalk2.red(`ERROR ${statusCode}: ${message ? message : ""} `) + chalk2.gray(link)
        );
      }
    };
    const { concurrencySize, userAgent } = options;
    // Forward the user agent so the CLI option actually reaches the requests.
    this.linksProcessor = new LinksProcessor(concurrencySize, userAgent);
  }
};

// src/main.ts
var defaultOptions = {
  source: string().required().alias("-s").desc("Path to website"),
  recursive: boolean().default(false).alias("-r"),
  image: boolean().default(false).alias("-img").desc("Fetches all page images."),
  style: boolean().default(true).alias("-css").desc("Add style links to response."),
  external: boolean().default(false).alias("-e").desc("Add external links to response."),
  userAgent: string(),
  concurrencySize: number().default(10).alias("-c").desc("Number of requests running simultaneous.")
};
var inspect = command({
  name: "inspect",
  options: defaultOptions,
  handler: async (opts) => {
    const commandProcessor = new CliCommandProcessor(opts);
    await commandProcessor.createResult();
  }
});
run([inspect]);