link-checker-cli
Version:
CLI tool to check for broken links in a website or project
363 lines (352 loc) • 11.8 kB
JavaScript
// src/main.ts
import { command, string, run, boolean, number } from "@drizzle-team/brocli";
// src/services/siteCommandProcessor.ts
import chalk2 from "chalk";
// src/views.ts
import { TaskView } from "hanji";
/**
 * Cycles through a fixed list of animation frames.
 *
 * `value()` returns the current frame and `tick()` advances to the next one,
 * wrapping back to the first frame after the last.
 */
var Spinner = class {
  /**
   * @param frames Array of frames to cycle through, in display order.
   */
  constructor(frames) {
    this.frames = frames;
    this.offset = 0;
    // Advance the animation by one frame.
    this.tick = () => {
      this.iterator();
    };
    // Current frame (undefined when the frame list is empty).
    this.value = () => {
      return this.frames[this.offset];
    };
    this.iterator = () => {
      const count = this.frames.length;
      // Nothing to cycle through; leave the offset at 0 instead of producing NaN.
      if (count === 0) return;
      // Wrap on the full length so the last frame is shown too. The previous
      // `% (length - 1)` skipped it and yielded NaN for a single-frame spinner.
      this.offset = (this.offset + 1) % count;
    };
  }
};
/**
 * Task view shown while links are being collected: an animated spinner plus
 * the number of urls found so far.
 */
var GetUrlsView = class extends TaskView {
  constructor() {
    super();
    const frames = "\u28F7\u28EF\u28DF\u287F\u28BF\u28FB\u28FD\u28FE".split("");
    this.spinner = new Spinner(frames);
    this.counter = 0;
    // Replaces the displayed count; the next redraw picks it up.
    this.setCounter = (count) => {
      this.counter = count;
    };
    // Advance the spinner and request a redraw every 128 ms.
    const redraw = () => {
      this.spinner.tick();
      this.requestLayout();
    };
    this.timeout = setInterval(redraw, 128);
    // Stop the redraw timer once the view emits "detach".
    this.on("detach", () => clearInterval(this.timeout));
  }
  /**
   * @param status Task status; only "pending" produces output.
   * @returns The progress line (newline-terminated) or an empty string.
   */
  render(status) {
    if (status !== "pending") {
      return "";
    }
    const frame = this.spinner.value();
    return `${frame} Searching for urls. Found: ${this.counter}\n`;
  }
};
/**
 * Task view shown while links are being checked: an animated spinner plus a
 * "checked / total" counter.
 */
var CheckLinkView = class extends TaskView {
  /**
   * @param linksCount Total shown on the right-hand side of the counter.
   */
  constructor(linksCount) {
    super();
    this.linksCount = linksCount;
    const frames = "\u28F7\u28EF\u28DF\u287F\u28BF\u28FB\u28FD\u28FE".split("");
    this.spinner = new Spinner(frames);
    this.counter = 0;
    // Records one more checked link; the next redraw picks it up.
    this.increment = () => {
      this.counter++;
    };
    // Advance the spinner and request a redraw every 128 ms.
    const redraw = () => {
      this.spinner.tick();
      this.requestLayout();
    };
    this.timeout = setInterval(redraw, 128);
    // Stop the redraw timer once the view emits "detach".
    this.on("detach", () => clearInterval(this.timeout));
  }
  /**
   * @param status Task status; only "pending" produces output.
   * @returns The progress line (newline-terminated) or an empty string.
   */
  render(status) {
    if (status !== "pending") {
      return "";
    }
    const frame = this.spinner.value();
    return `${frame} Checked ${this.counter}/${this.linksCount}\n`;
  }
};
// src/services/parserHTML.ts
import { parse } from "parse5";
// src/utils.ts
/**
 * Reports whether `value` contains something that looks like an ftp, http or
 * https URL. The pattern is not anchored, so a URL embedded in a longer string
 * also counts.
 *
 * @param value Text to inspect.
 * @returns true when the pattern matches anywhere in `value`.
 */
function isLink(value) {
  return /(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-/]))?/.test(value);
}
/**
 * Ensures the input carries an http(s) scheme.
 *
 * @param input Address as typed by the user.
 * @returns `input` unchanged when it already starts with `http://` or
 *   `https://` (case-insensitive); otherwise `input` prefixed with `https://`.
 */
function normalizeUrl(input) {
  const hasHttpScheme = /^https?:\/\//i.test(input);
  return hasHttpScheme ? input : `https://${input}`;
}
// src/services/parserHTML.ts
// File extensions (lower-case, without the dot) that mark a link as an image.
var IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"];
/**
 * Extracts links and element ids from an HTML document.
 *
 * Links come from `<a href>`, `<link href>` and `<img src>`; each is reported
 * as `{ value, type }` where type is "image", "anchor", "style", "internal" or
 * "external". Ids are the non-empty `id` attribute values of all elements.
 */
var ParserHtml = class {
  constructor() {
    /**
     * Turns one raw attribute value into a link entry, or null when the value
     * should be ignored (unparsable URL, or a non-http/ftp target).
     */
    const toLinkEntry = (node, rawValue, baseUrl) => {
      const isImage = node.nodeName === "img";
      const isCss = node.nodeName === "link" && node.attrs.some(
        (attr) => attr.name === "rel" && attr.value === "stylesheet"
      );
      // Drop surrounding whitespace and encode every inner space, not just the first one.
      let value = rawValue.trim().replace(/ /g, "%20");
      // Decide this before resolving: afterwards the value is an absolute URL.
      const isAnchor = value.startsWith("#");
      if (baseUrl) {
        try {
          value = new URL(value, baseUrl).href;
        } catch {
          // A malformed href must not abort parsing of the rest of the page.
          return null;
        }
      }
      if (isImage) return { value, type: "image" };
      if (isAnchor) return { value, type: "anchor" };
      if (isCss) return { value, type: "style" };
      // Remaining links are only classified when they can be compared with the page URL.
      if (!isLink(value) || !baseUrl) return null;
      if (this.isImageExtension(value)) return { value, type: "image" };
      // Compare parsed origins; a string prefix test would treat
      // "https://example.com.evil.org" as part of "https://example.com".
      const type = new URL(value).origin === baseUrl.origin ? "internal" : "external";
      return { value, type };
    };
    /**
     * Parses an HTML string.
     *
     * @param page HTML source.
     * @param url URL object of the page, used to resolve relative links.
     * @returns `{ links, ids }` collected from the whole document.
     */
    this.parsePage = (page, url) => {
      const document = parse(page);
      const links = [];
      const ids = [];
      this.getLinksFromNode(document, links, ids, url);
      return { links, ids };
    };
    /**
     * Walks `node` and its descendants depth-first, appending to `links` and `ids`.
     *
     * @param node Tree node with `nodeName` and optionally `attrs` / `childNodes`.
     * @param links Output array of `{ value, type }` entries (mutated).
     * @param ids Output array of id attribute values (mutated).
     * @param baseUrl URL object of the page; when absent, only images, anchors
     *   and stylesheets are reported and values are left unresolved.
     */
    this.getLinksFromNode = (node, links, ids, baseUrl) => {
      const isImage = node.nodeName === "img";
      const carriesLink = node.nodeName === "a" || isImage || node.nodeName === "link";
      if (carriesLink && node.attrs) {
        const attrName = isImage ? "src" : "href";
        const link = node.attrs.find((attr) => attr.name === attrName);
        const entry = link ? toLinkEntry(node, link.value, baseUrl) : null;
        if (entry) links.push(entry);
      }
      if ("attrs" in node) {
        const idAttr = node.attrs?.find((attr) => attr.name === "id");
        if (idAttr && idAttr.value) {
          ids.push(idAttr.value);
        }
      }
      if ("childNodes" in node) {
        node.childNodes.forEach(
          (child) => this.getLinksFromNode(child, links, ids, baseUrl)
        );
      }
    };
    /**
     * @param link Absolute URL string.
     * @returns true when the URL path ends in one of IMAGE_EXTENSIONS;
     *   false otherwise, including when `link` cannot be parsed.
     */
    this.isImageExtension = (link) => {
      let url;
      try {
        url = new URL(link);
      } catch {
        return false;
      }
      const extension = url.pathname.split(".").pop()?.toLowerCase();
      return IMAGE_EXTENSIONS.includes(extension || "");
    };
  }
};
var parserHtml = new ParserHtml();
// src/promisePool.ts
/**
 * Runs an async handler over a list of items with a cap on how many handler
 * calls are in flight at once.
 *
 * Options:
 * - `items`: iterable of inputs.
 * - `concurrency`: maximum number of simultaneously running handler calls.
 * - `handler`: function called once per item; may return a promise.
 */
var PromisePool = class {
  constructor(options) {
    // Fulfilled handler values, in completion order.
    this.results = [];
    // Rejection reasons of failed handler calls, in completion order.
    this.errors = [];
    /**
     * Processes all items.
     *
     * @returns `{ results, errors }` once every handler call has fulfilled.
     * @throws The first handler failure. No further items are started after a
     *   failure, and calls already running are allowed to settle before the
     *   error is thrown, so none of their rejections is left unhandled.
     */
    this.process = async () => {
      const inFlight = /* @__PURE__ */ new Set();
      let failed = false;
      let firstError;
      for (const item of this.items) {
        if (failed) break;
        // The executor invokes the handler synchronously (as before) while
        // turning a synchronous throw into a rejection handled below.
        const task = new Promise((resolve) => resolve(this.handler(item))).then(
          (result) => {
            this.results.push(result);
          },
          (error) => {
            this.errors.push(error);
            if (!failed) {
              failed = true;
              firstError = error;
            }
          }
        ).finally(() => inFlight.delete(task));
        inFlight.add(task);
        if (inFlight.size >= this.concurrency) {
          // Wait for a free slot. Tracked tasks never reject, so this only
          // resumes the loop; failures are detected through `failed`.
          await Promise.race(inFlight);
        }
      }
      await Promise.all(inFlight);
      if (failed) throw firstError;
      return { results: this.results, errors: this.errors };
    };
    this.items = options.items;
    this.concurrency = options.concurrency;
    this.handler = options.handler;
  }
};
/**
 * Convenience wrapper: builds a PromisePool from `options` and runs it.
 *
 * @returns The promise returned by `PromisePool#process`.
 */
function promisePool(options) {
  const pool = new PromisePool(options);
  return pool.process();
}
// src/services/linksProcessor.ts
import { render } from "hanji";
import chalk from "chalk";
/**
 * Collects the links of a page (optionally following internal links) and
 * checks whether each collected link is reachable.
 */
var LinksProcessor = class {
  /**
   * @param size Maximum number of requests in flight at once. Values below 1
   *   are raised to 1; a missing or non-finite value keeps the default of 10.
   */
  constructor(size) {
    // Page URL without fragment -> id attribute values found on that page.
    this.pageIds = /* @__PURE__ */ new Map();
    // Every link value reported so far, across all crawled pages.
    this.uniqueLinks = /* @__PURE__ */ new Set();
    // URLs that were already requested (or reached through a redirect).
    this.history = /* @__PURE__ */ new Set();
    this.poolSize = 10;
    // Key used in pageIds: the normalised page URL with any fragment removed,
    // which is exactly the part before "#" of an anchor resolved against it.
    const pageKey = (url) => {
      const copy = new URL(url.href);
      copy.hash = "";
      return copy.href;
    };
    // Percent-decodes a fragment; malformed escapes leave it unchanged.
    const decodeFragment = (fragment) => {
      try {
        return decodeURIComponent(fragment);
      } catch {
        return fragment;
      }
    };
    // Discards an unread response body so it is not kept around until GC.
    const releaseBody = (response) => {
      response.body?.cancel().catch(() => {
      });
    };
    /**
     * Fetches `link`, extracts its links and, when `recursive` is set, follows
     * internal links that have not been visited yet.
     *
     * @param link Absolute URL of the page.
     * @param recursive Whether to crawl internal links found on the page.
     * @param view Optional view; its `setCounter` receives the running number
     *   of unique links.
     * @returns Links not reported before (`{ value, type }`). Empty when the
     *   page was already visited, could not be fetched, or had no body.
     */
    this.getAllLinks = async (link, recursive, view) => {
      if (this.history.has(link)) return [];
      this.history.add(link);
      let responseText = null;
      try {
        const response = await fetch(link);
        if (response.ok) {
          if (response.redirected) {
            // The requested URL stays in history so it is not fetched again;
            // a redirect target that was already crawled is not parsed twice.
            if (this.history.has(response.url)) {
              releaseBody(response);
              return [];
            }
            this.history.add(response.url);
            link = response.url;
          }
          responseText = await response.text();
        }
      } catch (err) {
        render(chalk.red(`ERROR fetching link ${link}`));
        console.log(err);
      }
      if (!responseText) return [];
      // Only reached after a successful fetch, so `link` is a parsable URL.
      const url = new URL(link);
      const { links, ids } = parserHtml.parsePage(responseText, url);
      this.pageIds.set(pageKey(url), ids);
      let result = [];
      const internalLinks = [];
      for (const found of links) {
        const { value } = found;
        if (found.type === "internal" && !this.history.has(value)) {
          internalLinks.push(found);
        }
        if (!this.uniqueLinks.has(value)) {
          this.uniqueLinks.add(value);
          result.push(found);
        }
      }
      if (view) view.setCounter(this.uniqueLinks.size);
      if (recursive) {
        await promisePool({
          items: internalLinks,
          concurrency: this.poolSize,
          handler: async (internalLink) => {
            const nested = await this.getAllLinks(internalLink.value, recursive, view);
            result = result.concat(nested);
          }
        });
      }
      return result;
    };
    /**
     * Checks every link whose type is listed in `included`.
     *
     * @param links Links as returned by `getAllLinks`.
     * @param included Link types to check; all other links are only counted.
     * @param view Optional view; its `increment` is called per checked link.
     * @param userAgent Optional User-Agent header value for the requests.
     * @returns `{ validCount, brokenLinks, excluded }`.
     */
    this.checkAllLinks = async (links, included, view, userAgent) => {
      const brokenLinks = [];
      let validCount = 0;
      let excluded = 0;
      await promisePool({
        items: links,
        concurrency: this.poolSize,
        handler: async (link) => {
          if (!included.includes(link.type)) {
            excluded++;
            return;
          }
          const res = await this.checkLink(link, userAgent, view);
          if (res.isValid) validCount++;
          else brokenLinks.push(res);
        }
      });
      return { validCount, brokenLinks, excluded };
    };
    /**
     * Checks a single link.
     *
     * Anchors are verified against the ids recorded for their page by
     * `getAllLinks`; an empty fragment is always valid. Every other link is
     * requested and counts as valid on a 2xx response.
     *
     * @param link `{ value, type }` entry.
     * @param userAgent Optional User-Agent header value.
     * @param view Optional view; its `increment` is called once per check.
     * @returns `{ link, statusCode, isValid, message? }`; a failed request is
     *   reported with status code 500.
     */
    this.checkLink = async (link, userAgent, view) => {
      const { value, type } = link;
      const defaultUserAgent = "Broken links checker/1.0 (Node.js; is link active)";
      try {
        if (type === "anchor") {
          const hashIndex = value.indexOf("#");
          const site = hashIndex === -1 ? value : value.slice(0, hashIndex);
          const fragment = hashIndex === -1 ? "" : value.slice(hashIndex + 1);
          const ids = this.pageIds.get(site) ?? [];
          // Resolved URLs carry the fragment percent-encoded; ids are stored decoded.
          const hasAnchor = fragment === "" || ids.includes(fragment) || ids.includes(decodeFragment(fragment));
          return hasAnchor ? { link: value, statusCode: 200, isValid: true } : { link: value, statusCode: 404, isValid: false, message: "Anchor section not found!" };
        }
        const headers = { "User-Agent": userAgent ?? defaultUserAgent };
        const response = await fetch(value, { headers });
        // Only the status matters here.
        releaseBody(response);
        return {
          link: value,
          statusCode: response.status,
          isValid: response.ok
        };
      } catch {
        return {
          link: value,
          statusCode: 500,
          isValid: false
        };
      } finally {
        // Count every checked link, anchors included, so progress can complete.
        if (view) view.increment();
      }
    };
    /**
     * Reads the `Sitemap:` entries of a robots.txt file.
     *
     * @param url URL of the robots.txt file.
     * @returns Sitemap URLs in file order; empty when the response is not ok.
     */
    this.getLinksFromRobotsTxt = async (url) => {
      const robotsTxtResponse = await fetch(url);
      const sitemaps = [];
      if (!robotsTxtResponse.ok) return sitemaps;
      const robotsTxt = await robotsTxtResponse.text();
      const sitemapRegex = /^sitemap:\s*(.+)$/i;
      // Accept LF, CRLF and CR line endings; a trailing "\r" would otherwise
      // prevent `$` from matching.
      for (const line of robotsTxt.split(/\r\n|\r|\n/)) {
        const match = line.trim().match(sitemapRegex);
        if (match && match[1]) {
          sitemaps.push(match[1].trim());
        }
      }
      return sitemaps;
    };
    // An undefined size must not wipe out the default and lift the limit.
    if (Number.isFinite(size)) this.poolSize = Math.max(1, size);
  }
};
// src/services/siteCommandProcessor.ts
import { render as render2, renderWithTask } from "hanji";
/**
 * Runs one "inspect" invocation: collects the links of the source site,
 * checks them and prints a summary followed by one line per broken link.
 */
var CliCommandProcessor = class {
  /**
   * @param options Parsed command options; this class reads `source`,
   *   `recursive`, `image`, `style`, `external`, `userAgent` and
   *   `concurrencySize`.
   */
  constructor(options) {
    this.options = options;
    /**
     * Performs the crawl and the check, rendering progress and results.
     *
     * @returns A promise that resolves once all output has been rendered.
     */
    this.createResult = async () => {
      const { image, external, style, recursive, userAgent } = this.options;
      const source = normalizeUrl(this.options.source);
      // Internal links and anchors are always checked; the rest is opt-in.
      const included = ["internal", "anchor"];
      if (external) included.push("external");
      if (style) included.push("style");
      if (image) included.push("image");
      const getLinksView = new GetUrlsView();
      const links = await renderWithTask(
        getLinksView,
        this.linksProcessor.getAllLinks(source, recursive, getLinksView)
      );
      render2(`Unique links found: ${links.length}`);
      const checkLinksView = new CheckLinkView(links.length);
      // The userAgent option is handed over as a fourth argument.
      // NOTE(review): it only takes effect if checkAllLinks accepts that
      // argument; an implementation taking three parameters ignores it.
      const { brokenLinks, validCount, excluded } = await renderWithTask(
        checkLinksView,
        this.linksProcessor.checkAllLinks(links, included, checkLinksView, userAgent)
      );
      render2(
        chalk2.green("Valid links:", validCount) + " " + chalk2.gray(`${included.toString()}`)
      );
      render2(chalk2.red("Unavailable links:", brokenLinks.length));
      render2(chalk2.gray("Excluded links:", excluded));
      // Plain iteration: the loop is run for its output only.
      for (const { link, statusCode, message } of brokenLinks) {
        render2(
          chalk2.red(`ERROR ${statusCode}: ${message ? message : ""} `) + chalk2.gray(link)
        );
      }
    };
    const { concurrencySize } = options;
    this.linksProcessor = new LinksProcessor(concurrencySize);
  }
};
// src/main.ts
// Option definitions for the `inspect` command, built with the string/boolean/number
// builders imported from @drizzle-team/brocli.
var defaultOptions = {
// Required; alias -s. Passed through normalizeUrl before crawling.
source: string().required().alias("-s").desc("Path to website"),
// Off by default; alias -r. Forwarded to getAllLinks as its `recursive` argument.
recursive: boolean().default(false).alias("-r"),
// Off by default; when set, links of type "image" are included in the check.
image: boolean().default(false).alias("-img").desc("Fetches all page images."),
// On by default; when set, links of type "style" are included in the check.
style: boolean().default(true).alias("-css").desc("Add style links to response."),
// Off by default; when set, links of type "external" are included in the check.
external: boolean().default(false).alias("-e").desc("Add external links to response."),
// No default, alias or description is declared for this option.
userAgent: string(),
// Defaults to 10; alias -c. Passed to the LinksProcessor constructor.
concurrencySize: number().default(10).alias("-c").desc("Number of requests running simultaneous.")
};
// The `inspect` command: hands the parsed options to a CliCommandProcessor
// and waits for it to finish rendering its result.
var inspect = command({
  name: "inspect",
  options: defaultOptions,
  handler: async (opts) => {
    await new CliCommandProcessor(opts).createResult();
  }
});
// Register the command with brocli's runner.
run([inspect]);