/*
 * link-checker-cli
 * Version:
 * CLI tool to check for broken links in a website or project
 * 77 lines (76 loc) • 3.57 kB
 * JavaScript
 */
import { parse } from "parse5";
import { isLink } from "../utils.js";
// File extensions (lowercase, without the leading dot) that
// ParserHtml.isImageExtension recognises as pointing at an image.
const IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"];
/**
 * Resolves `value` against `baseUrl`, returning the absolute href.
 * A malformed reference (e.g. `http://`) must not abort the whole page scan,
 * so on failure the raw value is returned unchanged and can still be reported.
 *
 * @param {string} value - Cleaned href/src attribute value.
 * @param {string|URL} baseUrl - URL of the page the value was found on.
 * @returns {string} Absolute URL, or `value` itself when it cannot be resolved.
 */
function resolveAgainstBase(value, baseUrl) {
  try {
    return new URL(value, baseUrl).href;
  } catch {
    return value;
  }
}

/**
 * Tells whether `value` has the same origin as `baseUrl`.
 * Comparing parsed origins (instead of a string prefix) keeps look-alike hosts
 * such as `https://example.com.evil.org` from being treated as internal.
 *
 * @param {string} value - Absolute URL to classify.
 * @param {string|URL} baseUrl - URL of the page being scanned.
 * @returns {boolean} `false` when either URL is unparsable or the origin is opaque.
 */
function isSameOrigin(value, baseUrl) {
  try {
    const { origin } = new URL(value);
    // Opaque origins serialise to the string "null" and never match each other.
    return origin !== "null" && origin === new URL(baseUrl).origin;
  } catch {
    return false;
  }
}

/**
 * Tells whether an attribute list carries a `rel` containing the `stylesheet`
 * token. `rel` is a space-separated, case-insensitive token list, so
 * `rel="alternate stylesheet"` and `rel="Stylesheet"` both qualify.
 *
 * @param {Array<{name: string, value: string}>} attrs - Element attributes.
 * @returns {boolean}
 */
function hasStylesheetRel(attrs) {
  return attrs.some((attr) => attr.name === "rel" &&
    String(attr.value).toLowerCase().split(/\s+/).includes("stylesheet"));
}

/**
 * Extracts links and element ids from an HTML document.
 *
 * Methods are assigned as arrow functions in the constructor so they stay
 * bound to the instance even when passed around detached.
 */
export class ParserHtml {
  constructor() {
    /**
     * Parses an HTML string and collects every link and element id in it.
     *
     * @param {string} page - HTML source.
     * @param {string|URL} [url] - URL of the page; used to resolve relative
     *   links and to tell internal links from external ones.
     * @returns {{links: Array<{value: string, type: string, parent: string}>, ids: string[]}}
     */
    this.parsePage = (page, url) => {
      const document = parse(page);
      const links = [];
      const ids = [];
      this.getLinksFromNode(document, links, ids, url);
      return { links, ids };
    };

    /**
     * Recursively walks `node`, appending found links to `links` and found
     * `id` attribute values to `ids` (both arrays are mutated in place).
     *
     * Link types: "image" (`<img src>` or a link whose path has an image
     * extension), "anchor" (value starts with `#`), "style" (`<link>` with a
     * stylesheet rel), "internal"/"external" (other values accepted by
     * `isLink`, only when `baseUrl` is given).
     *
     * @param {object} node - Tree node with `nodeName` and optional `attrs` / `childNodes`.
     * @param {Array<object>} links - Output list of `{ value, type, parent }`.
     * @param {string[]} ids - Output list of element ids.
     * @param {string|URL} [baseUrl] - URL of the page being scanned.
     */
    this.getLinksFromNode = (node, links, ids, baseUrl) => {
      const isImage = node.nodeName === "img";
      const carriesLink = isImage || node.nodeName === "a" || node.nodeName === "link";

      if (carriesLink && node.attrs) {
        // Images reference their target through `src`, everything else through `href`.
        const sourceName = isImage ? "src" : "href";
        const link = node.attrs.find((attr) => attr.name === sourceName);

        if (link) {
          // Surrounding whitespace is not part of the URL; every inner space
          // (not just the first one) has to be percent-encoded.
          const raw = link.value.trim().replace(/ /g, "%20");
          // Must be decided before resolution turns "#id" into an absolute URL.
          const isAnchor = raw.startsWith("#");
          const value = baseUrl ? resolveAgainstBase(raw, baseUrl) : raw;
          // Serialised `class` attribute of the element ("undefined " when absent).
          const parent = `${JSON.stringify(node.attrs.find((attr) => attr.name === "class"))} `;

          let type;
          if (isImage) {
            type = "image";
          } else if (isAnchor) {
            type = "anchor";
          } else if (node.nodeName === "link" && hasStylesheetRel(node.attrs)) {
            type = "style";
          } else if (isLink(value) && baseUrl) {
            if (this.isImageExtension(value)) {
              type = "image";
            } else {
              type = isSameOrigin(value, baseUrl) ? "internal" : "external";
            }
          }

          if (type) {
            links.push({ value, type, parent });
          }
        }
      }

      if ("attrs" in node) {
        const idAttr = node.attrs?.find((attr) => attr.name === "id");
        if (idAttr && idAttr.value) {
          ids.push(idAttr.value);
        }
      }

      if ("childNodes" in node) {
        for (const child of node.childNodes) {
          this.getLinksFromNode(child, links, ids, baseUrl);
        }
      }
    };

    /**
     * Tells whether the path of an absolute URL ends in a known image extension.
     *
     * @param {string} link - Absolute URL.
     * @returns {boolean} `false` for non-image paths and for unparsable input.
     */
    this.isImageExtension = (link) => {
      let pathname;
      try {
        ({ pathname } = new URL(link));
      } catch {
        return false;
      }
      const extension = pathname.split(".").pop()?.toLowerCase();
      return IMAGE_EXTENSIONS.includes(extension || "");
    };
  }
}
// Ready-made ParserHtml instance exported alongside the class.
export const parserHtml = new ParserHtml();