UNPKG

link-checker-cli

Version:

CLI tool to check for broken links in a website or project

77 lines (76 loc) 3.57 kB
import { parse } from "parse5";
import { isLink } from "../utils.js";

/** Lower-case file extensions (without the dot) that mark a URL as an image. */
const IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "gif", "bmp", "svg", "webp"];

/**
 * Collects the links and element ids found in an HTML page.
 *
 * The methods are assigned as arrow functions in the constructor so they stay
 * bound to the instance even when passed around as bare callbacks.
 */
export class ParserHtml {
  constructor() {
    /**
     * Parses an HTML string and gathers every link and every `id` attribute.
     *
     * @param {string} page - HTML source, handed to parse5's `parse`.
     * @param {URL|string} [url] - Base URL of the page. When omitted, link
     *   values are left unresolved and only image, anchor and stylesheet
     *   links are reported.
     * @returns {{links: Array<{value: string, type: string, parent: string}>, ids: string[]}}
     */
    this.parsePage = (page, url) => {
      const links = [];
      const ids = [];
      this.getLinksFromNode(parse(page), links, ids, url);
      return { links, ids };
    };

    /**
     * Recursively walks `node` and its `childNodes`, appending to `links` and
     * `ids` in document order. Both arrays are mutated in place.
     *
     * Links are read from `<a href>`, `<link href>` and `<img src>`. Each
     * record is `{ value, type, parent }` where `type` is one of `"image"`,
     * `"anchor"`, `"style"`, `"internal"` or `"external"`, and `parent` is the
     * JSON-serialised `class` attribute object followed by a space
     * (`"undefined "` when the element has no `class` attribute).
     *
     * @param {object} node - Tree node; expected to expose `nodeName` and
     *   optionally `attrs` / `childNodes` (presumably a parse5 default-tree
     *   node — verify against the tree adapter in use).
     * @param {Array<object>} links - Output array for link records.
     * @param {string[]} ids - Output array for non-empty `id` attribute values.
     * @param {URL|string} [baseUrl] - Base used to resolve relative links.
     */
    this.getLinksFromNode = (node, links, ids, baseUrl) => {
      const isLinkTag =
        node.nodeName === "a" ||
        node.nodeName === "img" ||
        node.nodeName === "link";

      if (isLinkTag && node.attrs) {
        const isImage = node.nodeName === "img";
        const isCss =
          node.nodeName === "link" &&
          node.attrs.some(
            (attr) => attr.name === "rel" && attr.value === "stylesheet"
          );
        // Images carry their target in `src`; anchors and <link> use `href`.
        const targetAttrName = isImage ? "src" : "href";
        const link = node.attrs.find((attr) => attr.name === targetAttrName);

        if (link) {
          const classAttr = node.attrs.find((attr) => attr.name === "class");
          const parent = `${JSON.stringify(classAttr)} `;

          // Surrounding whitespace is not part of the URL, so drop it first;
          // then encode EVERY inner space (a string pattern passed to
          // `replace` would only touch the first occurrence).
          let value = link.value.trim().replace(/ /g, "%20");
          const isAnchor = value.startsWith("#");

          // NOTE(review): `new URL` throws a TypeError on an unparsable
          // href/base; that propagates to the caller exactly as before.
          let resolved = null;
          if (baseUrl) {
            resolved = new URL(value, baseUrl);
            value = resolved.href;
          }

          if (isImage) {
            links.push({ value, type: "image", parent });
          } else if (isAnchor) {
            links.push({ value, type: "anchor", parent });
          } else if (isCss) {
            links.push({ value, type: "style", parent });
          } else if (isLink(value) && resolved) {
            // Compare parsed origins rather than string prefixes, so that
            // "https://example.com.evil.org" or a different port is not
            // mistaken for the same site. Parsing the base also makes a
            // string `baseUrl` work, not only a URL instance.
            const baseOrigin = new URL(baseUrl).origin;
            // Opaque origins serialise to "null" and never count as equal.
            const isSameOrigin =
              baseOrigin !== "null" && resolved.origin === baseOrigin;
            const type = this.isImageExtension(value)
              ? "image"
              : isSameOrigin
                ? "internal"
                : "external";
            links.push({ value, type, parent });
          }
        }
      }

      if ("attrs" in node) {
        const idAttr = node.attrs?.find((attr) => attr.name === "id");
        if (idAttr && idAttr.value) {
          ids.push(idAttr.value);
        }
      }

      if ("childNodes" in node) {
        node.childNodes.forEach((child) =>
          this.getLinksFromNode(child, links, ids, baseUrl)
        );
      }
    };

    /**
     * Tells whether the path of an absolute URL ends in a known image
     * extension (case-insensitive). Query string and fragment are ignored.
     *
     * @param {string} link - Absolute URL.
     * @returns {boolean}
     * @throws {TypeError} If `link` cannot be parsed as an absolute URL.
     */
    this.isImageExtension = (link) => {
      const { pathname } = new URL(link);
      const extension = pathname.split(".").pop()?.toLowerCase();
      return IMAGE_EXTENSIONS.includes(extension || "");
    };
  }
}

/** Shared, ready-to-use parser instance. */
export const parserHtml = new ParserHtml();