UNPKG

@mintlify/link-rot

Version:

Static checking for broken internal links

307 lines (306 loc) 11.2 kB
import { getFileListWithDirectories } from '@mintlify/prebuild';
import { existsSync } from 'fs';
import isAbsoluteUrl from 'is-absolute-url';
import { parse, join, resolve, relative, dirname, basename } from 'path';
import { sep as WINDOWS_SEPARATOR } from 'path/win32';
import { getLinkPaths, normalizePath } from './prebuild.js';

/**
 * Classification of a path found in a page.
 */
const PathType = Object.freeze({
  INTERNAL: 'internal',
  EXTERNAL: 'external',
  DATA: 'data',
});

/**
 * The syntactic construct a path was found in.
 */
export const Wrapper = Object.freeze({
  MD: 'md',
  SRC: 'src',
  HREF: 'href',
  CARD: 'card',
});

/**
 * The kind of relationship an edge represents.
 */
export const EdgeType = Object.freeze({
  CONTENT: 'content',
  NAVIGATION: 'navigation',
});

/**
 * An MdxPath is a path in an Mdx page. Contains all path-related information.
 */
export class MdxPath {
  /**
   * Classify a raw path string.
   *
   * The `data:` check runs before the absolute-URL check: a data URI carries a
   * scheme and is therefore also an absolute URL, so testing it second would
   * never yield `PathType.DATA`.
   *
   * @param {string} url - the raw path as written in the page
   * @returns {string} one of the PathType values
   */
  static getPathType(url) {
    if (url.startsWith('data:')) {
      return PathType.DATA;
    }
    if (isAbsoluteUrl(url)) {
      return PathType.EXTERNAL;
    }
    return PathType.INTERNAL;
  }

  /**
   * Generate all the computed fields in an MdxPath
   *
   * @returns {{ path: object, pathType: string, anchorLink: (string|undefined), queryParams: URLSearchParams }}
   *   `path` is the result of `path.parse` for internal paths and a `URL` otherwise.
   * @throws {Error} when a non-internal path cannot be parsed as a URL
   */
  constructParsedPath() {
    const pathType = MdxPath.getPathType(this.originalPath);
    let urlPath;
    let path;

    if (pathType === PathType.INTERNAL) {
      let normalizedPath = this.originalPath.replaceAll(WINDOWS_SEPARATOR, '/');

      if (normalizedPath.startsWith('.')) {
        // also include paths that start with "..", etc.
        // resolve relative path against the directory of the containing page
        normalizedPath = relative('.', resolve(this.relativeDir, normalizedPath));
        // TODO path starts with .. if path resolves to file system outside of mintlify project
      }

      // remove leading slash
      if (normalizedPath.startsWith('/')) {
        normalizedPath = normalizedPath.slice(1);
      }

      // add arbitrary protocol to parse as URL and isolate pathname
      urlPath = new URL(`mintlify:${normalizedPath}`);
      normalizedPath = urlPath.pathname;

      // remove trailing slash
      if (normalizedPath.endsWith('/')) {
        normalizedPath = normalizedPath.slice(0, -1);
        urlPath.pathname = normalizedPath;
      }

      // if path is "", then path points to same page
      if (normalizedPath === '') {
        normalizedPath = join(this.relativeDir, this.filename);
        urlPath.pathname = normalizedPath;
      }

      path = parse(normalizedPath);
    } else {
      try {
        urlPath = new URL(this.originalPath);
        path = urlPath;
      } catch (e) {
        // keep the underlying parse failure attached for debugging
        throw new Error(`Failed to parse path as URL: ${this.originalPath}. If this is a purposefully malformed path, please wrap it in backticks, i.e \`${this.originalPath}\` `, { cause: e });
      }
    }

    const anchorLink = urlPath.hash || undefined;
    const queryParams = urlPath.searchParams;
    return { path, pathType, anchorLink, queryParams };
  }

  /**
   * @param {string} originalPath - the path exactly as written in the page
   * @param {string} [relativeDir=''] - directory of the containing page; stored after `normalizePath`
   * @param {string} [filename=''] - file name of the containing page
   * @param {string} [wrapper] - the construct the path was found in (see Wrapper)
   */
  constructor(originalPath, relativeDir = '', filename = '', wrapper) {
    this.originalPath = originalPath;
    this.relativeDir = normalizePath(relativeDir);
    this.filename = filename;
    this.wrapper = wrapper;

    const { path, pathType, anchorLink, queryParams } = this.constructParsedPath();
    this.path = path;
    this.pathType = pathType;
    this.anchorLink = anchorLink;
    this.queryParams = queryParams;
  }

  /**
   * @returns {string} `dir/base` for internal paths, otherwise the URL's string form
   */
  toString() {
    if (this.pathType === PathType.INTERNAL) {
      const parsedPath = this.path;
      return join(parsedPath.dir, parsedPath.base);
    }
    return this.path.toString();
  }

  /**
   * List the files this internal path may refer to.
   *
   * @param {Iterable<string>} allFiles - candidate file paths; a leading slash is ignored
   * @param {string} baseDir - directory against which the path is checked on disk
   * @returns {string[]} matching files without duplicates, in discovery order;
   *   empty for non-internal paths
   */
  getResolvedFiles(allFiles, baseDir) {
    if (this.pathType !== PathType.INTERNAL) return [];

    const parsedPath = this.path;
    const ownPath = this.toString();
    // a Set keeps insertion order while dropping files matched by more than one rule
    const resolvedFiles = new Set();

    if (existsSync(resolve(baseDir, ownPath))) {
      resolvedFiles.add(ownPath);
    }

    // there may be multiple files/folders corresponding to the node
    for (const file of allFiles) {
      const fileWithoutLeadingSlash = file.startsWith('/') ? file.slice(1) : file;
      const parsedFilePath = parse(fileWithoutLeadingSlash);
      if (parsedFilePath.dir !== parsedPath.dir) continue;

      const matchesName = parsedFilePath.name === parsedPath.name;
      // account for edge case when there is another . in the filename
      const matchesBase = parsedFilePath.name === parsedPath.base;
      if (matchesName || matchesBase) {
        resolvedFiles.add(fileWithoutLeadingSlash);
      }
    }

    return [...resolvedFiles];
  }

  /**
   * @param {Node} node - candidate target
   * @param {Iterable<string>} allFiles - candidate file paths
   * @param {string} baseDir - directory against which the path is checked on disk
   * @returns {boolean} true when this internal path equals the node's label or
   *   one of its resolved files does
   */
  resolvesTo(node, allFiles, baseDir) {
    if (this.pathType !== PathType.INTERNAL) return false;

    const label = node.toString();
    if (this.toString() === label) return true;
    return this.getResolvedFiles(allFiles, baseDir).includes(label);
  }

  /**
   * @param {string[]} paths - raw path strings
   * @returns {string[]} the string form of every path classified as internal
   */
  static filterMapInternalPaths(paths) {
    return paths
      .map((path) => new MdxPath(path))
      .filter(({ pathType }) => pathType === PathType.INTERNAL)
      .map((path) => path.toString());
  }
}

/*
 * A node is a representation of a page
 * One node can contain multiple MdxPaths (corresponding to internal/external links in the page)
 */
export class Node {
  /**
   * @param {string} label - path of the page; stored as `join(relativeDir, filename)`
   * @param {MdxPath[]} [paths=[]] - paths already known for the page
   */
  constructor(label, paths = []) {
    const relativeDir = normalizePath(dirname(label));
    const filename = basename(label);

    this.label = join(relativeDir, filename);
    this.paths = paths;
    this.edges = [];
    this.relativeDir = relativeDir;
    this.filename = filename;
  }

  toString() {
    return this.label;
  }

  /**
   * @param {Node} other
   * @returns {boolean} true when both nodes carry the same label
   */
  equals(other) {
    return this.label === other.label;
  }

  /**
   * Record a path found in this page.
   *
   * @param {string} originalPath - the path exactly as written
   * @param {string} [wrapper] - the construct the path was found in (see Wrapper)
   */
  addPath(originalPath, wrapper) {
    this.paths.push(new MdxPath(originalPath, this.relativeDir, this.filename, wrapper));
  }

  /**
   * Register an outgoing edge. If an equal edge is already registered, its
   * count is incremented instead of adding a second entry.
   *
   * @param {Edge} edge
   */
  addOutgoingEdge(edge) {
    const existingEdge = this.edges.find((e) => e.equals(edge));
    if (existingEdge) {
      existingEdge.incrementCount();
    } else {
      this.edges.push(edge);
    }
  }

  /**
   * @param {number} [depth=1] - how many edge hops to follow
   * @returns {Node[]} edge targets up to `depth` hops away; a node reachable
   *   along several routes appears once per route
   */
  getChildNodes(depth = 1) {
    const children = [];
    for (const { target } of this.edges) {
      children.push(target);
      if (depth > 1) {
        children.push(...target.getChildNodes(depth - 1));
      }
    }
    return children;
  }
}

/**
 * A directed, typed link between two nodes, with an occurrence count that
 * starts at 1.
 */
export class Edge {
  /**
   * @param {Node} source
   * @param {Node} target
   * @param {string} edgeType - one of the EdgeType values
   */
  constructor(source, target, edgeType) {
    this.source = source;
    this.target = target;
    this.edgeType = edgeType;
    this.count = 1;
  }

  incrementCount() {
    this.count++;
  }

  getCount() {
    return this.count;
  }

  /**
   * @param {Edge} other
   * @returns {boolean} true when source, target and type all match
   */
  equals(other) {
    // directionality of edge matters
    return (
      this.source.equals(other.source) &&
      this.target.equals(other.target) &&
      this.edgeType === other.edgeType
    );
  }
}

/**
 * The set of pages (nodes) under a base directory and the links (edges)
 * between them.
 */
export class Graph {
  /**
   * @param {string} baseDir - project directory; stored as an absolute path
   */
  constructor(baseDir) {
    this.baseDir = resolve(baseDir);
    this.nodes = {};
    this.edges = [];
    this.fileResolutionMap = new Map();
  }

  /**
   * Return the node stored under `label`, creating it when absent.
   *
   * @param {string} label
   * @returns {Node}
   */
  addNode(label) {
    const existingNode = this.getNode(label);
    if (existingNode) return existingNode;

    const node = new Node(label);
    this.nodes[label] = node;
    return node;
  }

  /**
   * @param {Iterable<string>} labels
   */
  addNodes(labels) {
    for (const label of labels) {
      this.addNode(label);
    }
  }

  /*
   * Aliases are additional paths that are valid due to redirects
   */
  addAliasNodes() {
    // TODO move aliases computation to mint config package, since it'll depend on version
    // for now this is a hacky implementation
    const aliases = MdxPath.filterMapInternalPaths(getFileListWithDirectories(this.baseDir));
    this.addNodes(aliases);
  }

  /**
   * @param {string} label
   * @returns {Node|undefined} the node stored under `label`, if any
   */
  getNode(label) {
    // own-property check so labels such as "constructor" or "toString" do not
    // resolve to members inherited from Object.prototype
    return Object.prototype.hasOwnProperty.call(this.nodes, label) ? this.nodes[label] : undefined;
  }

  /**
   * Register an edge in the graph. If an equal edge already exists, its count
   * is incremented and that existing edge is returned.
   *
   * @param {Node} source
   * @param {Node} target
   * @param {string} [edgeType=EdgeType.CONTENT]
   * @returns {Edge} the edge stored in the graph
   */
  addEdge(source, target, edgeType = EdgeType.CONTENT) {
    const newEdge = new Edge(source, target, edgeType);
    const existingEdge = this.edges.find((edge) => edge.equals(newEdge));
    if (existingEdge) {
      existingEdge.incrementCount();
      return existingEdge;
    }
    this.edges.push(newEdge);
    return newEdge;
  }

  /**
   * For every internal path of every node, find the first node it resolves to
   * and connect the two.
   */
  addEdgesBetweenNodes() {
    const allFiles = getLinkPaths(this.baseDir);
    // the node set is not modified below, so one snapshot serves every lookup
    const allNodes = Object.values(this.nodes);

    for (const node of allNodes) {
      for (const path of node.paths) {
        if (path.pathType !== PathType.INTERNAL) continue;

        const targetNode = allNodes.find((otherNode) => path.resolvesTo(otherNode, allFiles, this.baseDir));
        if (!targetNode) continue;

        const edge = this.addEdge(node, targetNode);
        // addEdge has already counted a repeated link; handing the same edge
        // object to the node again would increment its count a second time
        if (!node.edges.includes(edge)) {
          node.addOutgoingEdge(edge);
        }
      }
    }
  }

  /**
   * Fill `fileResolutionMap` with the resolved files of every internal path,
   * keyed by the path's string form.
   */
  precomputeFileResolutions() {
    const allFiles = getLinkPaths(this.baseDir);

    // Build resolution map for all internal paths
    for (const node of Object.values(this.nodes)) {
      for (const path of node.paths) {
        if (path.pathType !== PathType.INTERNAL) continue;
        const resolvedFiles = path.getResolvedFiles(allFiles, this.baseDir);
        this.fileResolutionMap.set(path.toString(), new Set(resolvedFiles));
      }
    }
  }

  /**
   * @returns {MdxPath[]} internal paths for which none of the resolved files
   *   is the label of a node in the graph
   */
  getBrokenInternalLinks() {
    // NOTE(review): the map is only built while empty, so paths added after
    // the first build have no entry and are reported as broken — confirm
    // callers add all paths before calling this.
    if (this.fileResolutionMap.size === 0) {
      this.precomputeFileResolutions();
    }

    const allNodes = Object.values(this.nodes);
    const nodeSet = new Set(allNodes.map((node) => node.toString()));
    const brokenLinks = [];

    for (const node of allNodes) {
      for (const path of node.paths) {
        if (path.pathType !== PathType.INTERNAL) continue;

        const resolvedFiles = this.fileResolutionMap.get(path.toString());
        const isResolved = resolvedFiles !== undefined && [...resolvedFiles].some((file) => nodeSet.has(file));
        if (!isResolved) {
          brokenLinks.push(path);
        }
      }
    }

    return brokenLinks;
  }

  // DEBUGGING
  /**
   * @returns {string[]} one "original => normalized" line per internal path
   */
  getAllInternalPaths() {
    return Object.values(this.nodes)
      .flatMap((node) => node.paths)
      .filter((path) => path.pathType === PathType.INTERNAL)
      .map((path) => `${path.originalPath} => ${path.toString()}`);
  }
}